diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index c196dce925fdb3fa6abe22d93eca9032bfc21ccd..bd4c2f8905a8654218953eec8a00390580d079e5 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -1405,6 +1405,7 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) uid_t uid; uint16_t node_cnt = 0; slurm_addr *node_addr = NULL; + int immediate = job_desc_msg->immediate; start_time = clock(); debug("Processing RPC: REQUEST_RESOURCE_ALLOCATION"); @@ -1420,7 +1421,6 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) } if (error_code == SLURM_SUCCESS) { - int immediate = job_desc_msg->immediate; lock_slurmctld(job_write_lock); error_code = job_allocate(job_desc_msg, &job_id, &node_list_ptr, &num_cpu_groups, @@ -1431,13 +1431,9 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) } /* return result */ - if ((error_code != SLURM_SUCCESS) && - (error_code != ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE)) { - info("_slurm_rpc_allocate_resources time=%ld, error=%s ", - (long) (clock() - start_time), - slurm_strerror(error_code)); - slurm_send_rc_msg(msg, error_code); - } else { + if ((error_code == SLURM_SUCCESS) || + ((immediate == 0) && + (error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE))) { info( "_slurm_rpc_allocate_resources allocated nodes %s to JobId=%u, time=%ld", node_list_ptr, job_id, (long) (clock() - start_time)); @@ -1456,6 +1452,11 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) slurm_send_node_msg(msg->conn_fd, &response_msg); (void) dump_all_job_state(); + } else { /* Fatal error */ + info("_slurm_rpc_allocate_resources time=%ld, error=%s ", + (long) (clock() - start_time), + slurm_strerror(error_code)); + slurm_send_rc_msg(msg, error_code); } } diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index a6acdcb774666475dc7765524b497f507c37e6ab..c85afbdfac4f1224deaeb63fc2d1ce7114bad99e 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -928,7 +928,7 @@ void dump_job_desc(job_desc_msg_t * job_specs) { long job_id, min_procs, min_memory, min_tmp_disk, num_procs; long min_nodes, max_nodes, time_limit, priority, contiguous; - long kill_on_node_fail, shared, task_dist; + long kill_on_node_fail, shared, task_dist, immediate; if (job_specs == NULL) return; @@ -955,8 +955,9 @@ void dump_job_desc(job_desc_msg_t * job_specs) (job_specs->min_nodes != NO_VAL) ? job_specs->min_nodes : -1; max_nodes = (job_specs->max_nodes != NO_VAL) ? job_specs->max_nodes : -1; - debug3(" num_procs=%ld min_nodes=%ld max_nodes=%ld", - num_procs, min_nodes, max_nodes); + immediate = (job_specs->immediate == 0) ? 0 : 1; + debug3(" num_procs=%ld min_nodes=%ld max_nodes=%ld immediate=%ld", + num_procs, min_nodes, max_nodes, immediate); debug3(" req_nodes=%s exc_nodes=%s", job_specs->req_nodes, job_specs->exc_nodes); @@ -1068,8 +1069,13 @@ int job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id, error_code = _job_create(job_specs, new_job_id, allocate, will_run, &job_ptr, submit_uid); - if (error_code) + if (error_code) { + if (immediate && job_ptr) { + job_ptr->job_state = JOB_FAILED; + job_ptr->end_time = 0; + } return error_code; + } if (job_ptr == NULL) fatal("job_allocate: allocated job %u lacks record", new_job_id); @@ -1297,6 +1303,7 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, struct part_record *part_ptr; bitstr_t *req_bitmap = NULL, *exc_bitmap = NULL; + *job_rec_ptr = (struct job_record *) NULL; if ((error_code = _validate_job_desc(job_desc, allocate, submit_uid))) return error_code;