diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c index 271e8dfde9a4c1b2bda616305114e0f72fb074ad..b233db616069a451c575aa2b99d8fd0798401d86 100644 --- a/src/slurmctld/agent.c +++ b/src/slurmctld/agent.c @@ -477,6 +477,7 @@ static void *_thread_per_node_rpc(void *args) state_t thread_state = DSH_NO_RESP; sigset_t set; #if AGENT_IS_THREAD + struct node_record *node_ptr; /* Locks: Write node */ slurmctld_lock_t node_write_lock = { NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK }; @@ -545,26 +546,30 @@ static void *_thread_per_node_rpc(void *args) goto cleanup; } +#if AGENT_IS_THREAD + /* SPECIAL CASE: Immediately mark node as IDLE */ + if ((task_ptr->msg_type == REQUEST_REVOKE_JOB_CREDENTIAL) && + (node_ptr = find_node_record(thread_ptr->node_name))) { + revoke_credential_msg_t *revoke_job_cred; + revoke_job_cred = (revoke_credential_msg_t *) + task_ptr->msg_args_ptr; + debug3("Revoke on node %s job_id %u", + thread_ptr->node_name, revoke_job_cred->job_id); + lock_slurmctld(node_write_lock); + make_node_idle(node_ptr, + find_job_record(revoke_job_cred->job_id)); + unlock_slurmctld(node_write_lock); + /* scheduler(); Overhead too high, + * only do when last node registers */ + } +#endif + switch (response_msg->msg_type) { case RESPONSE_SLURM_RC: slurm_rc_msg = (return_code_msg_t *) response_msg->data; rc = slurm_rc_msg->return_code; slurm_free_return_code_msg(slurm_rc_msg); if (rc == 0) { -#if AGENT_IS_THREAD - /* SPECIAL CASE: Immediately mark node as idle */ - if ((task_ptr->msg_type == - REQUEST_REVOKE_JOB_CREDENTIAL) && - (rc == SLURM_SUCCESS)) { - lock_slurmctld(node_write_lock); - make_node_idle( - find_node_record( - thread_ptr->node_name)); - unlock_slurmctld(node_write_lock); - /* scheduler(); Overhead too high, - * do when last node registers */ - } -#endif debug3("agent processed RPC to node %s", thread_ptr->node_name); thread_state = DSH_DONE; diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 920dd1953750f20bc3f69837a3e732cd4c33cbd0..cc393ee3da2aaabedd843e16d17a43800c3aaf29 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -214,7 +214,7 @@ int main(int argc, char *argv[]) } if ((error_code = getnodename(node_name, MAX_NAME_LEN))) - fatal("getnodename error %d", error_code); + fatal("getnodename error %s", slurm_strerror(error_code)); /* init ssl job credential stuff */ slurm_ssl_init(); @@ -346,12 +346,10 @@ static void *_slurmctld_signal_hand(void *no_data) info("Reconfigure signal (SIGHUP) received"); lock_slurmctld(config_write_lock); error_code = read_slurm_conf(0); - if (error_code == 0) - reset_job_bitmaps(); unlock_slurmctld(config_write_lock); if (error_code) - error("read_slurm_conf error %d", - error_code); + error("read_slurm_conf error %s", + slurm_strerror(error_code)); else _update_logging(); break; @@ -509,9 +507,8 @@ static void *_slurmctld_background(void *no_data) WRITE_LOCK, NO_LOCK }; /* Locks: Write partition */ - slurmctld_lock_t part_write_lock = { NO_LOCK, NO_LOCK, - NO_LOCK, WRITE_LOCK - }; + slurmctld_lock_t part_write_lock = { + NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK }; /* Let the dust settle before doing work */ now = time(NULL); @@ -788,9 +785,8 @@ static void _slurm_rpc_dump_conf(slurm_msg_t * msg) last_update_msg_t *last_time_msg = (last_update_msg_t *) msg->data; slurm_ctl_conf_info_msg_t config_tbl; /* Locks: Read config */ - slurmctld_lock_t config_read_lock = { READ_LOCK, NO_LOCK, - NO_LOCK, NO_LOCK - }; + slurmctld_lock_t config_read_lock 
= { + READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; start_time = clock(); debug("Processing RPC: REQUEST_BUILD_INFO"); @@ -828,9 +824,8 @@ static void _slurm_rpc_dump_jobs(slurm_msg_t * msg) job_info_request_msg_t *last_time_msg = (job_info_request_msg_t *) msg->data; /* Locks: Read job */ - slurmctld_lock_t job_read_lock = { NO_LOCK, READ_LOCK, - NO_LOCK, NO_LOCK - }; + slurmctld_lock_t job_read_lock = { + NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; start_time = clock(); debug("Processing RPC: REQUEST_JOB_INFO"); @@ -868,9 +863,8 @@ static void _slurm_rpc_dump_nodes(slurm_msg_t * msg) slurm_msg_t response_msg; last_update_msg_t *last_time_msg = (last_update_msg_t *) msg->data; /* Locks: Read node */ - slurmctld_lock_t node_read_lock = { NO_LOCK, NO_LOCK, - READ_LOCK, NO_LOCK - }; + slurmctld_lock_t node_read_lock = { + NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; start_time = clock(); debug("Processing RPC: REQUEST_NODE_INFO"); @@ -908,9 +902,8 @@ static void _slurm_rpc_dump_partitions(slurm_msg_t * msg) slurm_msg_t response_msg; last_update_msg_t *last_time_msg = (last_update_msg_t *) msg->data; /* Locks: Read partition */ - slurmctld_lock_t part_read_lock = { NO_LOCK, NO_LOCK, - NO_LOCK, READ_LOCK - }; + slurmctld_lock_t part_read_lock = { + NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK }; start_time = clock(); debug("Processing RPC: REQUEST_PARTITION_INFO"); @@ -944,14 +937,13 @@ static void _slurm_rpc_dump_partitions(slurm_msg_t * msg) static void _slurm_rpc_job_step_kill(slurm_msg_t * msg) { /* init */ - int error_code = 0; + int error_code = SLURM_SUCCESS; clock_t start_time; job_step_kill_msg_t *job_step_kill_msg = (job_step_kill_msg_t *) msg->data; /* Locks: Write job, write node */ - slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, - WRITE_LOCK, NO_LOCK - }; + slurmctld_lock_t job_write_lock = { + NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; uid_t uid; start_time = clock(); @@ -968,13 +960,14 @@ static void _slurm_rpc_job_step_kill(slurm_msg_t * msg) /* return result */ if (error_code) { info( - "_slurm_rpc_job_step_kill error %d for %u, time=%ld", - error_code, job_step_kill_msg->job_id, - (long) (clock() - start_time)); + "_slurm_rpc_job_step_kill JobId=%u, time=%ld, error=%s", + job_step_kill_msg->job_id, + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_job_step_kill success for JobId=%u, time=%ld", + "_slurm_rpc_job_step_kill JobId=%u, time=%ld, success", job_step_kill_msg->job_id, (long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_SUCCESS); @@ -993,14 +986,15 @@ static void _slurm_rpc_job_step_kill(slurm_msg_t * msg) /* return result */ if (error_code) { info( - "_slurm_rpc_job_step_kill error %d for %u.%u, time=%ld", - error_code, job_step_kill_msg->job_id, + "_slurm_rpc_job_step_kill StepId=%u.%u, time=%ld, error=%s", + job_step_kill_msg->job_id, job_step_kill_msg->job_step_id, - (long) (clock() - start_time)); + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_job_step_kill success for %u.%u, time=%ld", + "_slurm_rpc_job_step_kill StepId=%u.%u, time=%ld, success", job_step_kill_msg->job_id, job_step_kill_msg->job_step_id, (long) (clock() - start_time)); @@ -1031,7 +1025,7 @@ static void _slurm_rpc_job_step_complete(slurm_msg_t * msg) lock_slurmctld(job_write_lock); /* do RPC call */ - /* First set node down as needed on fatal error */ + /* First set node DOWN if fatal error */ if (complete_job_step_msg->slurm_rc == 
ESLURM_ALREADY_DONE) { /* race condition on job termination, not a real error */ info("slurmd error running job %u from node %s: %s", @@ -1072,13 +1066,14 @@ static void _slurm_rpc_job_step_complete(slurm_msg_t * msg) /* return result */ if (error_code) { info( - "_slurm_rpc_job_step_complete error %d for %u, time=%ld", - error_code, complete_job_step_msg->job_id, - (long) (clock() - start_time)); + "_slurm_rpc_job_step_complete JobId=%u, time=%ld, error=%s", + complete_job_step_msg->job_id, + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_job_step_complete success for JobId=%u, time=%ld", + "_slurm_rpc_job_step_complete JobId=%u, time=%ld, success", complete_job_step_msg->job_id, (long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_SUCCESS); @@ -1096,14 +1091,15 @@ static void _slurm_rpc_job_step_complete(slurm_msg_t * msg) /* return result */ if (error_code) { info( - "_slurm_rpc_job_step_complete error %d for %u.%u, time=%ld", - error_code, complete_job_step_msg->job_id, + "_slurm_rpc_job_step_complete StepId=%u.%u, time=%ld, error=%s", + complete_job_step_msg->job_id, complete_job_step_msg->job_step_id, - (long) (clock() - start_time)); + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_job_step_complete success for %u.%u, time=%ld", + "_slurm_rpc_job_step_complete StepId=%u.%u, time=%ld, success", complete_job_step_msg->job_id, complete_job_step_msg->job_step_id, (long) (clock() - start_time)); @@ -1119,7 +1115,7 @@ static void _slurm_rpc_job_step_get_info(slurm_msg_t * msg) clock_t start_time; void *resp_buffer = NULL; int resp_buffer_size = 0; - int error_code = 0; + int error_code = SLURM_SUCCESS; job_step_info_request_msg_t *request = (job_step_info_request_msg_t *) msg->data; /* Locks: Read job */ @@ -1153,8 +1149,9 @@ static void _slurm_rpc_job_step_get_info(slurm_msg_t * msg) (long) (clock() - start_time)); else if (error_code) error - ("_slurm_rpc_job_step_get_info, error %d, time=%ld", - error_code, (long) (clock() - start_time)); + ("_slurm_rpc_job_step_get_info, time=%ld, error=%s", + (long) (clock() - start_time), + slurm_strerror(error_code)); } if (error_code) @@ -1182,9 +1179,8 @@ static void _slurm_rpc_update_job(slurm_msg_t * msg) clock_t start_time; job_desc_msg_t *job_desc_msg = (job_desc_msg_t *) msg->data; /* Locks: Write job, read node, read partition */ - slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, - READ_LOCK, READ_LOCK - }; + slurmctld_lock_t job_write_lock = { + NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; uid_t uid; start_time = clock(); @@ -1199,9 +1195,9 @@ static void _slurm_rpc_update_job(slurm_msg_t * msg) /* return result */ if (error_code) { error( - "_slurm_rpc_update_job error %d for job id %u, time=%ld", - error_code, job_desc_msg->job_id, - (long) (clock() - start_time)); + "_slurm_rpc_update_job JobID=%u, time=%ld, error=%s", + job_desc_msg->job_id, (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info( @@ -1219,14 +1215,13 @@ static void _slurm_rpc_update_job(slurm_msg_t * msg) static void _slurm_rpc_update_node(slurm_msg_t * msg) { /* init */ - int error_code = 0; + int error_code = SLURM_SUCCESS; clock_t start_time; update_node_msg_t *update_node_msg_ptr = (update_node_msg_t *) msg->data; /* Locks: Write node */ - slurmctld_lock_t node_write_lock = { NO_LOCK, NO_LOCK, - WRITE_LOCK, NO_LOCK - }; + 
slurmctld_lock_t node_write_lock = { + NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK }; uid_t uid; start_time = clock(); @@ -1238,7 +1233,7 @@ static void _slurm_rpc_update_node(slurm_msg_t * msg) (unsigned int) uid); } - if (error_code == 0) { + if (error_code == SLURM_SUCCESS) { /* do RPC call */ lock_slurmctld(node_write_lock); error_code = update_node(update_node_msg_ptr); @@ -1247,10 +1242,10 @@ static void _slurm_rpc_update_node(slurm_msg_t * msg) /* return result */ if (error_code) { - error - ("_slurm_rpc_update_node error %d for node %s, time=%ld", - error_code, update_node_msg_ptr->node_names, - (long) (clock() - start_time)); + error("_slurm_rpc_update_node node=%s, time=%ld, error=%s", + update_node_msg_ptr->node_names, + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info( @@ -1271,13 +1266,12 @@ static void _slurm_rpc_update_node(slurm_msg_t * msg) static void _slurm_rpc_update_partition(slurm_msg_t * msg) { /* init */ - int error_code = 0; + int error_code = SLURM_SUCCESS; clock_t start_time; update_part_msg_t *part_desc_ptr = (update_part_msg_t *) msg->data; /* Locks: Read node, write partition */ - slurmctld_lock_t part_write_lock = { NO_LOCK, NO_LOCK, - READ_LOCK, WRITE_LOCK - }; + slurmctld_lock_t part_write_lock = { + NO_LOCK, NO_LOCK, READ_LOCK, WRITE_LOCK }; uid_t uid; start_time = clock(); @@ -1290,7 +1284,7 @@ static void _slurm_rpc_update_partition(slurm_msg_t * msg) (unsigned int) uid); } - if (error_code == 0) { + if (error_code == SLURM_SUCCESS) { /* do RPC call */ lock_slurmctld(part_write_lock); error_code = update_part(part_desc_ptr); @@ -1300,9 +1294,10 @@ static void _slurm_rpc_update_partition(slurm_msg_t * msg) /* return result */ if (error_code) { error( - "_slurm_rpc_update_partition error %d for partition %s, time=%ld", - error_code, part_desc_ptr->name, - (long) (clock() - start_time)); + "_slurm_rpc_update_partition partition=%s, time=%ld, error=%s", + part_desc_ptr->name, + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info( @@ -1321,16 +1316,15 @@ static void _slurm_rpc_update_partition(slurm_msg_t * msg) static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) { /* init */ - int error_code = 0; + int error_code = SLURM_SUCCESS; clock_t start_time; uint32_t job_id; slurm_msg_t response_msg; submit_response_msg_t submit_msg; job_desc_msg_t *job_desc_msg = (job_desc_msg_t *) msg->data; /* Locks: Write job, read node, read partition */ - slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, - READ_LOCK, READ_LOCK - }; + slurmctld_lock_t job_write_lock = { + NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; uid_t uid; start_time = clock(); @@ -1345,7 +1339,7 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) error("Security violation, SUBMIT_JOB from uid %u", (unsigned int) uid); } - if (error_code == 0) { + if (error_code == SLURM_SUCCESS) { lock_slurmctld(job_write_lock); error_code = job_allocate(job_desc_msg, &job_id, (char **) NULL, @@ -1358,8 +1352,9 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) /* return result */ if (error_code) { - info("_slurm_rpc_submit_batch_job error %d, time=%ld", - error_code, (long) (clock() - start_time)); + info("_slurm_rpc_submit_batch_job time=%ld, error=%s", + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info( @@ -1380,7 +1375,7 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) static void 
_slurm_rpc_allocate_resources(slurm_msg_t * msg) { /* init */ - int error_code = 0; + int error_code = SLURM_SUCCESS; slurm_msg_t response_msg; clock_t start_time; job_desc_msg_t *job_desc_msg = (job_desc_msg_t *) msg->data; @@ -1390,9 +1385,8 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) uint32_t job_id; resource_allocation_response_msg_t alloc_msg; /* Locks: Write job, write node, read partition */ - slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, - WRITE_LOCK, READ_LOCK - }; + slurmctld_lock_t job_write_lock = { + NO_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; uid_t uid; uint16_t node_cnt; slurm_addr *node_addr; @@ -1409,7 +1403,7 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) error("Security violation, RESOURCE_ALLOCATE from uid %u", (unsigned int) uid); } - if (error_code == 0) { + if (error_code == SLURM_SUCCESS) { int immediate = job_desc_msg->immediate; lock_slurmctld(job_write_lock); error_code = job_allocate(job_desc_msg, &job_id, @@ -1422,8 +1416,9 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) /* return result */ if (error_code) { - info("_slurm_rpc_allocate_resources error %d, time=%ld", - error_code, (long) (clock() - start_time)); + info("_slurm_rpc_allocate_resources time=%ld, error=%s ", + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info( @@ -1452,7 +1447,7 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) { /* init */ - int error_code = 0; + int error_code = SLURM_SUCCESS; slurm_msg_t response_msg; clock_t start_time; job_desc_msg_t *job_desc_msg = (job_desc_msg_t *) msg->data; @@ -1464,8 +1459,8 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) struct step_record *step_rec; job_step_create_request_msg_t req_step_msg; /* Locks: Write job, write node, read partition */ - slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, - WRITE_LOCK, READ_LOCK }; + slurmctld_lock_t job_write_lock = { + NO_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; uid_t uid; uint16_t node_cnt; slurm_addr *node_addr; @@ -1484,7 +1479,7 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) (unsigned int) uid); } - if (error_code == 0) { + if (error_code == SLURM_SUCCESS) { int immediate = true; /* job_desc_msg->immediate == true */ lock_slurmctld(job_write_lock); error_code = job_allocate(job_desc_msg, &job_id, @@ -1497,9 +1492,9 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) /* return result */ if (error_code) { unlock_slurmctld(job_write_lock); - info( - "_slurm_rpc_allocate_and_run error %d allocating resources, time=%ld", - error_code, (long) (clock() - start_time)); + info("_slurm_rpc_allocate_and_run time=%ld, error=%s", + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); return; } @@ -1516,8 +1511,9 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) job_complete(job_id, job_desc_msg->user_id, false, 0); unlock_slurmctld(job_write_lock); info( - "_slurm_rpc_allocate_and_run error %d creating job step, time=%ld", - error_code, (long) (clock() - start_time)); + "_slurm_rpc_allocate_and_run creating job step, time=%ld, error=%s", + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { @@ -1555,7 +1551,7 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) /* _slurm_rpc_old_job_alloc - process RPC to get details on existing job */ static void 
_slurm_rpc_old_job_alloc(slurm_msg_t * msg) { - int error_code = 0; + int error_code = SLURM_SUCCESS; slurm_msg_t response_msg; clock_t start_time; old_job_alloc_msg_t *job_desc_msg = @@ -1565,8 +1561,8 @@ static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg) uint32_t *cpus_per_node = NULL, *cpu_count_reps = NULL; resource_allocation_response_msg_t alloc_msg; /* Locks: Read job, read node */ - slurmctld_lock_t job_read_lock = { NO_LOCK, READ_LOCK, - READ_LOCK, NO_LOCK }; + slurmctld_lock_t job_read_lock = { + NO_LOCK, READ_LOCK, READ_LOCK, NO_LOCK }; uint16_t node_cnt; slurm_addr *node_addr; uid_t uid; @@ -1581,7 +1577,7 @@ static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg) error("Security violation, RESOURCE_ALLOCATE from uid %u", (unsigned int) uid); } - if (error_code == 0) { + if (error_code == SLURM_SUCCESS) { lock_slurmctld(job_read_lock); error_code = old_job_info(job_desc_msg->uid, job_desc_msg->job_id, @@ -1593,10 +1589,11 @@ static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg) /* return result */ if (error_code) { - info( - "_slurm_rpc_old_job_alloc error %d getting info, job=%u, uid=%u, time=%ld", - error_code, job_desc_msg->job_id, job_desc_msg->uid, - (long) (clock() - start_time)); + debug( + "_slurm_rpc_old_job_alloc: JobId=%u, uid=%u, time=%ld, error=%s", + job_desc_msg->job_id, job_desc_msg->uid, + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info( @@ -1625,7 +1622,7 @@ static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg) static void _slurm_rpc_job_will_run(slurm_msg_t * msg) { /* init */ - int error_code = 0; + int error_code = SLURM_SUCCESS; clock_t start_time; uint16_t num_cpu_groups = 0; uint32_t *cpus_per_node = NULL, *cpu_count_reps = NULL; @@ -1633,9 +1630,8 @@ static void _slurm_rpc_job_will_run(slurm_msg_t * msg) job_desc_msg_t *job_desc_msg = (job_desc_msg_t *) msg->data; char *node_list_ptr = NULL; /* Locks: Write job, read node, read partition */ - slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, - READ_LOCK, READ_LOCK - }; + slurmctld_lock_t job_write_lock = { + NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; uid_t uid; start_time = clock(); @@ -1651,7 +1647,7 @@ static void _slurm_rpc_job_will_run(slurm_msg_t * msg) (unsigned int) uid); } - if (error_code == 0) { + if (error_code == SLURM_SUCCESS) { lock_slurmctld(job_write_lock); error_code = job_allocate(job_desc_msg, &job_id, &node_list_ptr, &num_cpu_groups, @@ -1663,8 +1659,9 @@ static void _slurm_rpc_job_will_run(slurm_msg_t * msg) /* return result */ if (error_code) { - info("_slurm_rpc_job_will_run error %d, time=%ld", - error_code, (long) (clock() - start_time)); + info("_slurm_rpc_job_will_run time=%ld, error=%s", + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info("_slurm_rpc_job_will_run success for , time=%ld", @@ -1699,9 +1696,8 @@ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg) int error_code = SLURM_SUCCESS; clock_t start_time; /* Locks: Write configuration, job, node and partition */ - slurmctld_lock_t config_write_lock = { WRITE_LOCK, WRITE_LOCK, - WRITE_LOCK, WRITE_LOCK - }; + slurmctld_lock_t config_write_lock = { + WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK }; uid_t uid; start_time = clock(); @@ -1717,10 +1713,8 @@ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg) if (error_code == SLURM_SUCCESS) { lock_slurmctld(config_write_lock); error_code = read_slurm_conf(0); - if (error_code == SLURM_SUCCESS) { - 
reset_job_bitmaps(); + if (error_code == SLURM_SUCCESS) msg_to_slurmd(REQUEST_RECONFIGURE); - } unlock_slurmctld(config_write_lock); } if (error_code == SLURM_SUCCESS) { /* Stuff to do after unlock */ @@ -1735,12 +1729,13 @@ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg) /* return result */ if (error_code) { error( - "_slurm_rpc_reconfigure_controller error %d, time=%ld", - error_code, (long) (clock() - start_time)); + "_slurm_rpc_reconfigure_controller: time=%ld, error=%s", + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_reconfigure_controller completed, time=%ld", + "_slurm_rpc_reconfigure_controller: completed, time=%ld", (long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_SUCCESS); schedule(); @@ -1752,14 +1747,13 @@ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg) /* _slurm_rpc_shutdown_controller - process RPC to shutdown slurmctld */ static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg) { - int error_code = 0, i; + int error_code = SLURM_SUCCESS, i; uint16_t core_arg = 0; shutdown_msg_t *shutdown_msg = (shutdown_msg_t *) msg->data; uid_t uid; /* Locks: Read node */ - slurmctld_lock_t node_read_lock = { NO_LOCK, NO_LOCK, - READ_LOCK, NO_LOCK - }; + slurmctld_lock_t node_read_lock = { + NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; uid = g_slurm_auth_get_uid(msg->cred); if ((uid != 0) && (uid != getuid())) { @@ -1803,7 +1797,7 @@ static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg) sleep(1); } slurm_send_rc_msg(msg, error_code); - if ((error_code == 0) && core_arg) + if ((error_code == SLURM_SUCCESS) && core_arg) fatal("Aborting per RPC request"); } @@ -1811,7 +1805,7 @@ static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg) * slurmctld */ static void _slurm_rpc_shutdown_controller_immediate(slurm_msg_t * msg) { - int error_code = 0; + int error_code = SLURM_SUCCESS; uid_t uid; uid = g_slurm_auth_get_uid(msg->cred); @@ -1824,7 +1818,7 @@ static void _slurm_rpc_shutdown_controller_immediate(slurm_msg_t * msg) /* do RPC call */ /* No op: just used to knock loose accept RPC thread */ - if (error_code == 0) + if (error_code == SLURM_SUCCESS) debug("Performing RPC: REQUEST_SHUTDOWN_IMMEDIATE"); } @@ -1833,7 +1827,7 @@ static void _slurm_rpc_shutdown_controller_immediate(slurm_msg_t * msg) static void _slurm_rpc_job_step_create(slurm_msg_t * msg) { /* init */ - int error_code = 0; + int error_code = SLURM_SUCCESS; clock_t start_time; slurm_msg_t resp; @@ -1842,9 +1836,8 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) job_step_create_request_msg_t *req_step_msg = (job_step_create_request_msg_t *) msg->data; /* Locks: Write jobs, read nodes */ - slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, - READ_LOCK, NO_LOCK - }; + slurmctld_lock_t job_write_lock = { + NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK }; uid_t uid; start_time = clock(); @@ -1859,7 +1852,7 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) (unsigned int) uid); } - if (error_code == 0) { + if (error_code == SLURM_SUCCESS) { /* issue the RPC */ lock_slurmctld(job_write_lock); error_code = step_create(req_step_msg, &step_rec, false); @@ -1868,12 +1861,12 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) /* return result */ if (error_code) { unlock_slurmctld(job_write_lock); - info("_slurm_rpc_job_step_create error %s, time=%ld", - slurm_strerror(error_code), - (long) (clock() - start_time)); + info("_slurm_rpc_job_step_create: time=%ld error=%s", + 
(long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { - info("_slurm_rpc_job_step_create %u.%u success time=%ld", + info("_slurm_rpc_job_step_create: %u.%u success time=%ld", step_rec->job_ptr->job_id, step_rec->step_id, (long) (clock() - start_time)); @@ -1909,9 +1902,8 @@ static void _slurm_rpc_node_registration(slurm_msg_t * msg) slurm_node_registration_status_msg_t *node_reg_stat_msg = (slurm_node_registration_status_msg_t *) msg->data; /* Locks: Write job and node */ - slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, - WRITE_LOCK, NO_LOCK - }; + slurmctld_lock_t job_write_lock = { + NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; uid_t uid; start_time = clock(); @@ -1922,7 +1914,7 @@ static void _slurm_rpc_node_registration(slurm_msg_t * msg) error("Security violation, NODE_REGISTER RPC from uid %u", (unsigned int) uid); } - if (error_code == 0) { + if (error_code == SLURM_SUCCESS) { /* do RPC call */ lock_slurmctld(job_write_lock); validate_jobs_on_node(node_reg_stat_msg->node_name, @@ -1944,9 +1936,10 @@ static void _slurm_rpc_node_registration(slurm_msg_t * msg) /* return result */ if (error_code) { error( - "_slurm_rpc_node_registration error %d for %s, time=%ld", - error_code, node_reg_stat_msg->node_name, - (long) (clock() - start_time)); + "_slurm_rpc_node_registration node=%s, time=%ld, error=%s", + node_reg_stat_msg->node_name, + (long) (clock() - start_time), + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { info( @@ -2188,7 +2181,6 @@ static void _run_backup(void) if (read_slurm_conf(1)) /* Recover all state */ fatal("Unable to recover slurm state"); - reset_job_bitmaps(); shutdown_time = (time_t) 0; return; } @@ -2279,7 +2271,7 @@ static void *_background_rpc_mgr(void *no_data) error("slurm_receive_msg error %m"); else { error_code = _background_process_msg(msg); - if ((error_code == 0) && + if ((error_code == SLURM_SUCCESS) && (msg->msg_type == REQUEST_SHUTDOWN_IMMEDIATE)) done_flag = true; } @@ -2299,7 +2291,7 @@ static void *_background_rpc_mgr(void *no_data) /* _background_process_msg - process an RPC to the backup_controller */ static int _background_process_msg(slurm_msg_t * msg) { - int error_code = 0; + int error_code = SLURM_SUCCESS; uid_t uid; uid = g_slurm_auth_get_uid(msg->cred); @@ -2309,7 +2301,7 @@ static int _background_process_msg(slurm_msg_t * msg) error_code = ESLURM_USER_ID_MISSING; } - if (error_code == 0) { + if (error_code == SLURM_SUCCESS) { if (msg->msg_type == REQUEST_SHUTDOWN_IMMEDIATE) { debug3 ("Performing RPC: REQUEST_SHUTDOWN_IMMEDIATE"); diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index c4c6b298af0f8195d03f3aa5fc5cb6634e724ede..1ac821e1df0dc83ff8628c67d680e1e00976f19a 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -56,6 +56,7 @@ #include "src/common/list.h" #include "src/common/macros.h" #include "src/common/pack.h" +#include "src/common/xassert.h" #include "src/common/xstring.h" #include "src/slurmctld/agent.h" @@ -108,6 +109,7 @@ static void _excise_node_from_job(struct job_record *job_record_ptr, struct node_record *node_record_ptr); static int _find_batch_dir(void *x, void *key); static void _get_batch_job_dir_ids(List batch_dirs); +static void _job_timed_out(struct job_record *job_ptr); static int _job_create(job_desc_msg_t * job_specs, uint32_t * new_job_id, int allocate, int will_run, struct job_record **job_rec_ptr, uid_t submit_uid); @@ -123,6 +125,8 @@ static void _read_data_array_from_file(char *file_name, 
char ***data, uint16_t * size); static void _read_data_from_file(char *file_name, char **data); static void _remove_defunct_batch_dirs(List batch_dirs); +static void _reset_detail_bitmaps(struct job_record *job_ptr); +static void _reset_step_bitmaps(struct job_record *job_ptr); static void _set_job_id(struct job_record *job_ptr); static void _set_job_prio(struct job_record *job_ptr); static void _signal_job_on_node(uint32_t job_id, uint16_t step_id, @@ -168,17 +172,17 @@ struct job_record *create_job_record(int *error_code) job_details_point = (struct job_details *) xmalloc(sizeof(struct job_details)); - job_record_point->magic = JOB_MAGIC; + xassert (job_record_point->magic = JOB_MAGIC); /* sets value */ job_record_point->details = job_details_point; job_record_point->step_list = list_create(NULL); if (job_record_point->step_list == NULL) - fatal("list_create can not allocate memory"); + fatal("memory allocation failure"); - job_details_point->magic = DETAILS_MAGIC; + xassert (job_details_point->magic = DETAILS_MAGIC); /* set value */ job_details_point->submit_time = time(NULL); - if (list_append(job_list, job_record_point) == NULL) - fatal("create_job_record: unable to allocate memory"); + if (list_append(job_list, job_record_point) == 0) + fatal("list_append memory allocation failure"); return job_record_point; } @@ -196,9 +200,7 @@ void delete_job_details(struct job_record *job_entry) return; _delete_job_desc_files(job_entry->job_id); - if (job_entry->details->magic != DETAILS_MAGIC) - fatal - ("delete_job_details: passed invalid job details pointer"); + xassert (job_entry->details->magic == DETAILS_MAGIC); xfree(job_entry->details->req_nodes); xfree(job_entry->details->exc_nodes); FREE_NULL_BITMAP(job_entry->details->req_node_bitmap); @@ -259,8 +261,7 @@ int dump_all_job_state(void) job_record_iterator = list_iterator_create(job_list); while ((job_record_point = (struct job_record *) list_next(job_record_iterator))) { - if (job_record_point->magic != JOB_MAGIC) - fatal("dump_all_job: job integrity is bad"); + xassert (job_record_point->magic == JOB_MAGIC); _dump_job_state(job_record_point, buffer); } unlock_slurmctld(job_read_lock); @@ -405,8 +406,7 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer) /* Dump job details, if available */ detail_ptr = dump_job_ptr->details; if (detail_ptr) { - if (detail_ptr->magic != DETAILS_MAGIC) - fatal("dump_all_job: job detail integrity is bad"); + xassert (detail_ptr->magic == DETAILS_MAGIC); pack16((uint16_t) DETAILS_FLAG, buffer); _dump_job_details(detail_ptr, buffer); } else @@ -459,7 +459,8 @@ static int _load_job_state(Buf buffer) safe_unpackstr_xmalloc(&alloc_node, &name_len, buffer); /* validity test as possible */ - if ((job_state >= JOB_END) || (batch_flag > 1)) { + if (((job_state & (~JOB_COMPLETING)) >= JOB_END) || + (batch_flag > 1)) { error("Invalid data for job %u: job_state=%u batch_flag=%u", job_id, job_state, batch_flag); goto unpack_error; @@ -479,16 +480,16 @@ static int _load_job_state(Buf buffer) nodes, job_id); goto unpack_error; } + part_ptr = list_find_first(part_list, &list_find_part, + partition); + if (part_ptr == NULL) { + error("Invalid partition (%s) for job_id %u", + partition, job_id); + goto unpack_error; + } job_ptr = find_job_record(job_id); if (job_ptr == NULL) { - part_ptr = list_find_first(part_list, &list_find_part, - partition); - if (part_ptr == NULL) { - info("Invalid partition (%s) for job_id %u", - partition, job_id); - goto unpack_error; - } job_ptr = 
create_job_record(&error_code); if (error_code) { error("Create job entry failed for job_id %u", @@ -496,8 +497,6 @@ static int _load_job_state(Buf buffer) goto unpack_error; } job_ptr->job_id = job_id; - strncpy(job_ptr->partition, partition, MAX_NAME_LEN); - job_ptr->part_ptr = part_ptr; _add_job_hash(job_ptr); } @@ -517,20 +516,28 @@ static int _load_job_state(Buf buffer) job_ptr->alloc_sid = alloc_sid; job_ptr->start_time = start_time; job_ptr->end_time = end_time; - job_ptr->time_last_active = time(NULL); job_ptr->job_state = job_state; job_ptr->next_step_id = next_step_id; + job_ptr->time_last_active = time(NULL); strncpy(job_ptr->name, name, MAX_NAME_LEN); xfree(name); - job_ptr->nodes = nodes; + xfree(job_ptr->nodes); + job_ptr->nodes = nodes; nodes = NULL; /* reused, nothing left to free */ - job_ptr->alloc_node = alloc_node; + xfree(job_ptr->alloc_node); + job_ptr->alloc_node = alloc_node; alloc_node = NULL; /* reused, nothing left to free */ - job_ptr->node_bitmap = node_bitmap; + FREE_NULL_BITMAP(job_ptr->node_bitmap); + job_ptr->node_bitmap = node_bitmap; + strncpy(job_ptr->partition, partition, MAX_NAME_LEN); xfree(partition); + job_ptr->part_ptr = part_ptr; job_ptr->kill_on_node_fail = kill_on_node_fail; job_ptr->kill_on_step_done = kill_on_step_done; job_ptr->batch_flag = batch_flag; + build_node_details(job_ptr); /* set: num_cpu_groups, cpus_per_node, + * cpu_count_reps, node_cnt, and + * node_addr */ info("recovered job id %u", job_id); safe_unpack16(&step_flag, buffer); @@ -639,6 +646,17 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer) goto unpack_error; } + /* free any left-over data */ + xfree(job_ptr->details->req_nodes); + FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap); + xfree(job_ptr->details->exc_nodes); + FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap); + xfree(job_ptr->details->features); + xfree(job_ptr->details->err); + xfree(job_ptr->details->in); + xfree(job_ptr->details->out); + xfree(job_ptr->details->work_dir); + /* now put the details into the job record */ memcpy(&job_ptr->details->credential, credential_ptr, sizeof(job_ptr->details->credential)); @@ -661,9 +679,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer) job_ptr->details->in = in; job_ptr->details->out = out; job_ptr->details->work_dir = work_dir; - build_node_details(job_ptr); /* set: num_cpu_groups, cpus_per_node, - * cpu_count_reps, node_cnt, and - * node_addr */ + return SLURM_SUCCESS; unpack_error: @@ -720,13 +736,21 @@ static int _load_step_state(struct job_record *job_ptr, Buf buffer) goto unpack_error; } - step_ptr = create_step_record(job_ptr); + step_ptr = find_step_record(job_ptr, step_id); + if (step_ptr == NULL) + step_ptr = create_step_record(job_ptr); if (step_ptr == NULL) return SLURM_FAILURE; - step_ptr->step_id = step_id; + + /* free any left-over values */ + xfree(step_ptr->step_node_list); + FREE_NULL_BITMAP(step_ptr->step_node_bitmap); + + /* set new values */ + step_ptr->step_id = step_id; step_ptr->cyclic_alloc = cyclic_alloc; - step_ptr->num_tasks = num_tasks; - step_ptr->start_time = start_time; + step_ptr->num_tasks = num_tasks; + step_ptr->start_time = start_time; step_ptr->step_node_list = step_node_list; if (step_node_list) (void) node_name2bitmap(step_node_list, @@ -824,8 +848,8 @@ struct job_record *find_running_job_by_node_name(char *node_name) } /* - * kill_running_job_by_node_name - Given a node name, deallocate jobs - * from the node or kill them + * kill_running_job_by_node_name - Given a node name, 
deallocate RUNNING + * or COMPLETING jobs from the node or kill them * IN node_name - name of a node * IN step_test - if true, only kill the job if a step is running on the node * RET number of killed jobs @@ -833,42 +857,56 @@ struct job_record *find_running_job_by_node_name(char *node_name) int kill_running_job_by_node_name(char *node_name, bool step_test) { ListIterator job_record_iterator; - struct job_record *job_record_point; - struct node_record *node_record_point; + struct job_record *job_ptr; + struct node_record *node_ptr; int bit_position; int job_count = 0; - node_record_point = find_node_record(node_name); - if (node_record_point == NULL) /* No such node */ + node_ptr = find_node_record(node_name); + if (node_ptr == NULL) /* No such node */ return 0; - bit_position = node_record_point - node_record_table_ptr; + bit_position = node_ptr - node_record_table_ptr; job_record_iterator = list_iterator_create(job_list); - while ((job_record_point = + while ((job_ptr = (struct job_record *) list_next(job_record_iterator))) { - if (job_record_point->job_state != JOB_RUNNING) - continue; /* job not active */ - if (!bit_test(job_record_point->node_bitmap, bit_position)) + if ((job_ptr->node_bitmap == NULL) || + (!bit_test(job_ptr->node_bitmap, bit_position))) continue; /* job not on this node */ - if (step_test && - (step_on_node(job_record_point, node_record_point) == 0)) - continue; - error("Running job_id %u on failed node %s", - job_record_point->job_id, node_name); - job_count++; - if ((job_record_point->details == NULL) || - (job_record_point->kill_on_node_fail) || - (job_record_point->node_cnt <= 1)) { - job_record_point->job_state = JOB_NODE_FAIL; - job_record_point->end_time = time(NULL); - deallocate_nodes(job_record_point); - delete_all_step_records(job_record_point); - delete_job_details(job_record_point); - } else { - /* Remove node from this job's list */ - _excise_node_from_job(job_record_point, - node_record_point); - make_node_idle(node_record_point); + if (job_ptr->job_state & JOB_COMPLETING) { + job_count++; + bit_clear(job_ptr->node_bitmap, bit_position); + if (job_ptr->node_cnt) + (job_ptr->node_cnt)--; + else + error("node_cnt underflow on JobId=%u", + job_ptr->job_id); + if (job_ptr->node_cnt == 0) + job_ptr->job_state &= (~JOB_COMPLETING); + if (node_ptr->comp_job_cnt) + (node_ptr->comp_job_cnt)--; + else + error("Node %s comp_job_cnt underflow, JobId=%u", + node_ptr->name, job_ptr->job_id); + } else if (job_ptr->job_state == JOB_RUNNING) { + if (step_test && + (step_on_node(job_ptr, node_ptr) == 0)) + continue; + error("Running job_id %u on failed node %s", + job_ptr->job_id, node_name); + job_count++; + if ((job_ptr->details == NULL) || + (job_ptr->kill_on_node_fail) || + (job_ptr->node_cnt <= 1)) { + job_ptr->job_state = JOB_NODE_FAIL | + JOB_COMPLETING; + job_ptr->end_time = time(NULL); + deallocate_nodes(job_ptr); + delete_all_step_records(job_ptr); + } else { + /* Remove node from this job's list */ + _excise_node_from_job(job_ptr, node_ptr); + } } } @@ -883,10 +921,7 @@ int kill_running_job_by_node_name(char *node_name, bool step_test) static void _excise_node_from_job(struct job_record *job_record_ptr, struct node_record *node_record_ptr) { - int bit_position; - - bit_position = node_record_ptr - node_record_table_ptr; - bit_clear(job_record_ptr->node_bitmap, bit_position); + make_node_idle(node_record_ptr, job_record_ptr); /* clear node_bitmap */ job_record_ptr->nodes = bitmap2node_name(job_record_ptr->node_bitmap); xfree(job_record_ptr->cpus_per_node); 
xfree(job_record_ptr->cpu_count_reps); @@ -982,7 +1017,8 @@ void dump_job_desc(job_desc_msg_t * job_specs) /* * init_job_conf - initialize the job configuration tables and values. * this should be called after creating node information, but - * before creating any job entries. + * before creating any job entries. Pre-existing job entries are + * left unchanged. * RET 0 if no error, otherwise an error code * global: last_job_update - time of last job table update * job_list - pointer to global job list @@ -993,7 +1029,7 @@ int init_job_conf(void) job_count = 0; job_list = list_create(&_list_delete_job); if (job_list == NULL) - fatal("init_job_conf: No memory"); + fatal ("Memory allocation failure"); } last_job_update = time(NULL); @@ -1133,9 +1169,7 @@ int job_signal(uint32_t job_id, uint16_t signal, uid_t uid) return ESLURM_INVALID_JOB_ID; } - if ((job_ptr->job_state == JOB_FAILED) || - (job_ptr->job_state == JOB_COMPLETE) || - (job_ptr->job_state == JOB_TIMEOUT) || + if ((IS_JOB_FINISHED(job_ptr)) || (job_ptr->kill_on_step_done & KILL_IN_PROGRESS)) return ESLURM_ALREADY_DONE; @@ -1169,18 +1203,16 @@ int job_signal(uint32_t job_id, uint16_t signal, uid_t uid) } list_iterator_destroy (step_record_iterator); - if ((signal == SIGKILL) && - ((job_ptr->kill_on_step_done & KILL_IN_PROGRESS) == 0)) { + if (signal == SIGKILL) { job_ptr->kill_on_step_done = KILL_IN_PROGRESS; job_ptr->time_last_active = now; last_job_update = now; } if ((signal == SIGKILL) && (step_cnt == 0)) { /* kill job with no active steps */ - job_ptr->job_state = JOB_COMPLETE; + job_ptr->job_state = JOB_COMPLETE | JOB_COMPLETING; job_ptr->end_time = now; deallocate_nodes(job_ptr); - delete_job_details(job_ptr); } verbose("job_signal of running job %u successful", job_id); return SLURM_SUCCESS; @@ -1196,7 +1228,7 @@ int job_signal(uint32_t job_id, uint16_t signal, uid_t uid) * IN job_id - id of the job which completed * IN uid - user id of user issuing the RPC * IN requeue - job should be run again if possible - * IN job_return_code - job's return code, if set then set state to JOB_FAILED + * IN job_return_code - job's return code, if set then set state to FAILED * RET - 0 on success, otherwise ESLURM error code * global: job_list - pointer global job list * last_job_update - time of last job table update @@ -1207,6 +1239,7 @@ job_complete(uint32_t job_id, uid_t uid, bool requeue, { struct job_record *job_ptr; time_t now = time(NULL); + uint32_t job_comp_flag = 0; job_ptr = find_job_record(job_id); if (job_ptr == NULL) { @@ -1214,10 +1247,7 @@ job_complete(uint32_t job_id, uid_t uid, bool requeue, return ESLURM_INVALID_JOB_ID; } - if ((job_ptr->job_state == JOB_FAILED) || - (job_ptr->job_state == JOB_COMPLETE) || - (job_ptr->job_state == JOB_TIMEOUT) || - (job_ptr->job_state == JOB_NODE_FAIL) + if (IS_JOB_FINISHED(job_ptr)) return ESLURM_ALREADY_DONE; if ((job_ptr->user_id != uid) && (uid != 0) && (uid != getuid())) { @@ -1226,31 +1256,31 @@ job_complete(uint32_t job_id, uid_t uid, bool requeue, return ESLURM_USER_ID_MISSING; } - if (job_ptr->job_state == JOB_PENDING) { - verbose("job_complete for job id %u successful", job_id); - } else if (job_ptr->job_state == JOB_RUNNING) { - deallocate_nodes(job_ptr); - verbose("job_complete for job id %u successful", job_id); - } else { - error("job_complete for job id %u from bad state", - job_id, job_ptr->job_state); - } - + if (job_ptr->job_state == JOB_RUNNING) + job_comp_flag = JOB_COMPLETING; if (requeue && job_ptr->details && job_ptr->batch_flag) { - job_ptr->job_state = 
JOB_PENDING; + job_ptr->job_state = JOB_PENDING | job_comp_flag; info("Requeing job %u", job_ptr->job_id); } else { if (job_return_code) - job_ptr->job_state = JOB_FAILED; - else if (job_ptr->end_time < now) - job_ptr->job_state = JOB_TIMEOUT; + job_ptr->job_state = JOB_FAILED | job_comp_flag; + else if (job_comp_flag && /* job was running */ + (job_ptr->end_time < now)) /* over time limit */ + job_ptr->job_state = JOB_TIMEOUT | job_comp_flag; else - job_ptr->job_state = JOB_COMPLETE; + job_ptr->job_state = JOB_COMPLETE | job_comp_flag; job_ptr->end_time = now; - delete_job_details(job_ptr); delete_all_step_records(job_ptr); } + last_job_update = now; + if (job_comp_flag) { /* job was running */ + deallocate_nodes(job_ptr); + verbose("job_complete for job id %u successful", job_id); + } else { + verbose("job_complete for job id %u successful", job_id); + } + return SLURM_SUCCESS; } @@ -1876,29 +1906,24 @@ void job_time_limit(void) job_record_iterator = list_iterator_create(job_list); while ((job_ptr = (struct job_record *) list_next(job_record_iterator))) { - if (job_ptr->magic != JOB_MAGIC) - fatal("job_time_limit: job integrity is bad"); - if ((job_ptr->job_state == JOB_PENDING) || - (job_ptr->job_state == JOB_FAILED) || - (job_ptr->job_state == JOB_COMPLETE) || - (job_ptr->job_state == JOB_TIMEOUT) || - (job_ptr->job_state == JOB_NODE_FAIL)) + xassert (job_ptr->magic == JOB_MAGIC); + if (job_ptr->job_state != JOB_RUNNING) continue; - if ((job_ptr->kill_on_step_done & KILL_IN_PROGRESS) && - (difftime(now, job_ptr->time_last_active) > - JOB_KILL_TIMEOUT)) { + if (job_ptr->kill_on_step_done & KILL_IN_PROGRESS) { + if (difftime(now, job_ptr->time_last_active) <= + JOB_KILL_TIMEOUT) + continue; + last_job_update = now; info("Job_id %u not properly terminating, forcing it", job_ptr->job_id); last_job_update = now; - job_ptr->job_state = JOB_TIMEOUT; job_ptr->end_time = time(NULL); + job_ptr->job_state = JOB_TIMEOUT | JOB_COMPLETING; deallocate_nodes(job_ptr); delete_all_step_records(job_ptr); - delete_job_details(job_ptr); - } - if (job_ptr->kill_on_step_done & KILL_IN_PROGRESS) continue; + } if (slurmctld_conf.inactive_limit) { if (job_ptr->step_list && @@ -1917,14 +1942,29 @@ void job_time_limit(void) last_job_update = now; info("Time limit exhausted for job_id %u, terminating", job_ptr->job_id); - job_signal(job_ptr->job_id, SIGKILL, 0); - if (job_ptr->job_state == JOB_COMPLETE) - job_ptr->job_state = JOB_TIMEOUT; + _job_timed_out(job_ptr); } list_iterator_destroy(job_record_iterator); } +/* Terminate a job that has exhausted its time limit */ +static void _job_timed_out(struct job_record *job_ptr) +{ +#if NEW_TIME_LIMIT_RPC + // FIXME + // SET UP AND ISSUE NEW RPC TO ALL ALLOCATED NODES, + // see deallocate_nodes code for template +#else + job_signal(job_ptr->job_id, SIGKILL, 0); +#endif + + job_ptr->time_last_active = time(NULL); + job_ptr->job_state = JOB_TIMEOUT | JOB_COMPLETING; + job_ptr->kill_on_step_done &= KILL_IN_PROGRESS; + return; +} + /* _validate_job_desc - validate that a job descriptor for job submit or * allocate has valid data, set values to defaults as required * IN job_desc_msg - pointer to job descriptor @@ -1937,7 +1977,7 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, if ((job_desc_msg->num_procs == NO_VAL) && (job_desc_msg->min_nodes == NO_VAL) && (job_desc_msg->req_nodes == NULL)) { - info("_validate_job_desc: job failed to specify num_procs, min_nodes or req_nodes"); + info("Job failed to specify num_procs, min_nodes or req_nodes"); 
return ESLURM_JOB_MISSING_SIZE_SPECIFICATION; } if ((allocate == SLURM_CREATE_JOB_FLAG_NO_ALLOCATE_0) && @@ -1971,8 +2011,7 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, } dup_job_ptr = find_job_record((uint32_t) job_desc_msg->job_id); if (dup_job_ptr && - ((dup_job_ptr->job_state == JOB_PENDING) || - (dup_job_ptr->job_state == JOB_RUNNING))) { + (!(IS_JOB_FINISHED(dup_job_ptr)))) { info("attempt re-use active job_id %u", job_desc_msg->job_id); return ESLURM_DUPLICATE_JOB_ID; @@ -2011,9 +2050,8 @@ static void _list_delete_job(void *job_entry) job_record_point = (struct job_record *) job_entry; if (job_record_point == NULL) - fatal("_list_delete_job: passed null job pointer"); - if (job_record_point->magic != JOB_MAGIC) - fatal("_list_delete_job: passed invalid job pointer"); + fatal ("_list_delete_job: job_record_point == NULL"); + xassert (job_record_point->magic == JOB_MAGIC); if (job_hash[JOB_HASH_INX(job_record_point->job_id)] == job_record_point) @@ -2070,18 +2108,14 @@ static int _list_find_job_id(void *job_entry, void *key) */ static int _list_find_job_old(void *job_entry, void *key) { - time_t min_age; + time_t min_age = time(NULL) - MIN_JOB_AGE; + struct job_record *job_ptr = (struct job_record *)job_entry; - min_age = time(NULL) - MIN_JOB_AGE; + if (job_ptr->end_time > min_age) + return 0; /* Too new to purge */ - if (((struct job_record *) job_entry)->end_time > min_age) - return 0; - - if ((((struct job_record *) job_entry)->job_state != JOB_COMPLETE) - && (((struct job_record *) job_entry)->job_state != JOB_FAILED) - && (((struct job_record *) job_entry)->job_state != - JOB_TIMEOUT)) - return 0; + if (!(IS_JOB_FINISHED(job_ptr))) + return 0; /* Still active, can't purge */ return 1; } @@ -2120,8 +2154,7 @@ pack_all_jobs(char **buffer_ptr, int *buffer_size) job_record_iterator = list_iterator_create(job_list); while ((job_record_point = (struct job_record *) list_next(job_record_iterator))) { - if (job_record_point->magic != JOB_MAGIC) - fatal("dump_all_job: job integrity is bad"); + xassert (job_record_point->magic == JOB_MAGIC); pack_job(job_record_point, buffer); jobs_packed++; @@ -2254,47 +2287,91 @@ static int _purge_job_record(uint32_t job_id) void reset_job_bitmaps(void) { ListIterator job_record_iterator; - struct job_record *job_record_point; - - if (job_list == NULL) - fatal - ("init_job_conf: list_create can not allocate memory"); + struct job_record *job_ptr; + struct part_record *part_ptr; + if (job_list == NULL) + fatal ("reset_job_bitmaps: job_list == NULL"); job_record_iterator = list_iterator_create(job_list); - while ((job_record_point = + while ((job_ptr = (struct job_record *) list_next(job_record_iterator))) { - if (job_record_point->magic != JOB_MAGIC) - fatal("dump_all_job: job integrity is bad"); - FREE_NULL_BITMAP(job_record_point->node_bitmap); - if (job_record_point->nodes) { - node_name2bitmap(job_record_point->nodes, - &job_record_point->node_bitmap); - if (job_record_point->job_state == JOB_RUNNING) - allocate_nodes(job_record_point-> - node_bitmap); + xassert (job_ptr->magic == JOB_MAGIC); + part_ptr = list_find_first(part_list, &list_find_part, + job_ptr->partition); + if (part_ptr == NULL) { + error("Invalid partition (%s) for job_id %u", + job_ptr->partition, job_ptr->job_id); + job_ptr->job_state = JOB_NODE_FAIL; + } + job_ptr->part_ptr = part_ptr; + FREE_NULL_BITMAP(job_ptr->node_bitmap); + if ((job_ptr->nodes) && + (node_name2bitmap(job_ptr->nodes, &job_ptr->node_bitmap))) { + error("Invalid nodes (%s) for 
job_id %u", + job_ptr->nodes, job_ptr->job_id); + job_ptr->job_state = JOB_NODE_FAIL; } + build_node_details(job_ptr); /* set: num_cpu_groups, + * cpu_count_reps, node_cnt, + * cpus_per_node, node_addr */ + _reset_detail_bitmaps(job_ptr); + _reset_step_bitmaps(job_ptr); - if (job_record_point->details == NULL) - continue; - FREE_NULL_BITMAP(job_record_point->details->req_node_bitmap); - if (job_record_point->details->req_nodes) - node_name2bitmap(job_record_point->details-> - req_nodes, - &job_record_point->details-> - req_node_bitmap); - FREE_NULL_BITMAP(job_record_point->details->exc_node_bitmap); - if (job_record_point->details->exc_nodes) - node_name2bitmap(job_record_point->details-> - exc_nodes, - &job_record_point->details-> - exc_node_bitmap); + if ((job_ptr->kill_on_step_done) && + (list_count(job_ptr->step_list) <= 1)) + job_ptr->job_state = JOB_NODE_FAIL; } list_iterator_destroy(job_record_iterator); last_job_update = time(NULL); } +static void _reset_detail_bitmaps(struct job_record *job_ptr) +{ + if (job_ptr->details == NULL) + return; + + FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap); + if ((job_ptr->details->req_nodes) && + (node_name2bitmap(job_ptr->details->req_nodes, + &job_ptr->details->req_node_bitmap))) { + error("Invalid req_nodes (%s) for job_id %u", + job_ptr->details->req_nodes, job_ptr->job_id); + job_ptr->job_state = JOB_NODE_FAIL; + } + + FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap); + if ((job_ptr->details->exc_nodes) && + (node_name2bitmap(job_ptr->details->exc_nodes, + &job_ptr->details->exc_node_bitmap))) { + error("Invalid exc_nodes (%s) for job_id %u", + job_ptr->details->exc_nodes, job_ptr->job_id); + job_ptr->job_state = JOB_NODE_FAIL; + } +} + +static void _reset_step_bitmaps(struct job_record *job_ptr) +{ + ListIterator step_record_iterator; + struct step_record *step_ptr; + + step_record_iterator = list_iterator_create (job_ptr->step_list); + while ((step_ptr = (struct step_record *) + list_next (step_record_iterator))) { + if ((step_ptr->step_node_list) && + (node_name2bitmap(step_ptr->step_node_list, + &step_ptr->step_node_bitmap))) { + error("Invalid step_node_list (%s) for step_id %u.%u", + step_ptr->step_node_list, + job_ptr->job_id, step_ptr->step_id); + delete_step_record (job_ptr, step_ptr->step_id); + } + } + + list_iterator_destroy (step_record_iterator); + return; +} /* * _set_job_id - set a default job_id, insure that it is unique @@ -2307,8 +2384,9 @@ static void _set_job_id(struct job_record *job_ptr) if (job_id_sequence < 0) job_id_sequence = slurmctld_conf.first_job_id; - if ((job_ptr == NULL) || (job_ptr->magic != JOB_MAGIC)) - fatal("_set_job_id: invalid job_ptr"); + if (job_ptr == NULL) + fatal ("_set_job_id: job_ptr == NULL"); + xassert (job_ptr->magic == JOB_MAGIC); if ((job_ptr->partition == NULL) || (strlen(job_ptr->partition) == 0)) fatal("_set_job_id: partition not set"); @@ -2331,8 +2409,9 @@ static void _set_job_id(struct job_record *job_ptr) */ static void _set_job_prio(struct job_record *job_ptr) { - if ((job_ptr == NULL) || (job_ptr->magic != JOB_MAGIC)) - fatal("_set_job_prio: invalid job_ptr"); + if (job_ptr == NULL) + fatal ("_set_job_prio: job_ptr == NULL"); + xassert (job_ptr->magic == JOB_MAGIC); job_ptr->priority = default_prio--; } @@ -2356,8 +2435,7 @@ static bool _top_priority(struct job_record *job_ptr) job_record_iterator = list_iterator_create(job_list); while ((job_record_point = (struct job_record *) list_next(job_record_iterator))) { - if (job_record_point->magic != JOB_MAGIC) - 
fatal("_top_priority: job integrity is bad"); + xassert (job_record_point->magic == JOB_MAGIC); if (job_record_point == job_ptr) continue; if (job_record_point->job_state != JOB_PENDING) @@ -2637,6 +2715,7 @@ validate_jobs_on_node(char *node_name, uint32_t * job_count, /* If no job is running here, ensure none are assigned to this node */ if (*job_count == 0) { + debug("Node %s registered with no jobs", node_name); (void) kill_running_job_by_node_name(node_name, true); return; } @@ -2758,7 +2837,7 @@ static void _spawn_signal_agent(agent_arg_t *agent_info) * old_job_info - get details about an existing job allocation * IN uid - job issuing the code * IN job_id - ID of job for which info is requested - * OUT everything else - the job's detains + * OUT everything else - the job's details */ int old_job_info(uint32_t uid, uint32_t job_id, char **node_list, @@ -2773,9 +2852,9 @@ old_job_info(uint32_t uid, uint32_t job_id, char **node_list, return ESLURM_INVALID_JOB_ID; if ((uid != 0) && (job_ptr->user_id != uid)) return ESLURM_ACCESS_DENIED; - if (job_ptr->job_state == JOB_PENDING) + if (IS_JOB_PENDING(job_ptr)) return ESLURM_JOB_PENDING; - if (job_ptr->job_state != JOB_RUNNING) + if (IS_JOB_FINISHED(job_ptr)) return ESLURM_ALREADY_DONE; if (node_list) @@ -2844,7 +2923,8 @@ static void _get_batch_job_dir_ids(List batch_dirs) /* All pending batch jobs must have a batch_dir entry, * otherwise we flag it as FAILED and don't schedule - * If the batch_dir entry exists for a batch job, remove it */ + * If the batch_dir entry exists for a PENDING or RUNNING batch job, + * remove it the list (of directories to be deleted) */ static void _validate_job_files(List batch_dirs) { ListIterator job_record_iterator; @@ -2856,13 +2936,13 @@ static void _validate_job_files(List batch_dirs) (struct job_record *) list_next(job_record_iterator))) { if (!job_ptr->batch_flag) continue; - if ((job_ptr->job_state != JOB_PENDING) && - (job_ptr->job_state != JOB_RUNNING)) + if (IS_JOB_FINISHED(job_ptr)) continue; /* Want to keep this job's files */ del_cnt = list_delete_all(batch_dirs, _find_batch_dir, &(job_ptr->job_id)); - if ((del_cnt == 0) && (job_ptr->job_state == JOB_PENDING)) { + if ((del_cnt == 0) && + (job_ptr->job_state == JOB_PENDING)) { error("Script for job %u lost, state set to FAILED", job_ptr->job_id); job_ptr->job_state = JOB_FAILED; diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index d728053a8f6210a7ec82d6b0aa7d57be9b63dd77..70a00bf7fc2290a35a6185434dae9b57a2073ed3 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -37,6 +37,7 @@ #include "src/common/list.h" #include "src/common/macros.h" +#include "src/common/xassert.h" #include "src/common/xstring.h" #include "src/slurmctld/agent.h" #include "src/slurmctld/locks.h" @@ -77,8 +78,7 @@ static int _build_job_queue(struct job_queue **job_queue) continue; if (job_record_point->priority == 0) /* held */ continue; - if (job_record_point->magic != JOB_MAGIC) - fatal("prio_order_job: data integrity is bad"); + xassert (job_record_point->magic == JOB_MAGIC); if (job_buffer_size <= job_queue_size) { job_buffer_size += 50; xrealloc(my_job_queue, job_buffer_size * @@ -139,8 +139,8 @@ int schedule(void) error_code = select_nodes(job_ptr, false); if (error_code == ESLURM_NODES_BUSY) { xrealloc(failed_parts, - (failed_part_cnt + - 1) * sizeof(struct part_record *)); + (failed_part_cnt + 1) * + sizeof(struct part_record *)); failed_parts[failed_part_cnt++] = job_ptr->part_ptr; } else if (error_code == 
SLURM_SUCCESS) { diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index a0237e81ff7d5c208083706d3400989962baf7a8..dc581965b039d9e667eba38d5f44af6a31d941b2 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -43,6 +43,7 @@ #include "src/common/hostlist.h" #include "src/common/pack.h" +#include "src/common/xassert.h" #include "src/common/xstring.h" #include "src/slurmctld/agent.h" #include "src/slurmctld/locks.h" @@ -198,34 +199,33 @@ char * bitmap2node_name (bitstr_t *bitmap) */ struct config_record * create_config_record (void) { - struct config_record *config_point; + struct config_record *config_ptr; last_node_update = time (NULL); - config_point = - (struct config_record *) - xmalloc (sizeof (struct config_record)); + config_ptr = (struct config_record *) + xmalloc (sizeof (struct config_record)); /* set default values */ - config_point->cpus = default_config_record.cpus; - config_point->real_memory = default_config_record.real_memory; - config_point->tmp_disk = default_config_record.tmp_disk; - config_point->weight = default_config_record.weight; - config_point->nodes = NULL; - config_point->node_bitmap = NULL; - config_point->magic = CONFIG_MAGIC; + config_ptr->cpus = default_config_record.cpus; + config_ptr->real_memory = default_config_record.real_memory; + config_ptr->tmp_disk = default_config_record.tmp_disk; + config_ptr->weight = default_config_record.weight; + config_ptr->nodes = NULL; + config_ptr->node_bitmap = NULL; + xassert (config_ptr->magic = CONFIG_MAGIC); /* set value */ if (default_config_record.feature) { - config_point->feature = + config_ptr->feature = (char *) xmalloc (strlen (default_config_record.feature) + 1); - strcpy (config_point->feature, default_config_record.feature); + strcpy (config_ptr->feature, default_config_record.feature); } else - config_point->feature = (char *) NULL; + config_ptr->feature = (char *) NULL; - if (list_append(config_list, config_point) == NULL) + if (list_append(config_list, config_ptr) == NULL) fatal ("create_config_record: unable to allocate memory"); - return config_point; + return config_ptr; } @@ -279,7 +279,7 @@ create_node_record (struct config_record *config_point, char *node_name) node_record_point->cpus = config_point->cpus; node_record_point->real_memory = config_point->real_memory; node_record_point->tmp_disk = config_point->tmp_disk; - node_record_point->magic = NODE_MAGIC; + xassert (node_record_point->magic = NODE_MAGIC) /* set value */; last_bitmap_update = time (NULL); return node_record_point; } @@ -299,37 +299,6 @@ static int _delete_config_record (void) } -/* - * delete_node_record - delete the node record for a node with specified name - * to avoid invalidating the bitmaps and hash table, we just clear the name - * set its state to NODE_STATE_DOWN - * IN name - name of the desired node - * RET 0 on success, errno otherwise - * global: node_record_table_ptr - pointer to global node table - */ -int delete_node_record (char *name) -{ - struct node_record *node_record_point; /* pointer to node_record */ - - last_node_update = time (NULL); - node_record_point = find_node_record (name); - if (node_record_point == (struct node_record *) NULL) { - error("delete_node_record: can't delete non-existent node %s", - name); - return ENOENT; - } - - if (node_record_point->partition_ptr) { - (node_record_point->partition_ptr->total_nodes)--; - (node_record_point->partition_ptr->total_cpus) -= - node_record_point->cpus; - } - strcpy (node_record_point->name, ""); - 
 /* dump_all_node_state - save the state of all nodes to file */
 int dump_all_node_state ( void )
 {
@@ -346,10 +315,9 @@ int dump_all_node_state ( void )
 	/* write node records to buffer */
 	lock_slurmctld (node_read_lock);
 	for (inx = 0; inx < node_record_count; inx++) {
-		if ((node_record_table_ptr[inx].magic != NODE_MAGIC) ||
-		    (node_record_table_ptr[inx].config_ptr->magic !=
-		     CONFIG_MAGIC))
-			fatal ("dump_all_node_state: data integrity is bad");
+		xassert (node_record_table_ptr[inx].magic == NODE_MAGIC);
+		xassert (node_record_table_ptr[inx].config_ptr->magic ==
+			 CONFIG_MAGIC);
 		_dump_node_state (&node_record_table_ptr[inx], buffer);
 	}

@@ -651,7 +619,7 @@ int init_node_conf (void)

 	config_list = list_create (&_list_delete_config);
 	if (config_list == NULL)
-		fatal ("init_node_conf: list_create can not allocate memory");
+		fatal ("memory allocation failure");
 	return SLURM_SUCCESS;
 }
@@ -671,13 +639,16 @@ int list_compare_config (void *config_entry1, void *config_entry2)
 * see list.h for documentation */
 static void _list_delete_config (void *config_entry)
 {
-	struct config_record *config_record_point;
-
-	config_record_point = (struct config_record *) config_entry;
-	xfree (config_record_point->feature);
-	xfree (config_record_point->nodes);
-	FREE_NULL_BITMAP (config_record_point->node_bitmap);
-	xfree (config_record_point);
+	struct config_record *config_ptr = (struct config_record *)
+					   config_entry;
+
+	if (config_ptr == NULL)
+		fatal ("_list_delete_config: config_ptr == NULL");
+	xassert(config_ptr->magic == CONFIG_MAGIC);
+	xfree (config_ptr->feature);
+	xfree (config_ptr->nodes);
+	FREE_NULL_BITMAP (config_ptr->node_bitmap);
+	xfree (config_ptr);
 }
@@ -727,8 +698,8 @@ int node_name2bitmap (char *node_names, bitstr_t **bitmap)
 	}

 	my_bitmap = (bitstr_t *) bit_alloc (node_record_count);
-	if (my_bitmap == NULL)
-		fatal("bit_alloc memory allocation failure");
+	if (my_bitmap == 0)
+		fatal ("memory allocation failure");

 	while ( (this_node_name = hostlist_shift (host_list)) ) {
 		node_record_point = find_node_record (this_node_name);
@@ -780,10 +751,9 @@ void pack_all_node (char **buffer_ptr, int *buffer_size)

 	/* write node records */
 	for (inx = 0; inx < node_record_count; inx++) {
-		if ((node_record_table_ptr[inx].magic != NODE_MAGIC) ||
-		    (node_record_table_ptr[inx].config_ptr->magic !=
-		     CONFIG_MAGIC))
-			fatal ("pack_all_node: data integrity is bad");
+		xassert (node_record_table_ptr[inx].magic == NODE_MAGIC);
+		xassert (node_record_table_ptr[inx].config_ptr->magic ==
+			 CONFIG_MAGIC);

 		_pack_node(&node_record_table_ptr[inx], buffer);
 		nodes_packed ++ ;
@@ -1222,8 +1192,8 @@ void set_node_down (char *name)
 		return;
 	}

-	_make_node_down(node_ptr);
 	(void) kill_running_job_by_node_name(name, false);
+	_make_node_down(node_ptr);

 	return;
 }
@@ -1269,9 +1239,9 @@ void ping_nodes (void)
 		    (base_state != NODE_STATE_DOWN)) {
 			error ("Node %s not responding, setting DOWN",
 			       node_record_table_ptr[i].name);
-			_make_node_down(&node_record_table_ptr[i]);
 			kill_running_job_by_node_name (
 				node_record_table_ptr[i].name, false);
+			_make_node_down(&node_record_table_ptr[i]);
 			continue;
 		}

@@ -1480,13 +1450,15 @@ void msg_to_slurmd (slurm_msg_type_t msg_type)
 void make_node_alloc(struct node_record *node_ptr)
 {
 	int inx = node_ptr - node_record_table_ptr;
-	uint16_t no_resp_flag;
+	uint16_t no_resp_flag, base_state;

 	last_node_update = time (NULL);
-	no_resp_flag = node_ptr->node_state & NODE_STATE_NO_RESPOND;
-	node_ptr->node_state = NODE_STATE_ALLOCATED | no_resp_flag;
-	(node_ptr->job_cnt)++;
+	(node_ptr->run_job_cnt)++;
 	bit_clear(idle_node_bitmap, inx);
+	base_state = node_ptr->node_state & (~NODE_STATE_NO_RESPOND);
+	no_resp_flag = node_ptr->node_state & NODE_STATE_NO_RESPOND;
+	if (base_state != NODE_STATE_COMPLETING)
+		node_ptr->node_state = NODE_STATE_ALLOCATED | no_resp_flag;
 }

 /* make_node_comp - flag specified node as completing a job */
@@ -1496,7 +1468,7 @@ void make_node_comp(struct node_record *node_ptr)

 	last_node_update = time (NULL);
 	base_state = node_ptr->node_state & (~NODE_STATE_NO_RESPOND);
-	no_resp_flag = node_ptr->node_state & NODE_STATE_NO_RESPOND ;
+	no_resp_flag = node_ptr->node_state & NODE_STATE_NO_RESPOND;
 	if ((base_state == NODE_STATE_DOWN) ||
 	    (base_state == NODE_STATE_DRAINED) ||
 	    (base_state == NODE_STATE_DRAINING)) {
@@ -1506,6 +1478,12 @@ void make_node_comp(struct node_record *node_ptr)
 	} else {
 		node_ptr->node_state = NODE_STATE_COMPLETING | no_resp_flag;
 	}
+
+	if (node_ptr->run_job_cnt)
+		(node_ptr->run_job_cnt)--;
+	else
+		error("Node %s run_job_cnt underflow", node_ptr->name);
+	(node_ptr->comp_job_cnt)++;
 }

 /* _make_node_down - flag specified node as down */
@@ -1517,28 +1495,55 @@ static void _make_node_down(struct node_record *node_ptr)

 	last_node_update = time (NULL);
 	no_resp_flag = node_ptr->node_state & NODE_STATE_NO_RESPOND;
 	node_ptr->node_state = NODE_STATE_DOWN | no_resp_flag;
-	node_ptr->job_cnt = 0;
 	bit_clear (up_node_bitmap, inx);
 	bit_clear (idle_node_bitmap, inx);
 }

-/* make_node_idle - flag specified node as having completed a job */
-void make_node_idle(struct node_record *node_ptr)
+/*
+ * make_node_idle - flag specified node as having completed a job
+ * IN node_ptr - pointer to node reporting job completion
+ * IN job_ptr - pointer to job that just completed
+ */
+void make_node_idle(struct node_record *node_ptr,
+		    struct job_record *job_ptr)
 {
 	int inx = node_ptr - node_record_table_ptr;
 	uint16_t no_resp_flag, base_state;

+	if ((job_ptr) &&			/* Specific job completed */
+	    (bit_test(job_ptr->node_bitmap, inx))) {	/* Not a replay */
+		last_job_update = time (NULL);
+		bit_clear(job_ptr->node_bitmap, inx);
+		if (job_ptr->node_cnt) {
+			if ((--job_ptr->node_cnt) == 0)
+				job_ptr->job_state &= (~JOB_COMPLETING);
+		} else {
+			error("node_cnt underflow on job_id %u",
+			      job_ptr->job_id);
+		}
+
+		if (node_ptr->comp_job_cnt)
+			(node_ptr->comp_job_cnt)--;
+		else
+			error("Node %s comp_job_cnt underflow, job_id %u",
+			      node_ptr->name, job_ptr->job_id);
+		if (node_ptr->comp_job_cnt > 0)
+			return;		/* More jobs completing */
+	}
+
 	last_node_update = time (NULL);
 	base_state = node_ptr->node_state & (~NODE_STATE_NO_RESPOND);
 	no_resp_flag = node_ptr->node_state & NODE_STATE_NO_RESPOND;
 	if ((base_state == NODE_STATE_DOWN) ||
 	    (base_state == NODE_STATE_DRAINED)) {
-		debug3("Node %s being left in state %s",
-		       node_state_string((enum node_states)node_ptr->name));
+		debug3("Node %s being left in state %s", node_ptr->name,
+		       node_state_string((enum node_states)base_state));
 	} else if (base_state == NODE_STATE_DRAINING) {
 		node_ptr->node_state = NODE_STATE_DRAINED;
 		bit_clear(idle_node_bitmap, inx);
 		bit_clear(up_node_bitmap, inx);
+	} else if (node_ptr->run_job_cnt) {
+		node_ptr->node_state = NODE_STATE_ALLOCATED | no_resp_flag;
 	} else {
 		node_ptr->node_state = NODE_STATE_IDLE | no_resp_flag;
 		if (no_resp_flag == 0)
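
[Reviewer note, not part of the patch] The node_mgr.c hunks above replace the single job_cnt with two counters that drive the node state machine: make_node_alloc() bumps run_job_cnt, make_node_comp() moves one unit from run_job_cnt to comp_job_cnt, and make_node_idle() only lets the node reach IDLE once comp_job_cnt drains to zero. The real functions additionally fold in the NO_RESPOND flag and the DOWN/DRAIN states; this toy model with invented names shows just the counter flow:

#include <stdio.h>

/* Toy per-node counters mirroring the new node_record fields */
struct toy_node {
	unsigned run_job_cnt;	/* jobs currently running */
	unsigned comp_job_cnt;	/* jobs still cleaning up */
};

static const char *toy_state(const struct toy_node *n)
{
	if (n->comp_job_cnt)
		return "COMPLETING";	/* after make_node_comp() */
	if (n->run_job_cnt)
		return "ALLOCATED";	/* after make_node_alloc() */
	return "IDLE";			/* both counters drained */
}

int main(void)
{
	struct toy_node n = { 0, 0 };

	n.run_job_cnt++;			/* make_node_alloc() */
	printf("%s\n", toy_state(&n));		/* ALLOCATED */
	n.run_job_cnt--; n.comp_job_cnt++;	/* make_node_comp() */
	printf("%s\n", toy_state(&n));		/* COMPLETING */
	n.comp_job_cnt--;			/* make_node_idle() */
	printf("%s\n", toy_state(&n));		/* IDLE */
	return 0;
}
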
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 41d9b4ed56a751a96aa793c6baa7ec48bcbdcd49..4cf4a58a8f57eb4070882cb009858256155a5581 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -40,6 +40,7 @@
 #include <slurm/slurm_errno.h>

 #include "src/common/hostlist.h"
+#include "src/common/xassert.h"
 #include "src/common/xmalloc.h"
 #include "src/slurmctld/agent.h"
 #include "src/slurmctld/slurmctld.h"
@@ -137,6 +138,10 @@ void deallocate_nodes(struct job_record *job_ptr)
 	pthread_attr_t attr_agent;
 	pthread_t thread_agent;
 	int buf_rec_size = 0;
+	if (job_ptr == NULL)
+		fatal ("job_ptr == NULL");
+	if (job_ptr->details == NULL)
+		fatal ("job_ptr->details == NULL");

 	agent_args = xmalloc(sizeof(agent_arg_t));
 	agent_args->msg_type = REQUEST_REVOKE_JOB_CREDENTIAL;
@@ -275,7 +280,7 @@ _pick_best_quadrics(bitstr_t * bitmap, bitstr_t * req_bitmap,
 	int best_fit_location = 0, best_fit_sufficient;

 	if (bitmap == NULL)
-		fatal("_pick_best_quadrics: bitmap pointer is NULL");
+		fatal ("_pick_best_quadrics: bitmap == NULL");

 	consec_index = 0;
 	consec_size = 50;	/* start allocation for 50 sets of
@@ -702,9 +707,8 @@ int select_nodes(struct job_record *job_ptr, bool test_only)
 	struct part_record *part_ptr = job_ptr->part_ptr;

 	if (job_ptr == NULL)
-		fatal("select_nodes: NULL job pointer value");
-	if (job_ptr->magic != JOB_MAGIC)
-		fatal("select_nodes: bad job pointer value");
+		fatal ("select_nodes: job_ptr == NULL");
+	xassert (job_ptr->magic == JOB_MAGIC);

 	/* insure that partition exists and is up */
 	if (part_ptr == NULL) {
@@ -761,8 +765,8 @@ int select_nodes(struct job_record *job_ptr, bool test_only)

 	/* assign the nodes and stage_in the job */
 	job_ptr->nodes = bitmap2node_name(req_bitmap);
-	allocate_nodes(req_bitmap);
 	job_ptr->node_bitmap = req_bitmap;
+	allocate_nodes(job_ptr->node_bitmap);
 	build_node_details(job_ptr);
 	req_bitmap = NULL;
 	job_ptr->job_state = JOB_RUNNING;
@@ -944,11 +948,9 @@ static int _nodes_in_sets(bitstr_t *req_bitmap,
 }

 /*
- * build_node_details - set cpu counts and addresses for allocated nodes
+ * build_node_details - set cpu counts and addresses for allocated nodes:
+ *	cpu_count_reps, cpus_per_node, node_addr, node_cnt, num_cpu_groups
 * IN job_ptr - pointer to a job record
- * NOTE: the arrays cpus_per_node, cpu_count_reps and node_addr in the job
- *	details record are allocated by build_node_details and must be
- *	xfreed by the caller, preferably using delete_job_details
 */
 void build_node_details(struct job_record *job_ptr)
 {
diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c
index 37c41be0ca6ae573bc678dd4ed802b290287c6bf..c83b42fe71ec1afbaa87b2103baad97cf295d3fb 100644
--- a/src/slurmctld/partition_mgr.c
+++ b/src/slurmctld/partition_mgr.c
@@ -170,43 +170,43 @@ static int _build_part_bitmap(struct part_record *part_record_point)
 */
 struct part_record *create_part_record(void)
 {
-	struct part_record *part_record_point;
+	struct part_record *part_ptr;

 	last_part_update = time(NULL);

-	part_record_point =
+	part_ptr =
 	    (struct part_record *) xmalloc(sizeof(struct part_record));

-	strcpy(part_record_point->name, "DEFAULT");
-	part_record_point->max_time = default_part.max_time;
-	part_record_point->max_nodes = default_part.max_nodes;
-	part_record_point->root_only = default_part.root_only;
-	part_record_point->state_up = default_part.state_up;
-	part_record_point->shared = default_part.shared;
-	part_record_point->total_nodes = default_part.total_nodes;
-	part_record_point->total_cpus = default_part.total_cpus;
-	part_record_point->node_bitmap = NULL;
-	part_record_point->magic = PART_MAGIC;
+	strcpy(part_ptr->name, "DEFAULT");
+	part_ptr->max_time = default_part.max_time;
+	part_ptr->max_nodes = default_part.max_nodes;
+	part_ptr->root_only = default_part.root_only;
+	part_ptr->state_up = default_part.state_up;
+	part_ptr->shared = default_part.shared;
+	part_ptr->total_nodes = default_part.total_nodes;
+	part_ptr->total_cpus = default_part.total_cpus;
+	part_ptr->node_bitmap = NULL;
+	xassert (part_ptr->magic = PART_MAGIC);	/* set value */

 	if (default_part.allow_groups) {
-		part_record_point->allow_groups =
+		part_ptr->allow_groups =
 		    (char *) xmalloc(strlen(default_part.allow_groups) + 1);
-		strcpy(part_record_point->allow_groups,
+		strcpy(part_ptr->allow_groups,
 		       default_part.allow_groups);
 	} else
-		part_record_point->allow_groups = NULL;
+		part_ptr->allow_groups = NULL;

 	if (default_part.nodes) {
-		part_record_point->nodes =
+		part_ptr->nodes =
 		    (char *) xmalloc(strlen(default_part.nodes) + 1);
-		strcpy(part_record_point->nodes, default_part.nodes);
+		strcpy(part_ptr->nodes, default_part.nodes);
 	} else
-		part_record_point->nodes = NULL;
+		part_ptr->nodes = NULL;

-	if (list_append(part_list, part_record_point) == NULL)
+	if (list_append(part_list, part_ptr) == NULL)
 		fatal("create_part_record: unable to allocate memory");

-	return part_record_point;
+	return part_ptr;
 }
@@ -256,8 +256,7 @@ int dump_all_part_state(void)
 	part_record_iterator = list_iterator_create(part_list);
 	while ((part_record_point =
 		(struct part_record *) list_next(part_record_iterator))) {
-		if (part_record_point->magic != PART_MAGIC)
-			fatal("dump_all_part_state: data integrity is bad");
+		xassert (part_record_point->magic == PART_MAGIC);
 		_dump_part_state(part_record_point, buffer);
 	}
 	list_iterator_destroy(part_record_iterator);
@@ -407,8 +406,8 @@ int load_all_part_state(void)
 		}

 		/* find record and perform update */
-		part_ptr =
-		    list_find_first(part_list, &list_find_part, part_name);
+		part_ptr = list_find_first(part_list, &list_find_part,
+					   part_name);

 		if (part_ptr) {
 			part_ptr->max_time = max_time;
@@ -467,13 +466,13 @@ int init_part_conf(void)
 	last_part_update = time(NULL);

 	strcpy(default_part.name, "DEFAULT");
-	default_part.max_time = INFINITE;
-	default_part.max_nodes = INFINITE;
-	default_part.root_only = 0;
-	default_part.state_up = 1;
-	default_part.shared = SHARED_NO;
+	default_part.max_time    = INFINITE;
+	default_part.max_nodes   = INFINITE;
+	default_part.root_only   = 0;
+	default_part.state_up    = 1;
+	default_part.shared      = SHARED_NO;
 	default_part.total_nodes = 0;
-	default_part.total_cpus = 0;
+	default_part.total_cpus  = 0;
 	xfree(default_part.nodes);
 	xfree(default_part.allow_groups);
 	xfree(default_part.allow_uids);
@@ -485,9 +484,7 @@ int init_part_conf(void)
 		part_list = list_create(&_list_delete_part);

 	if (part_list == NULL)
-		fatal
-		    ("init_part_conf: list_create can not allocate memory");
-
+		fatal ("memory allocation failure");
 	strcpy(default_part_name, "");
 	default_part_loc = (struct part_record *) NULL;

@@ -573,8 +570,7 @@ pack_all_part(char **buffer_ptr, int *buffer_size)
 	part_record_iterator = list_iterator_create(part_list);
 	while ((part_record_point =
 		(struct part_record *) list_next(part_record_iterator))) {
-		if (part_record_point->magic != PART_MAGIC)
-			fatal("pack_all_part: data integrity is bad");
+		xassert (part_record_point->magic == PART_MAGIC);
 		pack_part(part_record_point, buffer);

 		parts_packed++;
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index f72ec18c7f4ca9d13b6811867fdab7421e170089..74cf467704435ea7f394b3820ac66302245f6467 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -54,7 +54,9 @@
 static int _init_all_slurm_conf(void);
 static int _parse_node_spec(char *in_line);
 static int _parse_part_spec(char *in_line);
 static void _set_config_defaults(slurm_ctl_conf_t * ctl_conf_ptr);
+static int _sync_nodes_to_comp_job(void);
 static int _sync_nodes_to_jobs(void);
+static int _sync_nodes_to_run_job(struct job_record *job_ptr);
 #ifdef HAVE_LIBELAN3
 static void _validate_node_proc_count(void);
 #endif
@@ -95,25 +97,22 @@ static int _build_bitmaps(void)
 	FREE_NULL_BITMAP(idle_node_bitmap);
 	FREE_NULL_BITMAP(up_node_bitmap);
 	idle_node_bitmap = (bitstr_t *) bit_alloc(node_record_count);
-	up_node_bitmap = (bitstr_t *) bit_alloc(node_record_count);
-	if ((idle_node_bitmap == NULL) || (up_node_bitmap == NULL))
-		fatal("bit_alloc memory allocation failure");
-
+	up_node_bitmap   = (bitstr_t *) bit_alloc(node_record_count);
+	if ((idle_node_bitmap == NULL) ||
+	    (up_node_bitmap   == NULL))
+		fatal ("memory allocation failure");
 	/* initialize the configuration bitmaps */
 	config_record_iterator = list_iterator_create(config_list);
 	if (config_record_iterator == NULL)
-		fatal
-		    ("_build_bitmaps: list_iterator_create unable to allocate memory");
+		fatal ("memory allocation failure");

-	while ((config_record_point =
-		(struct config_record *)
-		list_next(config_record_iterator))) {
+	while ((config_record_point = (struct config_record *)
+			list_next(config_record_iterator))) {
 		FREE_NULL_BITMAP(config_record_point->node_bitmap);
-
 		config_record_point->node_bitmap =
 		    (bitstr_t *) bit_alloc(node_record_count);
 		if (config_record_point->node_bitmap == NULL)
-			fatal("bit_alloc memory allocation failure");
+			fatal ("memory allocation failure");
 	}
 	list_iterator_destroy(config_record_iterator);

@@ -124,15 +123,13 @@ static int _build_bitmaps(void)
 		if (node_record_table_ptr[i].name[0] == '\0')
 			continue;	/* defunct */
-		base_state =
-		    node_record_table_ptr[i].node_state &
-		    (~NODE_STATE_NO_RESPOND);
-		no_resp_flag =
-		    node_record_table_ptr[i].node_state &
-		    NODE_STATE_NO_RESPOND;
+		base_state = node_record_table_ptr[i].node_state &
+			     (~NODE_STATE_NO_RESPOND);
+		no_resp_flag = node_record_table_ptr[i].node_state &
+			       NODE_STATE_NO_RESPOND;
 		if (base_state == NODE_STATE_IDLE)
 			bit_set(idle_node_bitmap, i);
-		if ((base_state != NODE_STATE_DOWN) &&
+		if ((base_state != NODE_STATE_DOWN)    &&
 		    (base_state != NODE_STATE_UNKNOWN) &&
 		    (base_state != NODE_STATE_DRAINED) &&
 		    (no_resp_flag == 0))
@@ -145,11 +142,10 @@ static int _build_bitmaps(void)
 	/* scan partition table and identify nodes in each */
 	all_part_node_bitmap = (bitstr_t *) bit_alloc(node_record_count);
 	if (all_part_node_bitmap == NULL)
-		fatal("bit_alloc memory allocation failure");
+		fatal ("memory allocation failure");
 	part_record_iterator = list_iterator_create(part_list);
 	if (part_record_iterator == NULL)
-		fatal
-		    ("_build_bitmaps: list_iterator_create unable to allocate memory");
+		fatal ("memory allocation failure");

 	while ((part_record_point =
 		(struct part_record *) list_next(part_record_iterator))) {
@@ -157,7 +153,7 @@ static int _build_bitmaps(void)
 		part_record_point->node_bitmap =
 		    (bitstr_t *) bit_alloc(node_record_count);
 		if (part_record_point->node_bitmap == NULL)
-			fatal("bit_alloc memory allocation failure");
+			fatal ("memory allocation failure");

 		/* check for each node in the partition */
 		if ((part_record_point->nodes == NULL) ||
@@ -209,9 +205,10 @@ static int _build_bitmaps(void)

 /*
 * _init_all_slurm_conf - initialize or re-initialize the slurm
- *	configuration values.
- *	RET 0 if no error, otherwise an error code
- * Note: Operates on common variables, no arguments
+ *	configuration values.
+ *	RET 0 if no error, otherwise an error code.
+ * NOTE: We leave the job table intact
+ * NOTE: Operates on common variables, no arguments
 */
 static int _init_all_slurm_conf(void)
 {
@@ -314,7 +311,7 @@ static int _parse_node_spec(char *in_line)
 			free(this_node_name);
 			this_node_name = malloc(128);
 			if (this_node_name == NULL)
-				fatal("memory allocation failure");
+				fatal ("memory allocation failure");
 			getnodename(this_node_name, 128);
 		}
 		if (strcasecmp(this_node_name, "DEFAULT") == 0) {
@@ -593,7 +590,7 @@ static int _parse_part_spec(char *in_line)
 			xfree(nodes);
 			nodes = xmalloc(128);
 			if (nodes == NULL)
-				fatal("memory allocation failure");
+				fatal ("memory allocation failure");
 			getnodename(nodes, 128);
 		}
 		part_record_point->nodes = nodes;
@@ -728,33 +725,35 @@ int read_slurm_conf(int recover)
 	}
 	rehash();

-	if (old_node_table_ptr) {
-		info("restoring original state of nodes");
-		for (i = 0; i < old_node_record_count; i++) {
-			node_record_point =
-			    find_node_record(old_node_table_ptr[i].name);
-			if (node_record_point)
-				node_record_point->node_state =
-				    old_node_table_ptr[i].node_state;
-		}
-		xfree(old_node_table_ptr);
-	}
 	set_slurmd_addr();

 	if (recover) {
 		(void) load_all_node_state();
 		(void) load_all_part_state();
 		(void) load_all_job_state();
+	} else {
+		if (old_node_table_ptr) {
+			info("restoring original state of nodes");
+			for (i = 0; i < old_node_record_count; i++) {
+				node_record_point =
+				    find_node_record(old_node_table_ptr[i].name);
+				if (node_record_point)
+					node_record_point->node_state =
+					    old_node_table_ptr[i].node_state;
+			}
+		}
+		reset_job_bitmaps();
 	}

+	(void) _sync_nodes_to_jobs();
 	(void) sync_job_files();
+	xfree(old_node_table_ptr);

 	if ((error_code = _build_bitmaps()))
 		return error_code;
 #ifdef HAVE_LIBELAN3
 	_validate_node_proc_count();
 #endif
-	if (recover)
-		(void) _sync_nodes_to_jobs();
+	(void) _sync_nodes_to_comp_job();

 	load_part_uid_allow_list(1);
@@ -816,10 +815,8 @@ static void _set_config_defaults(slurm_ctl_conf_t * ctl_conf_ptr)

 /*
 * _sync_nodes_to_jobs - sync node state to job states on slurmctld restart.
- *	we perform "lazy" updates on node states due to their number (assumes
- *	number of jobs is much smaller than the number of nodes).  This
This - * routine marks nodes allocated to a job as busy no matter what the - * node's last saved state + * This routine marks nodes allocated to a job as busy no matter what + * the node's last saved state * RET count of nodes having state changed * Note: Operates on common variables, no arguments */ @@ -827,28 +824,41 @@ static int _sync_nodes_to_jobs(void) { struct job_record *job_ptr; ListIterator job_record_iterator; - int i, update_cnt = 0; - uint16_t no_resp_flag; + int update_cnt = 0; job_record_iterator = list_iterator_create(job_list); - while ((job_ptr = - (struct job_record *) list_next(job_record_iterator))) { - if (job_ptr->job_state > JOB_COMPLETING) - continue; + while ((job_ptr = (struct job_record *) + list_next(job_record_iterator))) { if (job_ptr->node_bitmap == NULL) continue; - for (i = 0; i < node_record_count; i++) { - if (bit_test(job_ptr->node_bitmap, i) == 0) - continue; - node_record_table_ptr[i].job_cnt++; - if (node_record_table_ptr[i].node_state == - NODE_STATE_ALLOCATED) - continue; /* already in proper state */ + + if ((job_ptr->job_state == JOB_RUNNING) || + (job_ptr->job_state & JOB_COMPLETING)) + update_cnt += _sync_nodes_to_run_job(job_ptr); + } + if (update_cnt) + info("_sync_nodes_to_jobs updated state of %d nodes", + update_cnt); + return update_cnt; +} + +/* For jobs which are in state COMPLETING, deallocate the nodes and + * issue the RPC to revoke credentials */ +static int _sync_nodes_to_comp_job(void) +{ + struct job_record *job_ptr; + ListIterator job_record_iterator; + int update_cnt = 0; + + job_record_iterator = list_iterator_create(job_list); + while ((job_ptr = (struct job_record *) + list_next(job_record_iterator))) { + if ((job_ptr->node_bitmap) && + (job_ptr->job_state & JOB_COMPLETING)) { update_cnt++; - no_resp_flag = node_record_table_ptr[i].node_state & - NODE_STATE_NO_RESPOND; - node_record_table_ptr[i].node_state = - NODE_STATE_ALLOCATED | no_resp_flag; + info("Revoking credentials for job_id %u", + job_ptr->job_id); + deallocate_nodes(job_ptr); } } if (update_cnt) @@ -857,6 +867,32 @@ static int _sync_nodes_to_jobs(void) return update_cnt; } +static int _sync_nodes_to_run_job(struct job_record *job_ptr) +{ + int i, cnt = 0; + uint16_t base_state, no_resp_flag; + + for (i = 0; i < node_record_count; i++) { + if (bit_test(job_ptr->node_bitmap, i) == 0) + continue; + node_record_table_ptr[i].run_job_cnt++; + base_state = node_record_table_ptr[i].node_state & + (~NODE_STATE_NO_RESPOND); + if (base_state == NODE_STATE_DOWN) + job_ptr->job_state = JOB_NODE_FAIL | JOB_COMPLETING; + if ((base_state == NODE_STATE_UNKNOWN) || + (base_state == NODE_STATE_IDLE) || + (base_state == NODE_STATE_DRAINED)) { + cnt++; + no_resp_flag = node_record_table_ptr[i].node_state & + NODE_STATE_NO_RESPOND; + node_record_table_ptr[i].node_state = + NODE_STATE_ALLOCATED | no_resp_flag; + } + } + return cnt; +} + #ifdef HAVE_LIBELAN3 /* Every node in a given partition must have the same processor count * at present, this function insure it */ diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 482f85fbe8793d96868c74d3c39abaac275ba6a4..d38dc8fc8f730926ad7781272337dc7ef392acfb 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -67,6 +67,10 @@ if (_X) bit_free (_X); \ _X = NULL; \ } while (0) +#define IS_JOB_FINISHED(_X) \ + ((_X->job_state & (~JOB_COMPLETING)) > JOB_RUNNING) +#define IS_JOB_PENDING(_X) \ + ((_X->job_state & (~JOB_COMPLETING)) == JOB_PENDING) 
 /*****************************************************************************\
 *  GENERAL CONFIGURATION parameters and data structures
@@ -144,7 +148,8 @@ struct node_record {
 	struct part_record *partition_ptr;	/* partition for this node */
 	char comm_name[MAX_NAME_LEN];	/* communications path name to node */
 	slurm_addr slurm_addr;		/* network address */
-	uint16_t job_cnt;		/* count of jobs allocated to node */
+	uint16_t comp_job_cnt;		/* count of jobs completing on node */
+	uint16_t run_job_cnt;		/* count of jobs running on node */
 };
 extern struct node_record *node_record_table_ptr;  /* ptr to node records */
@@ -237,10 +242,7 @@ struct job_record {
 	struct part_record *part_ptr;	/* pointer to the partition record */
 	uint16_t batch_flag;		/* 1 if batch job (with script) */
 	uint32_t user_id;		/* user the job runs as */
-	enum job_states job_state;	/* state of the job, NOTE: state
-					 * JOB_COMPLETING is set in pack_job
-					 * when (job state > JOB_RUNNING) &&
-					 * (node_count > 0), its artificial */
+	enum job_states job_state;	/* state of the job */
 	uint16_t kill_on_node_fail;	/* 1 if job should be killed on
					 * node failure */
 	uint16_t kill_on_step_done;	/* 1 if job should be killed when
@@ -321,11 +323,9 @@ extern char * bitmap2node_name (bitstr_t *bitmap) ;
 void	build_job_cred(struct job_record *job_ptr);

 /*
- * build_node_details - set cpu counts and addresses for allocated nodes
+ * build_node_details - set cpu counts and addresses for allocated nodes:
+ *	cpu_count_reps, cpus_per_node, node_addr, node_cnt, num_cpu_groups
 * IN job_ptr - pointer to a job record
- * NOTE: the arrays cpus_per_node, cpu_count_reps and node_addr in the job
- *	details record are allocated by build_node_details and must be
- *	xfreed by the caller, preferably using delete_job_details
 */
 extern void build_node_details (struct job_record *job_ptr);
@@ -418,16 +418,6 @@ extern void delete_all_step_records (struct job_record *job_ptr);
 */
 extern void delete_job_details (struct job_record *job_entry);

-/*
- * delete_node_record - delete the node record for a node with specified name
- *	to avoid invalidating the bitmaps and hash table, we just clear the name
- *	set its state to NODE_STATE_DOWN
- * IN name - name of the desired node
- * RET 0 on success, errno otherwise
- * global: node_record_table_ptr - pointer to global node table
- */
-extern int delete_node_record (char *name);
-
 /*
 * delete_step_record - delete record for job step for specified job_ptr
 *	and step_id
@@ -711,8 +701,13 @@ extern void make_node_alloc(struct node_record *node_ptr);
 /* make_node_comp - flag specified node as completing a job */
 extern void make_node_comp(struct node_record *node_ptr);

-/* make_node_idle - flag specified node as no longer being in use */
-extern void make_node_idle(struct node_record *node_ptr);
+/*
+ * make_node_idle - flag specified node as having completed a job
+ * IN node_ptr - pointer to node reporting job completion
+ * IN job_ptr - pointer to job that just completed
+ */
+extern void make_node_idle(struct node_record *node_ptr,
+			   struct job_record *job_ptr);

 /* msg_to_slurmd - send given msg_type every slurmd, no args */
 extern void msg_to_slurmd (slurm_msg_type_t msg_type);
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index d63b76301a782d594b91623eee04a89b86e96a1c..ae820eda500d4d92925f49304ab25941888617c8 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -29,7 +29,6 @@
 #endif

 #include <time.h>
-#include <assert.h>
 #include <ctype.h>
 #include <errno.h>
 #include <stdio.h>
@@ -63,7 +62,8 @@
 create_step_record (struct job_record *job_ptr)
 {
 	struct step_record *step_record_point;

-	assert (job_ptr);
+	if (job_ptr == NULL)
+		fatal ("create_step_record: job_ptr == NULL");
 	step_record_point = (struct step_record *)
 			    xmalloc (sizeof (struct step_record));
@@ -88,7 +88,8 @@ delete_all_step_records (struct job_record *job_ptr)
 	ListIterator step_record_iterator;
 	struct step_record *step_record_point;

-	assert (job_ptr);
+	if (job_ptr == NULL)
+		fatal ("delete_all_step_records: job_ptr == NULL");
 	step_record_iterator = list_iterator_create (job_ptr->step_list);

 	while ((step_record_point =
@@ -120,7 +121,8 @@ delete_step_record (struct job_record *job_ptr, uint32_t step_id)
 	struct step_record *step_record_point;
 	int error_code;

-	assert (job_ptr);
+	if (job_ptr == NULL)
+		fatal ("delete_step_record: job_ptr == NULL");
 	error_code = ENOENT;
 	step_record_iterator = list_iterator_create (job_ptr->step_list);
@@ -174,21 +176,21 @@ struct step_record *
 find_step_record(struct job_record *job_ptr, uint16_t step_id)
 {
 	ListIterator step_record_iterator;
-	struct step_record *step_record_point;
+	struct step_record *step_ptr;

 	if (job_ptr == NULL)
 		return NULL;

 	step_record_iterator = list_iterator_create (job_ptr->step_list);
-	while ((step_record_point =
-		(struct step_record *) list_next (step_record_iterator))) {
-		if (step_record_point->step_id == step_id) {
+	while ((step_ptr = (struct step_record *)
+			list_next (step_record_iterator))) {
+		if (step_ptr->step_id == step_id) {
 			break;
 		}
 	}

 	list_iterator_destroy (step_record_iterator);
-	return step_record_point;
+	return step_ptr;
 }
@@ -215,10 +217,7 @@ int job_step_signal(uint32_t job_id, uint32_t step_id,
 		return ESLURM_INVALID_JOB_ID;
 	}

-	if ((job_ptr->job_state == JOB_FAILED) ||
-	    (job_ptr->job_state == JOB_COMPLETE) ||
-	    (job_ptr->job_state == JOB_NODE_FAIL) ||
-	    (job_ptr->job_state == JOB_TIMEOUT))
+	if (IS_JOB_FINISHED(job_ptr))
 		return ESLURM_ALREADY_DONE;

 	if ((job_ptr->user_id != uid) && (uid != 0) && (uid != getuid())) {
@@ -231,7 +230,7 @@ int job_step_signal(uint32_t job_id, uint32_t step_id,
 	if (step_ptr == NULL) {
 		info("job_step_cancel step %u.%u not found",
 		     job_id, step_id);
-		return ESLURM_ALREADY_DONE;
+		return ESLURM_INVALID_JOB_ID;
 	}

 	signal_step_tasks(step_ptr, signal);
@@ -337,9 +336,7 @@ int job_step_complete(uint32_t job_id, uint32_t step_id, uid_t uid,
 	    (list_count(job_ptr->step_list) <= 1))
 		return job_complete(job_id, uid, requeue, job_return_code);

-	if ((job_ptr->job_state == JOB_FAILED) ||
-	    (job_ptr->job_state == JOB_COMPLETE) ||
-	    (job_ptr->job_state == JOB_TIMEOUT))
+	if (IS_JOB_FINISHED(job_ptr))
 		return ESLURM_ALREADY_DONE;

 	if ((job_ptr->user_id != uid) && (uid != 0) && (uid != getuid())) {
@@ -510,9 +507,10 @@ step_create ( step_specs *step_specs, struct step_record** new_step_record,
 	    (step_specs->user_id != 0))
 		return ESLURM_ACCESS_DENIED ;

-	if ((job_ptr->job_state == JOB_COMPLETE) ||
-	    (job_ptr->job_state == JOB_FAILED) ||
-	    (job_ptr->job_state == JOB_TIMEOUT) ||
+	if (IS_JOB_PENDING(job_ptr))
+		return ESLURM_INVALID_JOB_ID ;
+
+	if (IS_JOB_FINISHED(job_ptr) ||
 	    (job_ptr->end_time <= time(NULL)))
 		return ESLURM_ALREADY_DONE;

diff --git a/src/squeue/opts.c b/src/squeue/opts.c
index 4cdf72282b7e56520eb7ff0678ed7585a8442689..7611487b71e9eaa69678ed5cf9ec9c6ac452ed39 100644
--- a/src/squeue/opts.c
+++ b/src/squeue/opts.c
@@ -190,7 +190,7 @@ parse_command_line( int argc, char* argv[] )
 }

 /*
- * _parse_state - convert state name string to numeric value
+ * _parse_state - convert job state name string to numeric value
 * IN str - state name
 * OUT states - enum job_states value corresponding to str
 * RET 0 or error code
@@ -211,13 +211,20 @@ _parse_state( char* str, enum job_states* states )
 			return SLURM_SUCCESS;
 		}
 	}
-
+	if ((strcasecmp(job_state_string(JOB_COMPLETING), str) == 0) ||
+	    (strcasecmp(job_state_string_compact(JOB_COMPLETING), str) == 0)) {
+		*states = JOB_COMPLETING;
+		return SLURM_SUCCESS;
+	}
+
 	fprintf (stderr, "Invalid job state specified: %s\n", str);
 	state_names = xstrdup(job_state_string(0));
 	for (i=1; i<JOB_END; i++) {
 		xstrcat(state_names, ",");
 		xstrcat(state_names, job_state_string(i));
 	}
+	xstrcat(state_names, ",");
+	xstrcat(state_names, job_state_string(JOB_COMPLETING));
 	fprintf (stderr, "Valid job states include: %s\n", state_names);
 	xfree (state_names);
 	return SLURM_ERROR;
@@ -557,8 +564,8 @@ _build_part_list( char* str )
 }

 /*
- * _build_state_list - build a list of node states
- * IN str - comma separated list of node states
+ * _build_state_list - build a list of job states
+ * IN str - comma separated list of job states
 * RET List of enum job_states values
 */
 static List
@@ -594,7 +601,7 @@ _build_state_list( char* str )
 }

 /*
- * _build_all_states_list - build a list containing all possible node states
+ * _build_all_states_list - build a list containing all possible job states
 * RET List of enum job_states values
 */
 static List
@@ -610,6 +617,9 @@ _build_all_states_list( void )
 		*state_id = ( enum job_states ) i;
 		list_append( my_list, state_id );
 	}
+	state_id = xmalloc( sizeof( enum job_states ) );
+	*state_id = ( enum job_states ) JOB_COMPLETING;
+	list_append( my_list, state_id );

 	return my_list;
 }
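
[Reviewer note, not part of the patch] The squeue hunks above make the new flag visible to users: because JOB_COMPLETING falls outside the 0..JOB_END loop over the enum, it needs its own branch both when parsing --states arguments and when listing the valid names. A sketch of the resulting lookup behaviour, where job_state_string()/job_state_string_compact() are stand-ins assumed to return "COMPLETING" and "CG" for the completing state:

#include <stdio.h>
#include <strings.h>	/* strcasecmp() */

/* stand-ins for the real SLURM helpers; assumed behaviour only */
static const char *job_state_string(int completing)
{
	return completing ? "COMPLETING" : "PENDING";
}
static const char *job_state_string_compact(int completing)
{
	return completing ? "CG" : "PD";
}

int main(void)
{
	const char *user_input = "cg";	/* e.g. squeue --states=cg */

	/* Mirrors the added branch in _parse_state(): case-insensitive
	 * match against both the long and the compact state name. */
	if ((strcasecmp(job_state_string(1), user_input) == 0) ||
	    (strcasecmp(job_state_string_compact(1), user_input) == 0))
		printf("matched JOB_COMPLETING\n");
	return 0;
}
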