diff --git a/doc/man/man1/squeue.1 b/doc/man/man1/squeue.1
index 5bfb4a3e997c734a31a84e953e461f560a0c7550..16941e509f9fcae50b14db4685231c0990f7bda4 100644
--- a/doc/man/man1/squeue.1
+++ b/doc/man/man1/squeue.1
@@ -113,7 +113,8 @@ default value of zero.
 .TP
 \fB%C\fR
 Number of CPUs (processors) requested by the job or allocated to
-it if already running.
+it if already running. As a job is completing, this number will
+reflect the current number of CPUs allocated.
 .TP
 \fB%d\fR
 Minimum size of temporary disk space (in MB) requested by the job.
@@ -124,7 +125,8 @@ required by a pending job. The actual number of nodes allocated to a pending
 job may exceed this number if the job specified a node range count (e.g.
 minimum and maximum node counts) or the job specifies a processor
 count instead of a node count and the cluster contains nodes with varying
-processor counts.
+processor counts. As a job is completing, this number will reflect the
+current number of nodes allocated.
 .TP
 \fB%e\fR
 Time at which the job ended or is expected to end (based upon its time limit)
@@ -193,8 +195,7 @@ requested by the job
 \fB%N\fR
 List of nodes allocated to the job or job step. In the case of a
 \fICOMPLETING\fR job, the list of nodes will comprise only those
-nodes that have not yet been returned to service. This may result
-in the node count being greater than the number of listed nodes.
+nodes that have not yet been returned to service.
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 88d64adc887a723e48b42743f95f85ccc8b12da6..73f82ec3b9da30dcf7bd42a025bd6dadf381f668 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -94,7 +94,7 @@
 #define JOB_HASH_INX(_job_id)	(_job_id % hash_table_size)
 
 /* Change JOB_STATE_VERSION value when changing the state save format */
-#define JOB_STATE_VERSION      "VER008"	/* already changed for slurm 2.1 */
+#define JOB_STATE_VERSION      "VER009"	/* already changed for slurm 2.1 */
 
 #define JOB_CKPT_VERSION      "JOB_CKPT_001"
 
@@ -113,7 +113,8 @@ static bool wiki_sched_test = false;
 
 /* Local functions */
 static void _add_job_hash(struct job_record *job_ptr);
-static int  _checkpoint_job_record (struct job_record *job_ptr, char *image_dir);
+static int  _checkpoint_job_record (struct job_record *job_ptr,
+				    char *image_dir);
 static int  _copy_job_desc_to_file(job_desc_msg_t * job_desc,
 				   uint32_t job_id);
 static int  _copy_job_desc_to_job_record(job_desc_msg_t * job_desc,
@@ -689,6 +690,7 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer)
 	pack32(dump_job_ptr->alloc_sid, buffer);
 	pack32(dump_job_ptr->num_procs, buffer);
 	pack32(dump_job_ptr->total_procs, buffer);
+	pack32(dump_job_ptr->cpu_cnt, buffer);
 	pack32(dump_job_ptr->exit_code, buffer);
 	pack32(dump_job_ptr->db_index, buffer);
 	pack32(dump_job_ptr->assoc_id, buffer);
@@ -773,7 +775,8 @@ static int _load_job_state(Buf buffer)
 {
 	uint32_t job_id, user_id, group_id, time_limit, priority, alloc_sid;
 	uint32_t exit_code, num_procs, assoc_id, db_index, name_len;
-	uint32_t next_step_id, total_procs, resv_id, spank_job_env_size = 0;
+	uint32_t next_step_id, total_procs, cpu_cnt,
+		resv_id, spank_job_env_size = 0;
 	time_t start_time, end_time, suspend_time, pre_sus_time, tot_sus_time;
 	time_t now = time(NULL);
 	uint16_t job_state, details, batch_flag, step_flag;
@@ -805,6 +808,7 @@ static int _load_job_state(Buf buffer)
 	safe_unpack32(&alloc_sid, buffer);
 	safe_unpack32(&num_procs, buffer);
 	safe_unpack32(&total_procs, buffer);
+	safe_unpack32(&cpu_cnt, buffer);
 	safe_unpack32(&exit_code, buffer);
 	safe_unpack32(&db_index, buffer);
 	safe_unpack32(&assoc_id, buffer);
@@ -1003,6 +1007,7 @@ static int _load_job_state(Buf buffer)
 	job_ptr->time_last_active = now;
 	job_ptr->time_limit = time_limit;
 	job_ptr->total_procs = total_procs;
+	job_ptr->cpu_cnt = cpu_cnt;
 	job_ptr->tot_sus_time = tot_sus_time;
 	job_ptr->user_id = user_id;
 	job_ptr->warn_signal = warn_signal;
@@ -1485,6 +1490,7 @@ extern int kill_running_job_by_node_name(char *node_name)
 		if (IS_JOB_COMPLETING(job_ptr)) {
 			job_count++;
 			bit_clear(job_ptr->node_bitmap, bit_position);
+			job_update_cpu_cnt(job_ptr, bit_position);
 			if (job_ptr->node_cnt)
 				(job_ptr->node_cnt)--;
 			else
@@ -3819,6 +3825,47 @@ void job_time_limit(void)
 	fini_job_resv_check();
 }
 
+extern int job_update_cpu_cnt(struct job_record *job_ptr, int node_inx)
+{
+	uint16_t cpu_cnt = 0, i = 0;
+	int curr_node_inx;
+
+	xassert(job_ptr);
+	if (!job_ptr->job_resrcs || !job_ptr->job_resrcs->node_bitmap) {
+		error("job_update_cpu_cnt: "
+		      "no job_resrcs or node_bitmap for job %u",
+		      job_ptr->job_id);
+		return SLURM_ERROR;
+	}
+	/* Figure out what the first bit set in the job is to get the
+	 * correct offset.  Unlike job_ptr->node_bitmap, which gets
+	 * cleared as nodes are released,
+	 * job_ptr->job_resrcs->node_bitmap never gets cleared. */
+	curr_node_inx = bit_ffs(job_ptr->job_resrcs->node_bitmap);
+	if (curr_node_inx != -1) {
+		for (i = 0; i < job_ptr->job_resrcs->cpu_array_cnt; i++) {
+			cpu_cnt = job_ptr->job_resrcs->cpu_array_value[i];
+			if (curr_node_inx >= node_inx)
+				break;
+			curr_node_inx +=
+				job_ptr->job_resrcs->cpu_array_reps[i];
+		}
+		job_ptr->cpu_cnt -= cpu_cnt;
+		if ((int)job_ptr->cpu_cnt < 0) {
+			error("job_update_cpu_cnt: "
+			      "cpu_cnt underflow on job_id %u",
+			      job_ptr->job_id);
+			job_ptr->cpu_cnt = 0;
+			return SLURM_ERROR;
+		}
+	} else {
+		error("job_update_cpu_cnt: "
+		      "no nodes set yet in job %u", job_ptr->job_id);
+		return SLURM_ERROR;
+	}
+	return SLURM_SUCCESS;
+}
+
+
 /* Terminate a job that has exhausted its time limit */
 static void _job_timed_out(struct job_record *job_ptr)
 {
@@ -4178,6 +4225,7 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer)
 {
 	struct job_details *detail_ptr;
 	time_t begin_time = 0;
+	char *nodelist = NULL;
 
 	pack32(dump_job_ptr->assoc_id, buffer);
 	pack32(dump_job_ptr->job_id, buffer);
@@ -4218,7 +4266,16 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer)
 	pack_time(dump_job_ptr->pre_sus_time, buffer);
 
 	pack32(dump_job_ptr->priority, buffer);
-	packstr(dump_job_ptr->nodes, buffer);
+	/* Only send the allocated nodelist since we are only sending
+	 * the number of cpus and nodes that are currently allocated.
+	 */
+	if (!IS_JOB_COMPLETING(dump_job_ptr))
+		packstr(dump_job_ptr->nodes, buffer);
+	else {
+		nodelist = bitmap2node_name(dump_job_ptr->node_bitmap);
+		packstr(nodelist, buffer);
+		xfree(nodelist);
+	}
+
 	packstr(dump_job_ptr->partition, buffer);
 	packstr(dump_job_ptr->account, buffer);
 	packstr(dump_job_ptr->network, buffer);
@@ -4249,7 +4306,9 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer)
 	packstr(dump_job_ptr->wckey, buffer);
 	packstr(dump_job_ptr->alloc_node, buffer);
 	pack_bit_fmt(dump_job_ptr->node_bitmap, buffer);
-	if (dump_job_ptr->total_procs)
+	if (IS_JOB_COMPLETING(dump_job_ptr))
+		pack32(dump_job_ptr->cpu_cnt, buffer);
+	else if (dump_job_ptr->total_procs)
 		pack32(dump_job_ptr->total_procs, buffer);
 	else
 		pack32(dump_job_ptr->num_procs, buffer);
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index 56207517955281139ac7a25c79f7e4a36b903dfe..ae3ea61cfad76f2b413cb59d149add36e429827b 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -2138,6 +2138,9 @@ void make_node_idle(struct node_record *node_ptr,
 	    (bit_test(job_ptr->node_bitmap, inx))) {	/* Not a replay */
 		last_job_update = now;
 		bit_clear(job_ptr->node_bitmap, inx);
+
+		job_update_cpu_cnt(job_ptr, inx);
+
 		if (job_ptr->node_cnt) {
 			if ((--job_ptr->node_cnt) == 0) {
 				time_t delay;
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 49cff3f2abc17f78d95aca90de054819a80ae1b7..ae88d51eef18a64a72d6508046f03f9db7c81df6 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -202,6 +202,7 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout,
 			/* Issue the KILL RPC, but don't verify response */
 			down_node_cnt++;
 			bit_clear(job_ptr->node_bitmap, i);
+			job_update_cpu_cnt(job_ptr, i);
 			job_ptr->node_cnt--;
 		}
 		make_node_comp(node_ptr, job_ptr, suspended);
@@ -1126,6 +1127,9 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only,
 					   req_nodes, test_only,
 					   &preemptee_job_list);
 	}
+	/* Set up cpu_cnt here so we can decrement it as nodes
+	 * free up.  total_procs is set within _get_req_features() */
+	job_ptr->cpu_cnt = job_ptr->total_procs;
 
 	if (!test_only && preemptee_job_list && (error_code == SLURM_SUCCESS))
 		_preempt_jobs(preemptee_job_list, &error_code);
@@ -1903,6 +1907,7 @@ extern void re_kill_job(struct job_record *job_ptr)
 		if (IS_NODE_DOWN(node_ptr)) {
 			/* Consider job already completed */
 			bit_clear(job_ptr->node_bitmap, i);
+			job_update_cpu_cnt(job_ptr, i);
 			if (node_ptr->comp_job_cnt)
 				(node_ptr->comp_job_cnt)--;
 			if ((--job_ptr->node_cnt) == 0) {
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 0e72c5d7cf75142991ed333e114e47512c56a1b0..117cf5bf9930f78366a0ba16ee1a386ca73c5e01 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -378,6 +378,8 @@ struct job_record {
 	time_t ckpt_time;	/* last time job was periodically
				 * checkpointed */
 	char *comment;		/* arbitrary comment */
+	uint32_t cpu_cnt;	/* current count of cpus held
+				 * by the job */
 	uint16_t cr_enabled;	/* specify if Consumable Resources
				 * is enabled. Needed since CR deals
				 * with a finer granularity in its
@@ -1023,6 +1025,15 @@ extern int job_step_signal(uint32_t job_id, uint32_t step_id,
  */
 extern void job_time_limit (void);
 
+/*
+ * job_update_cpu_cnt - when a job is completing, remove the released
+ *	node's CPUs from the job's allocated count
+ * IN/OUT job_ptr - job structure to be updated
+ * IN node_inx - node bit that is finished with the job
+ * RET SLURM_SUCCESS on success, SLURM_ERROR on cpu_cnt underflow
+ */
+extern int job_update_cpu_cnt(struct job_record *job_ptr, int node_inx);
+
 /*
  * check_job_step_time_limit - terminate jobsteps which have exceeded
  *	their time limit
diff --git a/src/squeue/print.c b/src/squeue/print.c
index 19d14cbe31789bbf7b216a1cd1c432c49332bef6..c9f1b84b4f8bbea801bcd9d43c4543ee3eaa966a 100644
--- a/src/squeue/print.c
+++ b/src/squeue/print.c
@@ -56,7 +56,6 @@
 #include "src/common/xmalloc.h"
 #include "src/common/xstring.h"
 
-static int	_adjust_completing (job_info_t *j, node_info_msg_t **ni);
 static int	_filter_job(job_info_t * job);
 static int	_filter_step(job_step_info_t * step);
 static int	_get_node_cnt(job_info_t * job);
@@ -95,7 +94,6 @@ int print_jobs_array(job_info_t * jobs, int size, List format)
 
 	/* Filter out the jobs of interest */
 	for (; i < size; i++) {
-		_adjust_completing(&jobs[i], &ni);
 		if (_filter_job(&jobs[i]))
 			continue;
 		list_append(l, (void *) &jobs[i]);
@@ -655,25 +653,16 @@ int _print_job_node_inx(job_info_t * job, int width, bool right, char* suffix)
 int _print_job_num_procs(job_info_t * job, int width, bool right, char* suffix)
 {
-	char tmp_char[8];
+	char tmp_char[18];
 
 	if (job == NULL)	/* Print the Header instead */
 		_print_str("CPUS", width, right, true);
 	else {
-		if (job->job_resrcs &&
-		    (job->job_resrcs->cpu_array_cnt > 0) &&
-		    (job->job_resrcs->cpu_array_value) &&
-		    (job->job_resrcs->cpu_array_reps)) {
-			uint32_t cnt = 0, i;
-			for (i=0; i<job->job_resrcs->cpu_array_cnt; i++) {
-				cnt += job->job_resrcs->cpu_array_value[i] *
-				       job->job_resrcs->cpu_array_reps[i];
-			}
-			convert_num_unit((float)cnt, tmp_char,
-					 sizeof(tmp_char), UNIT_NONE);
-		} else {
-			convert_num_unit((float)job->num_procs, tmp_char,
-					 sizeof(tmp_char), UNIT_NONE);
-		}
+#ifdef HAVE_BG
+		convert_num_unit((float)job->num_procs, tmp_char,
+				 sizeof(tmp_char), UNIT_NONE);
+#else
+		snprintf(tmp_char, sizeof(tmp_char), "%u", job->num_procs);
+#endif
 		_print_str(tmp_char, width, right, true);
 	}
 	if (suffix)
@@ -1453,52 +1442,3 @@ static int _filter_step(job_step_info_t * step)
 
 	return 0;
 }
-
-static int _adjust_completing (job_info_t *job_ptr, node_info_msg_t **ni)
-{
-	hostlist_t hl = NULL;
-	int i, j;
-	char buf[8192];
-
-	if (!(job_ptr->job_state & JOB_COMPLETING))
-		return (0);
-
-	/* NOTE: We want to load all nodes (show_all flag set)
-	 * so that node index values from the job records are
-	 * valid for cross-referencing */
-	if ((*ni == NULL) && (slurm_load_node (0, ni, 1) < 0)) {
-		error ("Unable the load node information: %m");
-		return (0);
-	}
-
-	hl = hostlist_create ("");
-	for (i=0; ; i+=2) {
-		if (job_ptr->node_inx[i] == -1)
-			break;
-		if (i >= (*ni)->record_count) {
-			error ("Invalid node index for job %u",
-			       job_ptr->job_id);
-			break;
-		}
-		for (j=job_ptr->node_inx[i]; j<=job_ptr->node_inx[i+1]; j++) {
-			if (j >= (*ni)->record_count) {
-				error ("Invalid node index for job %u",
-				       job_ptr->job_id);
-				break;
-			}
-			hostlist_push(hl, (*ni)->node_array[j].name);
-		}
-	}
-
-	hostlist_uniq(hl);
-	hostlist_ranged_string (hl, 8192, buf);
-	hostlist_destroy(hl);
-	/* if we decrement the nodelist why not the num_nodes? this
this - * code will set the number to the total number of nodes in - * the list */ - /* job_ptr->num_nodes = MAX(job_ptr->num_nodes, */ - /* _nodes_in_list(job_ptr->nodes)); */ - xfree (job_ptr->nodes); - job_ptr->nodes = xstrdup (buf); - return (0); -} diff --git a/src/sview/job_info.c b/src/sview/job_info.c index cfa4cf9c7c46efc378914ac846506dd9dd6b4005..5716466720f1e6cee061fd12148c1278f74d5998 100644 --- a/src/sview/job_info.c +++ b/src/sview/job_info.c @@ -1029,44 +1029,6 @@ static int _get_node_cnt(job_info_t * job) return node_cnt; } -static int _adjust_completing(job_info_t *job_ptr) -{ - static node_info_msg_t *node_info_ptr = NULL; - hostlist_t hl = NULL; - int i, j; - char buf[8192]; - - if(get_new_info_node(&node_info_ptr, force_refresh) == SLURM_ERROR) - return SLURM_ERROR; - - hl = hostlist_create (""); - for (i=0; ; i+=2) { - if (job_ptr->node_inx[i] == -1) - break; - if (i >= node_info_ptr->record_count) { - error ("Invalid node index for job %u", - job_ptr->job_id); - break; - } - for (j=job_ptr->node_inx[i]; j<=job_ptr->node_inx[i+1]; j++) { - if (j >= node_info_ptr->record_count) { - error ("Invalid node index for job %u", - job_ptr->job_id); - break; - } - hostlist_push(hl, node_info_ptr->node_array[j].name); - } - } - hostlist_uniq(hl); - hostlist_ranged_string(hl, sizeof(buf), buf); - hostlist_destroy(hl); - - xfree (job_ptr->nodes); - job_ptr->nodes = xstrdup (buf); - - return SLURM_SUCCESS; -} - /* this needs to be freed by xfree() */ static void _convert_char_to_job_and_step(const char *data, int *jobid, int *stepid) @@ -2357,9 +2319,6 @@ static List _create_job_info_list(job_info_msg_t *job_info_ptr, sview_job_info_ptr->job_ptr->nodes = xstrdup(tmp_char); } #endif - if(IS_JOB_COMPLETING(job_ptr)) - _adjust_completing(job_ptr); - if(!sview_job_info_ptr->node_cnt) sview_job_info_ptr->node_cnt = _get_node_cnt(job_ptr);