diff --git a/NEWS b/NEWS
index 0e560ddffac7f1449909952f738af6a8051e035a..e172b22247f9b89ca8729ee4c8c1395bb0d161e9 100644
--- a/NEWS
+++ b/NEWS
@@ -26,7 +26,8 @@ documents those changes that are of interest to users and admins.
 * Changes in SLURM 2.0.1
 ========================
  -- Fix, truncate time of start and end for job steps in sacct.
- -- initialize all messages to slurmdbd
+ -- Initialize all messages to slurmdbd. Previously uninitialized string could
+    cause slurmctld to fail with invalid memory reference.
  -- BLUEGENE - Fix, for when trying to finish a torus on a block already
     visited. Even though this may be possible electrically this isn't
     valid in the under lying infrastructure.
@@ -45,6 +46,11 @@ documents those changes that are of interest to users and admins.
  -- Fix "-Q" (quiet) option for salloc and sbatch which was previously
     ignored.
  -- BLUEGENE - fix for finding odd shaped blocks in dynamic mode.
+ -- Fix logic supporting SuspendRate and ResumeRate configuration parameters.
+    Previous logic was changing state of one too many nodes per minute.
+ -- Save new reservation state file on shutdown (even if no changes).
+ -- Fix, when partitions are deleted the sched and select plugins are notified.
+ -- Fix linking problem that prevented checkpoint/aix from working.
 
 * Changes in SLURM 2.0.0
 ========================
diff --git a/doc/html/power_save.shtml b/doc/html/power_save.shtml
index aa29216ce51de33b8920c000b0bd79462683ba04..0254e701892a4d1f537f04454f9e998e2252226e 100644
--- a/doc/html/power_save.shtml
+++ b/doc/html/power_save.shtml
@@ -43,6 +43,8 @@
 Maximum number of nodes to be placed into power saving mode per minute.
 A value of zero results in no limits being imposed.
 The default value is 60.
+Rate calculations are performed using integers, so rounding errors
+may be significant for very small values (1 or 2 nodes per minute).
 Use this to prevent rapid drops in power requirements.</li>
 
 <li><b>ResumeRate</b>:
@@ -50,6 +52,8 @@
 Maximum number of nodes to be removed from power saving mode per minute.
 A value of zero results in no limits being imposed.
 The default value is 300.
+Rate calculations are performed using integers, so rounding errors
+may be significant for very small values (1 or 2 nodes per minute).
 Use this to prevent rapid increases in power requirements.</li>
 
 <li><b>SuspendProgram</b>:
@@ -219,6 +223,6 @@ In order to minimize this risk, when the <i>slurmctld</i> daemon is started and
 node which should be allocated to a job fails to respond, the
 <b>ResumeProgram</b> will be executed (possibly for a second time).</p>
 
-<p style="text-align:center;">Last modified 27 May 2009</p>
+<p style="text-align:center;">Last modified 2 June 2009</p>
 
 <!--#include virtual="footer.txt"-->
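
The rounding caveat added to both rate descriptions above is easy to
demonstrate. The following stand-alone C sketch is illustrative only; the
10-second scan interval and the budget formula are assumptions, not SLURM's
actual power-save loop. It shows how an integer per-scan budget derived from
a per-minute rate truncates to zero for small rates:

/* Illustrative only -- not SLURM's implementation. Assume a power
 * management loop that scans nodes every "interval_secs" seconds and
 * computes an integer budget of state changes from a per-minute rate. */
#include <stdio.h>

static int scan_budget(int rate_per_min, int interval_secs)
{
	if (rate_per_min == 0)
		return -1;	/* zero is documented as "no limit" */
	/* integer division drops the fractional credit on every scan */
	return (rate_per_min * interval_secs) / 60;
}

int main(void)
{
	int rate;
	for (rate = 1; rate <= 6; rate++)
		printf("rate=%d/min, 10s scan -> budget=%d\n",
		       rate, scan_budget(rate, 10));
	return 0;
}

With a 10-second scan, every rate below 6 nodes per minute yields a budget of
0; carrying the truncated remainder between scans, or accounting over a full
minute, would avoid the starvation this sketch shows.
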
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index 78ddbb16e462c74288c5739519d43867d2c2b55c..e1d931226ef717fdf84f3663464c3c3ad791c11e 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -1018,8 +1018,10 @@ operation by \fBResumeProgram\fR.
 The value is number of nodes per minute and it can be used to prevent
 power surges if a large number of nodes in power save mode are assigned
 work at the same time (e.g. a large job starts).
-A value of zero results in no limits being imposed.
+A value of zero results in no limits being imposed.
 The default value is 300 nodes per minute.
+Rate calculations are performed using integers, so rounding errors
+may be significant for very small values (1 or 2 nodes per minute).
 Related configuration options include \fBResumeTimeout\fR, \fBResumeProgram\fR,
 \fBSuspendRate\fR, \fBSuspendTime\fR, \fBResumeTimeout\fR, \fBSuspendProgram\fR,
 \fBSuspendExcNodes\fR, and \fBSuspendExcParts\fR.
@@ -1441,6 +1443,8 @@ The value is number of nodes per minute and it can be used to prevent
 a large drop in power power consumption (e.g. after a large job completes).
 A value of zero results in no limits being imposed.
 The default value is 60 nodes per minute.
+Rate calculations are performed using integers, so rounding errors
+may be significant for very small values (1 or 2 nodes per minute).
 Related configuration options include \fBResumeTimeout\fR, \fBResumeProgram\fR,
 \fBResumeRate\fR, \fBSuspendProgram\fR, \fBSuspendTime\fR, \fBSuspendTimeout\fR,
 \fBSuspendExcNodes\fR, and \fBSuspendExcParts\fR.
diff --git a/src/plugins/checkpoint/aix/checkpoint_aix.c b/src/plugins/checkpoint/aix/checkpoint_aix.c
index 39a18402515232cd1a34bd4572c43a447098a061..ee4a374da7435b639e0045f7d467f91ad588d0c4 100644
--- a/src/plugins/checkpoint/aix/checkpoint_aix.c
+++ b/src/plugins/checkpoint/aix/checkpoint_aix.c
@@ -58,6 +58,7 @@
 #include <slurm/slurm.h>
 #include <slurm/slurm_errno.h>
 
+#include "src/common/slurm_xlator.h"
 #include "src/common/list.h"
 #include "src/common/log.h"
 #include "src/common/pack.h"
@@ -67,6 +68,13 @@
 #include "src/slurmctld/agent.h"
 #include "src/slurmctld/slurmctld.h"
 
+/* These are defined here so when we link with something other than
+ * the slurmctld we will have these symbols defined. They will get
+ * overwritten when linking with the slurmctld.
+ */
+struct node_record *node_record_table_ptr = NULL;
+int node_record_count = 0;
+
 struct check_job_info {
 	uint16_t disabled;	/* counter, checkpointable only if zero */
 	uint16_t node_cnt;
diff --git a/src/plugins/select/bluegene/plugin/block_sys.c b/src/plugins/select/bluegene/plugin/block_sys.c
index 09beaee2ad1118b3494c5b68af86a7ee983bde43..3053da497e29719cb40bc90a98fcf3a4b73d0869 100755
--- a/src/plugins/select/bluegene/plugin/block_sys.c
+++ b/src/plugins/select/bluegene/plugin/block_sys.c
@@ -1155,7 +1155,7 @@ extern int load_state_file(List curr_block_list, char *dir_name)
 		bg_record->state = bg_info_record->state;
 		bg_record->job_running = NO_JOB_RUNNING;
 
-		bg_record->bp_count = bit_size(node_bitmap);
+		bg_record->bp_count = bit_set_count(node_bitmap);
 		bg_record->node_cnt = bg_info_record->node_cnt;
 		if(bg_conf->bp_node_cnt > bg_record->node_cnt) {
 			ionodes = bg_conf->bp_node_cnt
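
The block_sys.c change above is a one-word fix with a large effect:
bit_size() returns a bitmap's capacity while bit_set_count() returns how many
bits are set, so the old code recorded the size of the whole bitmap as the
block's base-partition count. A plain-C sketch of the distinction (SLURM's
real API is the bitstring library in src/common/bitstring.h; the 16-bit word
used here as a bitmap is hypothetical):

/* Plain-C sketch of the bit_size()/bit_set_count() distinction; the
 * real SLURM API is in src/common/bitstring.h. A 16-bit word stands
 * in for a bitmap with one bit per base partition. */
#include <stdio.h>

#define NBITS 16			/* hypothetical bitmap capacity */

static int demo_bit_size(unsigned bits)
{
	(void) bits;
	return NBITS;			/* capacity, regardless of contents */
}

static int demo_bit_set_count(unsigned bits)
{
	int count = 0;
	for (; bits; bits >>= 1)	/* count only the bits set */
		count += bits & 1;
	return count;
}

int main(void)
{
	unsigned node_bitmap = 0x000f;	/* 4 of 16 base partitions in use */
	printf("bit_size:      %d\n", demo_bit_size(node_bitmap));
	printf("bit_set_count: %d\n", demo_bit_set_count(node_bitmap));
	return 0;
}
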
diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index 6f235cb59fab40ae31e5da264e1c7ade1009d60f..6775b2a0945bcd70dc391c8e9172ef75aa672a0f 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -1520,7 +1520,8 @@ static int _batch_launch_defer(queued_request_t *queued_req_ptr)
 	if (!IS_NODE_POWER_SAVE(node_ptr) && !IS_NODE_NO_RESPOND(node_ptr)) {
 		/* ready to launch, adjust time limit for boot time */
 		int resume_timeout = slurm_get_resume_timeout();
-		if ((job_ptr->start_time + resume_timeout) >= now)
+		if ((job_ptr->time_limit != INFINITE)
+		    && ((job_ptr->start_time + resume_timeout) >= now))
 			job_ptr->end_time = now + (job_ptr->time_limit * 60);
 		queued_req_ptr->last_attempt = (time_t) 0;
 		return 0;
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 300ffe48f4f1d2ba5e31ef19810dd22434b90d5b..8fa161a709c5746ddc3a68f3adeb83bb5a429275 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -1358,8 +1358,9 @@ void save_all_state(void)
 {
 	/* Each of these functions lock their own databases */
 	schedule_job_save();
-	schedule_part_save();
 	schedule_node_save();
+	schedule_part_save();
+	schedule_resv_save();
 	schedule_trigger_save();
 	select_g_state_save(slurmctld_conf.state_save_location);
 	dump_assoc_mgr_state(slurmctld_conf.state_save_location);
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 361d9af918d1d73dc3270971f79141ed9d92e82c..1557098b21f50eec1810893429d6f886c67daac4 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -393,7 +393,8 @@ extern int schedule(void)
 				bit_not(job_ptr->part_ptr->node_bitmap);
 			}
 		} else if (error_code == ESLURM_RESERVATION_NOT_USABLE) {
-			if (job_ptr->resv_ptr && job_ptr->resv_ptr->node_bitmap) {
+			if (job_ptr->resv_ptr
+			    && job_ptr->resv_ptr->node_bitmap) {
 				bit_not(job_ptr->resv_ptr->node_bitmap);
 				bit_and(avail_node_bitmap,
 					job_ptr->resv_ptr->node_bitmap);
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 83c1f52159c7fdc9673f614a3567025bd70cf3d2..479f9da689396e5ad2376735097ba0b7eaa78555 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -1095,8 +1095,8 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only,
 			(365 * 24 * 60 * 60); /* secs in year */
 	else
 		job_ptr->end_time = job_ptr->start_time +
-			(job_ptr->time_limit * 60); /* secs */
-
+			(job_ptr->time_limit * 60); /* secs */
+
 	if (select_g_job_begin(job_ptr) != SLURM_SUCCESS) {
 		/* Leave job queued, something is hosed */
 		error("select_g_job_begin(%u): %m", job_ptr->job_id);
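
The agent.c and node_scheduler.c hunks above work together: select_nodes()
gives a job with an INFINITE time limit an end_time one year out (the
/* secs in year */ branch), and the new guard in _batch_launch_defer() stops
that value from being recomputed from time_limit. SLURM defines INFINITE as
0xffffffff, so the 32-bit multiplication time_limit * 60 wraps; this sketch
(illustrative only, not SLURM code) shows the wrapped value:

/* Illustrative only: SLURM defines INFINITE as 0xffffffff, so a
 * 32-bit "time_limit * 60" wraps modulo 2^32 and yields an arbitrary
 * offset rather than "unlimited". */
#include <stdint.h>
#include <stdio.h>

#define INFINITE ((uint32_t) 0xffffffff)

int main(void)
{
	uint32_t time_limit = INFINITE;		/* unlimited job */
	uint32_t secs = time_limit * 60;	/* wraps to 4294967236 */

	printf("time_limit * 60 = %u (not unlimited)\n", secs);
	printf("intended one-year cap = %u seconds\n",
	       (uint32_t) (365 * 24 * 60 * 60));
	return 0;
}
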
diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c
index 14417cbbee946dd4ff3b7b48a3db0ef3a18a43c5..53d2dd38b3b1ac4b32660671df536c192ab25aa5 100644
--- a/src/slurmctld/partition_mgr.c
+++ b/src/slurmctld/partition_mgr.c
@@ -880,7 +880,8 @@ extern int update_part (update_part_msg_t * part_desc, bool create_flag)
 			info("update_part: DefaultTime would exceed MaxTime for "
 			     "partition %s", part_desc->name);
 		} else if (part_desc->default_time != NO_VAL) {
-			info("update_part: setting default_time to %u for partition %s",
+			info("update_part: setting default_time to %u "
+			     "for partition %s",
 			     part_desc->default_time, part_desc->name);
 			part_ptr->default_time = part_desc->default_time;
 		}
@@ -963,7 +964,8 @@ extern int update_part (update_part_msg_t * part_desc, bool create_flag)
 			info("update_part: setting default partition to %s",
 			     part_desc->name);
 		} else if (strcmp(default_part_name, part_desc->name) != 0) {
-			info("update_part: changing default partition from %s to %s",
+			info("update_part: changing default "
+			     "partition from %s to %s",
 			     default_part_name, part_desc->name);
 		}
 		xfree(default_part_name);
@@ -1027,7 +1029,8 @@ extern int update_part (update_part_msg_t * part_desc, bool create_flag)
 			xfree(part_ptr->nodes);
 			part_ptr->nodes = backup_node_list;
 		} else {
-			info("update_part: setting nodes to %s for partition %s",
+			info("update_part: setting nodes to %s "
+			     "for partition %s",
 			     part_ptr->nodes, part_desc->name);
 			xfree(backup_node_list);
 		}
@@ -1328,5 +1331,9 @@ extern int delete_partition(delete_part_msg_t *part_desc_ptr)
 	list_delete_all(part_list, list_find_part, part_desc_ptr->name);
 	last_part_update = time(NULL);
 
+	slurm_sched_partition_change();	/* notify sched plugin */
+	select_g_reconfigure();		/* notify select plugin too */
+	reset_job_priority();		/* free jobs */
+
 	return SLURM_SUCCESS;
 }
diff --git a/src/slurmctld/power_save.c b/src/slurmctld/power_save.c
index c1c88f20c786778025a0ed12ec7742bf5dc2d3fb..4c94c176b6d20fec20bd47062609841cc0b160f4 100644
--- a/src/slurmctld/power_save.c
+++ b/src/slurmctld/power_save.c
@@ -139,7 +139,7 @@ static void _do_power_work(void)
 		if (susp_state)
 			susp_total++;
 		if (susp_state &&
-		    ((suspend_rate == 0) || (suspend_cnt <= suspend_rate)) &&
+		    ((suspend_rate == 0) || (suspend_cnt < suspend_rate)) &&
 		    (bit_test(suspend_node_bitmap, i) == 0) &&
 		    (IS_NODE_ALLOCATED(node_ptr) ||
 		     (node_ptr->last_idle > (now - idle_time)))) {
@@ -157,7 +157,7 @@
 		}
 		if (run_suspend &&
 		    (susp_state == 0) &&
-		    ((resume_rate == 0) || (resume_cnt <= resume_rate)) &&
+		    ((resume_rate == 0) || (resume_cnt < resume_rate)) &&
 		    IS_NODE_IDLE(node_ptr) &&
 		    (!IS_NODE_COMPLETING(node_ptr)) &&
 		    (node_ptr->last_idle < (now - idle_time)) &&
diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index d864f9d14cb295b07ff4ce322674e6fe7fd89317..d9d2cd5fa4864483280976eb3794ee65ab9e6c34 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -3173,6 +3173,8 @@ _run_prolog(uint32_t jobid, uid_t uid, char *resv_id,
 	char *my_prolog;
 	char **my_env = _build_env(jobid, uid, resv_id,
 				   spank_job_env, spank_job_env_size);
+	time_t start_time = time(NULL), diff_time;
+	static uint16_t msg_timeout = 0;
 
 	slurm_mutex_lock(&conf->config_mutex);
 	my_prolog = xstrdup(conf->prolog);
@@ -3182,6 +3184,14 @@ _run_prolog(uint32_t jobid, uid_t uid, char *resv_id,
 	xfree(my_prolog);
 	_destroy_env(my_env);
 
+	diff_time = difftime(time(NULL), start_time);
+	if (msg_timeout == 0)
+		msg_timeout = slurm_get_msg_timeout();
+	if (diff_time >= msg_timeout) {
+		error("prolog for job %u ran for %d seconds",
+		      jobid, diff_time);
+	}
+
 	return error_code;
 }
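
The power_save.c comparison changes (<= tightened to <) are the NEWS entry
about changing state of "one too many nodes per minute": a counter that
starts at zero and is tested with cnt <= rate before each action permits
rate + 1 actions per scan. A self-contained sketch of the off-by-one (the
loop over 100 candidate nodes is hypothetical, not SLURM's):

/* Off-by-one sketch: a counter starting at zero and tested with
 * "cnt <= rate" before each action allows rate + 1 actions;
 * "cnt < rate" allows exactly rate. */
#include <stdio.h>

static int actions_allowed(int rate, int strict)
{
	int cnt = 0, actions = 0, i;

	for (i = 0; i < 100; i++) {	/* 100 candidate nodes */
		if (strict ? (cnt < rate) : (cnt <= rate)) {
			cnt++;		/* node state changed */
			actions++;
		}
	}
	return actions;
}

int main(void)
{
	int rate = 60;	/* default SuspendRate: 60 nodes/minute */

	printf("cnt <= rate allows %d changes\n", actions_allowed(rate, 0));
	printf("cnt <  rate allows %d changes\n", actions_allowed(rate, 1));
	return 0;
}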
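
The req.c addition times the prolog and logs an error when it runs longer
than the message timeout, which helps diagnose job launches that appear to
hang behind a slow prolog. One minor portability nit if this is revisited:
diff_time is a time_t printed with %d, where a cast to a fixed-width type
would be safer. A stand-alone sketch of the same measure-then-warn pattern
(the sleep command and 1-second threshold are placeholders for the real
prolog and slurm_get_msg_timeout()):

/* Stand-alone sketch of the measure-then-warn pattern added to
 * _run_prolog(). "/bin/sleep 2" stands in for the real prolog and
 * the 1-second threshold for slurm_get_msg_timeout(). */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main(void)
{
	time_t start_time = time(NULL);
	long diff_time;
	long timeout = 1;			/* placeholder threshold */
	int rc = system("/bin/sleep 2");	/* stand-in for the prolog */

	diff_time = (long) difftime(time(NULL), start_time);
	if (diff_time >= timeout)
		fprintf(stderr, "prolog ran for %ld seconds\n", diff_time);
	return rc;
}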