diff --git a/NEWS b/NEWS index 585e97e0bba855450128515cbcd7f10cb0e4530a..1534d5a5d68b9199591421d9e3fe9490ab67a027 100644 --- a/NEWS +++ b/NEWS @@ -247,14 +247,19 @@ documents those changes that are of interest to users and administrators. -- The start time of a reservation that is in ACTIVE state cannot be modified. -- Update the cgroup documentation about release agent for devices. -- MYSQL - fix for setting up preempt list on a QOS for multiple QOS. - -- Correct a minore error in the scancel.1 man page related to the + -- Correct a minor error in the scancel.1 man page related to the --signal option. -- Enhance the scancel.1 man page to document the sequence of signals sent - -- Fix slurmstepd core dump if the cgroup hierarchy is not completed. + -- Fix slurmstepd core dump if the cgroup hierarchy is not completed when terminating the job. -- Fix hostlist_shift to be able to give correct node names on names with a different number of dimensions than the cluster. -- BLUEGENE - Fix invalid pointer in corner case in the plugin. + -- Make sure on a reconfigure the select information for a node is preserved. + -- Correct logic to support job GRES specification over 31 bits (problem + in logic converting int to uint32_t). + -- Remove logic that was creating GRES bitmap for node when not needed (only + needed when GRES mapped to specific files). * Changes in Slurm 14.03.8 ========================== diff --git a/src/common/gres.c b/src/common/gres.c index a5b0d12be9b6551041c69891d2a7fcf0d7d9742c..e579cf0939a56192719d9324d03fd29482e5400f 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -1747,7 +1747,7 @@ extern int _node_config_validate(char *node_name, char *orig_config, else if (gres_data->gres_cnt_avail == NO_VAL) gres_data->gres_cnt_avail = 0; - if (context_ptr->has_file || gres_data->gres_cnt_avail) { + if (context_ptr->has_file) { if (gres_data->gres_bit_alloc == NULL) { gres_data->gres_bit_alloc = bit_alloc(gres_data->gres_cnt_avail); @@ -1883,7 +1883,7 @@ static int _node_reconfig(char *node_name, char *orig_config, char **new_config, else if (gres_data->gres_cnt_avail == NO_VAL) gres_data->gres_cnt_avail = 0; - if (context_ptr->has_file || gres_data->gres_cnt_avail) { + if (context_ptr->has_file) { if (gres_data->gres_bit_alloc == NULL) { gres_data->gres_bit_alloc = bit_alloc(gres_data->gres_cnt_avail); @@ -2490,7 +2490,7 @@ static int _job_state_validate(char *config, void **gres_data, { gres_job_state_t *gres_ptr; char *type = NULL, *num = NULL, *last_num = NULL; - int cnt; + long cnt; if (!strcmp(config, context_ptr->gres_name)) { cnt = 1; @@ -2511,7 +2511,7 @@ static int _job_state_validate(char *config, void **gres_data, cnt *= (1024 * 1024 * 1024); else return SLURM_ERROR; - if (cnt < 0) + if ((cnt < 0) || (cnt > 0xffffffff)) return SLURM_ERROR; } else { /* Did not find this GRES name, check for zero value */ diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 4a9b5e35de3c92bd7fed8ffb839a587c8e85376a..b1d843adff301508b3d876b6b9b3dbdd21a916fe 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -1154,6 +1154,8 @@ static int _restore_node_state(int recover, for (i=0, old_node_ptr=old_node_table_ptr; i<old_node_record_count; i++, old_node_ptr++) { uint32_t drain_flag = false, down_flag = false; + dynamic_plugin_data_t *tmp_select_nodeinfo; + node_ptr = find_node_record(old_node_ptr->name); if (node_ptr == NULL) continue; @@ -1195,6 +1197,12 @@ static int _restore_node_state(int recover, node_ptr->last_response = old_node_ptr->last_response; + /* make sure we get the old state from the select + * plugin, just swap it out to avoid possible memory leak */ + tmp_select_nodeinfo = node_ptr->select_nodeinfo; + node_ptr->select_nodeinfo = old_node_ptr->select_nodeinfo; + old_node_ptr->select_nodeinfo = tmp_select_nodeinfo; + #ifndef HAVE_BG /* If running on a BlueGene system the cpus never change so just skip this.