diff --git a/NEWS b/NEWS index 36d61760a21bf2992d03f5c328e1940242245dd9..152f6a7fb5567f655a5495ccc680dac71cd20500 100644 --- a/NEWS +++ b/NEWS @@ -93,6 +93,8 @@ documents those changes that are of interest to users and admins. -- Fix infinite loop when using accounting_storage/mysql plugin either from the slurmctld or slurmdbd daemon. -- Added more thread safety for assoc_mgr in the controller. + -- For sched/wiki2 (Moab), permit clearing of a job's dependencies with the + JOB_MODIFY option "DEPEND=0". * Changes in SLURM 1.3.10 ========================= diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index dacf52d3e0712af46e6be14a1582956e5d9c5882..10b16bb099c8bd43f39115dff78d4c55ad74970d 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -855,7 +855,8 @@ Comma separated list of Quality of Service names (Defined in sacctmgr). > sacctmgr list associations cluster=tux format=Account,Cluster,User,Fairshare tree withd .br > sacctmgr list transactions StartTime=11/03\-10:30:00 format=Timestamp,Action,Actor -.br> sacctmgr dump cluster=tux tux_data_file +.br +> sacctmgr dump cluster=tux tux_data_file .br > sacctmgr load tux_data_file .br diff --git a/doc/man/man1/scancel.1 b/doc/man/man1/scancel.1 index 773fcecc0e9ded8a464fd1927bfdbdb855b1d27c..da20d53677010628d3d06848d4a2c9ab00f02fae 100644 --- a/doc/man/man1/scancel.1 +++ b/doc/man/man1/scancel.1 @@ -1,4 +1,4 @@ -.TH SCANCEL "1" "August 2007" "scancel 1.2" "Slurm components" +.TH SCANCEL "1" "November 2008" "scancel 1.2" "Slurm components" .SH "NAME" scancel \- Used to signal jobs or job steps that are under the control of Slurm. @@ -20,6 +20,10 @@ be printed and the job will not be signaled. \fB\-b\fR, \fB\-\-batch\fR Signal the batch job shell and its child processes. This is not applicable if \fIstep_id\fR is specified. +NOTE: The shell itself may exit upon receipt of many signals. +You may avoid this by explicitly trapping signals within the shell +script (e.g. "trap <arg> <signals>"). See the shell documentation +for details. .TP \fB-\-ctld\fR diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index e384256e53c4465d53d747664a990a6544748706..99b12d6b65fc38dd1cca4b82d39d1b564bc6b096 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -304,9 +304,6 @@ Set the job's minimum number of cores per socket to the specified value. \fIMinMemory\fP=<megabytes> Set the job's minimum real memory required per nodes to the specified value. .TP -\fIMinNodes\fP=<min_count>[-<max_count>] -Set the job's minimum and optionally maximum count of nodes to be allocated. -.TP \fIMinProcs\fP=<count> Set the job's minimum number of processors per nodes to the specified value. .TP @@ -342,8 +339,8 @@ Set the job's list of required node. Multiple node names may be specified using simple node range expressions (e.g. "lx[10\-20]"). Value may be cleared with blank data value, "ReqNodeList=". .TP -\fIReqNodes\fP=<count> -Set the job's count of required nodes to the specified value. +\fIReqNodes\fP=<min_count>[-<max_count>] +Set the job's minimum and optionally maximum count of nodes to be allocated. .TP \fIReqSockets\fP=<count> Set the job's count of required sockets to the specified value. diff --git a/src/common/env.c b/src/common/env.c index e0c26612031ee18a22ebcd872fb3c6c730922523..a8d91d80972677ff8e17f1b952b2cc65621a0caa 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -734,8 +734,8 @@ static char *_uint16_array_to_str(int array_len, const uint16_t *array) * array. Free with xfree().
*/ extern char *uint32_compressed_to_str(uint32_t array_len, - const uint16_t *array, - const uint32_t *array_reps) + const uint16_t *array, + const uint32_t *array_reps) { int i; char *sep = ","; /* seperator */ @@ -778,7 +778,7 @@ extern char *uint32_compressed_to_str(uint32_t array_len, */ void env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, - job_desc_msg_t *desc) + const job_desc_msg_t *desc) { char *bgl_part_id = NULL, *tmp; slurm_step_layout_t *step_layout = NULL; @@ -816,13 +816,14 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", alloc->node_cnt); env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", alloc->node_list); - if(num_tasks == NO_VAL) + if(num_tasks == NO_VAL) num_tasks = desc->num_procs; step_layout = slurm_step_layout_create(alloc->node_list, alloc->cpus_per_node, alloc->cpu_count_reps, alloc->node_cnt, num_tasks, + desc->cpus_per_task, desc->task_dist, desc->plane_size); tmp = _uint16_array_to_str(step_layout->node_cnt, @@ -861,13 +862,17 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, { char *tmp; uint32_t num_nodes = 0; + uint32_t num_cpus = 0; int i; slurm_step_layout_t *step_layout = NULL; - /* there is no explicit node count in the batch structure, - so we need to calculate the node count */ + /* There is no explicit node count in the batch structure, + * so we need to calculate the node count. We also need to + * figure out the explicit cpu count so we can figure out the + * cpus_per_task. */ for (i = 0; i < batch->num_cpu_groups; i++) { num_nodes += batch->cpu_count_reps[i]; + num_cpus += batch->cpu_count_reps[i] * batch->cpus_per_node[i]; } env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", batch->job_id); @@ -891,6 +896,14 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", num_nodes); env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", batch->nodes); if(batch->nprocs) { + /* we can figure out the cpus_per_task here by + * reversing what happens in sbatch */ + int cpus_per_task = num_cpus / batch->nprocs; +/* info(" we have %u / %u = %u", num_cpus, */ +/* batch->nprocs, cpus_per_task); */ + if(cpus_per_task < 1) + cpus_per_task = 1; + xfree(tmp); env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u", batch->nprocs); @@ -899,6 +912,7 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, batch->cpu_count_reps, num_nodes, batch->nprocs, + (uint16_t)cpus_per_task, (uint16_t) SLURM_DIST_BLOCK, (uint16_t)NO_VAL); @@ -907,6 +921,7 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, slurm_step_layout_destroy(step_layout); } env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp); + xfree(tmp); } diff --git a/src/common/env.h b/src/common/env.h index 1162f3c4ce637abfa9aa77bdf818cb80452dc61c..6c577dab44024a1d5e9ab9e247fc5c78404ba9a9 100644 --- a/src/common/env.h +++ b/src/common/env.h @@ -105,7 +105,7 @@ int setup_env(env_t *env); */ void env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, - job_desc_msg_t *desc); + const job_desc_msg_t *desc); /* * Set in "dest" the environment variables relevant to a SLURM batch diff --git a/src/common/read_config.c b/src/common/read_config.c index fab650a7be6b6e5429849bac1ce43a08985033f6..e3396528690fdbd77442678fc66d2da260071f53 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -1760,10 +1760,12 @@ 
validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_uint16(&conf->max_job_cnt, "MaxJobCount", hashtbl)) conf->max_job_cnt = DEFAULT_MAX_JOB_COUNT; - if ((s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerCPU", hashtbl)) || - (s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerTask", hashtbl))) + if ((s_p_get_uint32(&conf->max_mem_per_task, + "MaxMemPerCPU", hashtbl)) || + (s_p_get_uint32(&conf->max_mem_per_task, + "MaxMemPerTask", hashtbl))) { conf->max_mem_per_task |= MEM_PER_CPU; - else if (!s_p_get_uint32(&conf->max_mem_per_task, + } else if (!s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerNode", hashtbl)) { conf->max_mem_per_task = DEFAULT_MAX_MEM_PER_CPU; } diff --git a/src/common/slurm_resource_info.c b/src/common/slurm_resource_info.c index bf1692ac3fa8484f1a59c2ec89ab55122fdfb942..3501f0c062006e98ddd134d5682b8ac8ec07777d 100644 --- a/src/common/slurm_resource_info.c +++ b/src/common/slurm_resource_info.c @@ -289,7 +289,6 @@ int slurm_get_avail_procs(const uint16_t max_sockets, /*** factor cpus_per_task into max_cpus ***/ max_cpus *= cpus_per_task; - /*** round down available based on cpus_per_task ***/ avail_cpus = (*cpus / cpus_per_task) * cpus_per_task; avail_cpus = MIN(avail_cpus, max_cpus); diff --git a/src/common/slurm_step_layout.c b/src/common/slurm_step_layout.c index a8e22842312aeeb66649f22f2d0283af8729e70c..4b144d205fcfd4517f0190e500298801b4bd0e8e 100644 --- a/src/common/slurm_step_layout.c +++ b/src/common/slurm_step_layout.c @@ -56,6 +56,7 @@ static int _init_task_layout(slurm_step_layout_t *step_layout, const char *arbitrary_nodes, uint16_t *cpus_per_node, uint32_t *cpu_count_reps, + uint16_t cpus_per_task, uint16_t task_dist, uint16_t plane_size); static int _task_layout_block(slurm_step_layout_t *step_layout, @@ -80,6 +81,7 @@ static int _task_layout_hostfile(slurm_step_layout_t *step_layout, * IN cpu_count_reps - how many nodes have same cpu count * IN num_hosts - number of hosts we have * IN num_tasks - number of tasks to distribute across these cpus + * IN cpus_per_task - number of cpus per task * IN task_dist - type of distribution we are using * IN plane_size - plane size (only needed for the plane distribution) * RET a pointer to an slurm_step_layout_t structure @@ -90,6 +92,7 @@ slurm_step_layout_t *slurm_step_layout_create( uint16_t *cpus_per_node, uint32_t *cpu_count_reps, uint32_t num_hosts, uint32_t num_tasks, + uint16_t cpus_per_task, uint16_t task_dist, uint16_t plane_size) { @@ -127,6 +130,7 @@ slurm_step_layout_t *slurm_step_layout_create( if(_init_task_layout(step_layout, arbitrary_nodes, cpus_per_node, cpu_count_reps, + cpus_per_task, task_dist, plane_size) != SLURM_SUCCESS) { slurm_step_layout_destroy(step_layout); @@ -390,6 +394,7 @@ char *slurm_step_layout_host_name (slurm_step_layout_t *s, int taskid) static int _init_task_layout(slurm_step_layout_t *step_layout, const char *arbitrary_nodes, uint16_t *cpus_per_node, uint32_t *cpu_count_reps, + uint16_t cpus_per_task, uint16_t task_dist, uint16_t plane_size) { int cpu_cnt = 0, cpu_inx = 0, i; @@ -401,7 +406,10 @@ static int _init_task_layout(slurm_step_layout_t *step_layout, return SLURM_ERROR; if (step_layout->tasks) /* layout already completed */ return SLURM_SUCCESS; - + + if((int)cpus_per_task < 1 || cpus_per_task == (uint16_t)NO_VAL) + cpus_per_task = 1; + step_layout->plane_size = plane_size; step_layout->tasks = xmalloc(sizeof(uint16_t) @@ -423,7 +431,7 @@ static int _init_task_layout(slurm_step_layout_t *step_layout, hostlist_destroy(hl); return 
SLURM_ERROR; } - + for (i=0; i<step_layout->node_cnt; i++) { /* name = hostlist_shift(hl); */ /* if(!name) { */ @@ -433,7 +441,7 @@ static int _init_task_layout(slurm_step_layout_t *step_layout, /* } */ /* debug2("host %d = %s", i, name); */ /* free(name); */ - cpus[i] = cpus_per_node[cpu_inx]; + cpus[i] = (cpus_per_node[cpu_inx] / cpus_per_task); //info("got %d cpus", cpus[i]); if ((++cpu_cnt) >= cpu_count_reps[cpu_inx]) { /* move to next record */ diff --git a/src/common/slurm_step_layout.h b/src/common/slurm_step_layout.h index 2b19f04015d572a4a7faa52ff7839aba551e9be9..fc71d22c32bfee35d88e016c86c28ca5b8ddb268 100644 --- a/src/common/slurm_step_layout.h +++ b/src/common/slurm_step_layout.h @@ -55,6 +55,7 @@ * IN cpu_count_reps - how many nodes have same cpu count * IN node_cnt - number of nodes we have * IN task_cnt - number of tasks to distribute across these cpus + * IN cpus_per_task - number of cpus per task * IN task_dist - type of distribution we are using * IN plane_size - plane size (only needed for the plane distribution) * RET a pointer to an slurm_step_layout_t structure @@ -65,6 +66,7 @@ extern slurm_step_layout_t *slurm_step_layout_create(const char *tlist, uint32_t *cpu_count_reps, uint32_t node_cnt, uint32_t task_cnt, + uint16_t cpus_per_task, uint16_t task_dist, uint16_t plane_size); diff --git a/src/plugins/select/bluegene/block_allocator/bridge_linker.c b/src/plugins/select/bluegene/block_allocator/bridge_linker.c index f3251f291250c0409446941a32d99776fdcc57e6..89a3c54e22dc19f6a0418307f2f28649bcc43494 100644 --- a/src/plugins/select/bluegene/block_allocator/bridge_linker.c +++ b/src/plugins/select/bluegene/block_allocator/bridge_linker.c @@ -81,6 +81,9 @@ typedef struct { /* all the pm functions */ status_t (*create_partition)(pm_partition_id_t pid); +#ifndef HAVE_BGL + status_t (*reboot_partition)(pm_partition_id_t pid); +#endif status_t (*destroy_partition)(pm_partition_id_t pid); /* set say message stuff */ @@ -169,6 +172,7 @@ extern int bridge_init() "jm_signal_job", "jm_cancel_job", "pm_create_partition", + "pm_reboot_partition", "pm_destroy_partition", "setSayMessageParams" }; @@ -599,6 +603,21 @@ extern status_t bridge_create_block(pm_partition_id_t pid) } +#ifndef HAVE_BGL +extern status_t bridge_reboot_block(pm_partition_id_t pid) +{ + int rc = CONNECTION_ERROR; + if(!bridge_init()) + return rc; + + slurm_mutex_lock(&api_file_mutex); + rc = (*(bridge_api.reboot_partition))(pid); + slurm_mutex_unlock(&api_file_mutex); + return rc; + +} +#endif + extern status_t bridge_destroy_block(pm_partition_id_t pid) { int rc = CONNECTION_ERROR; diff --git a/src/plugins/select/bluegene/plugin/bg_block_info.c b/src/plugins/select/bluegene/plugin/bg_block_info.c index 7cc0055b804ee4119921278e2c49c6d1a1e1965c..315d246e3e31efcd75cfbb38a781c2e3088ff556 100644 --- a/src/plugins/select/bluegene/plugin/bg_block_info.c +++ b/src/plugins/select/bluegene/plugin/bg_block_info.c @@ -88,15 +88,15 @@ static int _block_is_deallocating(bg_record_t *bg_record) if(bg_record->modifying) return SLURM_SUCCESS; - slurm_conf_lock(); - user_name = xstrdup(slurmctld_conf.slurm_user_name); + + user_name = xstrdup(bg_slurm_user_name); if(remove_all_users(bg_record->bg_block_id, NULL) == REMOVE_USER_ERR) { error("Something happened removing " "users from block %s", bg_record->bg_block_id); } - slurm_conf_unlock(); + if(bg_record->target_name && bg_record->user_name) { if(!strcmp(bg_record->target_name, user_name)) { @@ -344,10 +344,10 @@ extern int update_block_list() debug3("checking to make sure user 
%s " "is the user.", bg_record->target_name); - slurm_conf_lock(); + if(update_block_user(bg_record, 0) == 1) last_bg_update = time(NULL); - slurm_conf_unlock(); + break; case RM_PARTITION_ERROR: error("block in an error state"); @@ -404,6 +404,12 @@ extern int update_block_list() "free state.", bg_record->bg_block_id); break; +#ifndef HAVE_BGL + case RM_PARTITION_REBOOTING: + debug2("Block %s is rebooting.", + bg_record->bg_block_id); + break; +#endif default: debug("Hey the state of block " "%s is %d(%s) doing nothing.", diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index b06a78e62426b27a5786f0a2208d4056785e0033..9bfd36fe607bd37570f5538dc311291242b0c646 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -928,16 +928,16 @@ static int _find_best_block_match(List block_list, tmp_record = xmalloc(sizeof(bg_record_t)); tmp_record->bg_block_list = list_create(destroy_ba_node); - slurm_conf_lock(); - len += strlen(slurmctld_conf.node_prefix)+1; + + len += strlen(bg_slurm_node_prefix)+1; tmp_record->nodes = xmalloc(len); snprintf(tmp_record->nodes, len, "%s%s", - slurmctld_conf.node_prefix, + bg_slurm_node_prefix, tmp_nodes+i); - slurm_conf_unlock(); + process_nodes(tmp_record, false); for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) { diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c index 3fdf8600597fd3b7dcc081bbf704d862575833c5..d0ca4e70e32bd9281218001ff473ae38719b100a 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_run.c +++ b/src/plugins/select/bluegene/plugin/bg_job_run.c @@ -197,21 +197,20 @@ static int _reset_block(bg_record_t *bg_record) } /* remove user from list */ - slurm_conf_lock(); + if(bg_record->target_name) { if(strcmp(bg_record->target_name, - slurmctld_conf.slurm_user_name)) { + bg_slurm_user_name)) { xfree(bg_record->target_name); bg_record->target_name = - xstrdup(slurmctld_conf. 
- slurm_user_name); + xstrdup(bg_slurm_user_name); } update_block_user(bg_record, 1); } else { bg_record->target_name = - xstrdup(slurmctld_conf.slurm_user_name); + xstrdup(bg_slurm_user_name); } - slurm_conf_unlock(); + bg_record->boot_state = 0; bg_record->boot_count = 0; @@ -550,8 +549,12 @@ static void _start_agent(bg_update_t *bg_update_ptr) bg_record->modifying = 0; slurm_mutex_unlock(&block_state_mutex); } else if(bg_update_ptr->reboot) +#ifdef HAVE_BGL bg_free_block(bg_record); - +#else + bg_reboot_block(bg_record); +#endif + if(bg_record->state == RM_PARTITION_FREE) { if((rc = boot_block(bg_record)) != SLURM_SUCCESS) { slurm_mutex_lock(&block_state_mutex); @@ -1195,25 +1198,19 @@ extern int sync_jobs(List job_list) extern int boot_block(bg_record_t *bg_record) { #ifdef HAVE_BG_FILES - int rc; - - - slurm_conf_lock(); + int rc; + if ((rc = bridge_set_block_owner(bg_record->bg_block_id, - slurmctld_conf.slurm_user_name)) + bg_slurm_user_name)) != STATUS_OK) { error("bridge_set_part_owner(%s,%s): %s", bg_record->bg_block_id, - slurmctld_conf.slurm_user_name, + bg_slurm_user_name, bg_err_str(rc)); - slurm_conf_unlock(); - return SLURM_ERROR; - } - slurm_conf_unlock(); + } - info("Booting block %s", - bg_record->bg_block_id); + info("Booting block %s", bg_record->bg_block_id); if ((rc = bridge_create_block(bg_record->bg_block_id)) != STATUS_OK) { error("bridge_create_block(%s): %s", diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.c b/src/plugins/select/bluegene/plugin/bg_record_functions.c index f9dd2cfe16c560fa98b9e737ee8b0efbd914b311..9c2c6aa447be9f31ac8927cd046ca8a60ddaa816 100644 --- a/src/plugins/select/bluegene/plugin/bg_record_functions.c +++ b/src/plugins/select/bluegene/plugin/bg_record_functions.c @@ -469,7 +469,7 @@ extern int update_block_user(bg_record_t *bg_record, int set) } if(!bg_record->user_name) { error("No user_name"); - bg_record->user_name = xstrdup(slurmctld_conf.slurm_user_name); + bg_record->user_name = xstrdup(bg_slurm_user_name); } #ifdef HAVE_BG_FILES int rc=0; @@ -483,7 +483,7 @@ extern int update_block_user(bg_record_t *bg_record, int set) return -1; } else if (rc == REMOVE_USER_NONE) { if (strcmp(bg_record->target_name, - slurmctld_conf.slurm_user_name)) { + bg_slurm_user_name)) { info("Adding user %s to Block %s", bg_record->target_name, bg_record->bg_block_id); @@ -636,12 +636,12 @@ extern int add_bg_record(List records, List used_nodes, blockreq_t *blockreq) } bg_record = (bg_record_t*) xmalloc(sizeof(bg_record_t)); - slurm_conf_lock(); + bg_record->user_name = - xstrdup(slurmctld_conf.slurm_user_name); + xstrdup(bg_slurm_user_name); bg_record->target_name = - xstrdup(slurmctld_conf.slurm_user_name); - slurm_conf_unlock(); + xstrdup(bg_slurm_user_name); + pw_uid = uid_from_string(bg_record->user_name); if(pw_uid == (uid_t) -1) { error("No such user: %s", bg_record->user_name); @@ -676,12 +676,12 @@ extern int add_bg_record(List records, List used_nodes, blockreq_t *blockreq) if(i<len) { len -= i; - slurm_conf_lock(); - len += strlen(slurmctld_conf.node_prefix)+1; + + len += strlen(bg_slurm_node_prefix)+1; bg_record->nodes = xmalloc(len); snprintf(bg_record->nodes, len, "%s%s", - slurmctld_conf.node_prefix, blockreq->block+i); - slurm_conf_unlock(); + bg_slurm_node_prefix, blockreq->block+i); + } else fatal("BPs=%s is in a weird format", blockreq->block); @@ -850,13 +850,13 @@ static int _addto_node_list(bg_record_t *bg_record, int *start, int *end) for (x = start[X]; x <= end[X]; x++) { for (y = start[Y]; y <= end[Y]; y++) { for (z 
= start[Z]; z <= end[Z]; z++) { - slurm_conf_lock(); + snprintf(node_name_tmp, sizeof(node_name_tmp), "%s%c%c%c", - slurmctld_conf.node_prefix, + bg_slurm_node_prefix, alpha_num[x], alpha_num[y], alpha_num[z]); - slurm_conf_unlock(); + ba_node = ba_copy_node( &ba_system_ptr->grid[x][y][z]); ba_node->used = 1; diff --git a/src/plugins/select/bluegene/plugin/block_sys.c b/src/plugins/select/bluegene/plugin/block_sys.c index b337bf6b7204d305d3d4939fa5543c0baa123351..5f8a62765147de3b4b876ece86c84b1053384d0d 100755 --- a/src/plugins/select/bluegene/plugin/block_sys.c +++ b/src/plugins/select/bluegene/plugin/block_sys.c @@ -148,12 +148,12 @@ static void _pre_allocate(bg_record_t *bg_record) &send_psets)) != STATUS_OK) error("bridge_set_data(RM_PartitionPsetsPerBP)", bg_err_str(rc)); - slurm_conf_lock(); + if ((rc = bridge_set_data(bg_record->bg_block, RM_PartitionUserName, - slurmctld_conf.slurm_user_name)) + bg_slurm_user_name)) != STATUS_OK) error("bridge_set_data(RM_PartitionUserName)", bg_err_str(rc)); - slurm_conf_unlock(); + /* info("setting it here"); */ /* bg_record->bg_block_id = "RMP101"; */ /* if ((rc = bridge_set_data(bg_record->bg_block, RM_PartitionID, */ @@ -213,14 +213,14 @@ static int _post_allocate(bg_record_t *bg_record) xfree(bg_record->target_name); - slurm_conf_lock(); + bg_record->target_name = - xstrdup(slurmctld_conf.slurm_user_name); + xstrdup(bg_slurm_user_name); xfree(bg_record->user_name); bg_record->user_name = - xstrdup(slurmctld_conf.slurm_user_name); - slurm_conf_unlock(); + xstrdup(bg_slurm_user_name); + my_uid = uid_from_string(bg_record->user_name); if (my_uid == (uid_t) -1) { @@ -625,14 +625,14 @@ int read_bg_blocks() } free(bpid); - slurm_conf_lock(); + snprintf(node_name_tmp, sizeof(node_name_tmp), "%s%c%c%c", - slurmctld_conf.node_prefix, + bg_slurm_node_prefix, alpha_num[coord[X]], alpha_num[coord[Y]], alpha_num[coord[Z]]); - slurm_conf_unlock(); + hostlist_push(hostlist, node_name_tmp); } @@ -683,14 +683,12 @@ int read_bg_blocks() bg_err_str(rc)); } else { if(bp_cnt==0) { - slurm_conf_lock(); + bg_record->user_name = - xstrdup(slurmctld_conf. - slurm_user_name); + xstrdup(bg_slurm_user_name); bg_record->target_name = - xstrdup(slurmctld_conf. - slurm_user_name); - slurm_conf_unlock(); + xstrdup(bg_slurm_user_name); + } else { user_name = NULL; if ((rc = bridge_get_data( @@ -710,11 +708,10 @@ int read_bg_blocks() bg_record->user_name = xstrdup(user_name); if(!bg_record->boot_state) { - slurm_conf_lock(); + bg_record->target_name = - xstrdup(slurmctld_conf. 
- slurm_user_name); - slurm_conf_unlock(); + xstrdup(bg_slurm_user_name); + } else bg_record->target_name = xstrdup(user_name); diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index 70ed38e41906d61478678d34838c7a8b746c83b9..56cbf3702fb147e6b3529b42e79ac86c3f34fb49 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -69,6 +69,8 @@ char *default_blrtsimage = NULL; char *default_linuximage = NULL; char *default_mloaderimage = NULL, *default_ramdiskimage = NULL; char *bridge_api_file = NULL; +char *bg_slurm_user_name = NULL; +char *bg_slurm_node_prefix = NULL; bg_layout_t bluegene_layout_mode = NO_VAL; uint16_t bluegene_numpsets = 0; uint16_t bluegene_bp_node_cnt = 0; @@ -131,6 +133,7 @@ extern int init_bg(void) DIM_SIZE[Z]=bp_size.Z; #endif ba_init(NULL); + info("BlueGene plugin loaded successfully"); return SLURM_SUCCESS; @@ -200,6 +203,8 @@ extern void fini_bg(void) xfree(default_ramdiskimage); xfree(bridge_api_file); xfree(bg_conf); + xfree(bg_slurm_user_name); + xfree(bg_slurm_node_prefix); #ifdef HAVE_BG_FILES if(bg) @@ -307,7 +312,7 @@ extern int remove_all_users(char *bg_block_id, char *user_name) error("No user was returned from database"); continue; } - if(!strcmp(user, slurmctld_conf.slurm_user_name)) { + if(!strcmp(user, bg_slurm_user_name)) { free(user); continue; } @@ -348,7 +353,7 @@ extern int set_block_user(bg_record_t *bg_record) bg_record->bg_block_id); bg_record->boot_state = 0; bg_record->boot_count = 0; - slurm_conf_lock(); + if((rc = update_block_user(bg_record, 1)) == 1) { last_bg_update = time(NULL); rc = SLURM_SUCCESS; @@ -359,9 +364,8 @@ extern int set_block_user(bg_record_t *bg_record) rc = SLURM_ERROR; } xfree(bg_record->target_name); - bg_record->target_name = - xstrdup(slurmctld_conf.slurm_user_name); - slurm_conf_unlock(); + bg_record->target_name = xstrdup(bg_slurm_user_name); + return rc; } @@ -595,6 +599,69 @@ extern int bg_free_block(bg_record_t *bg_record) return SLURM_SUCCESS; } +#ifndef HAVE_BGL +/* This function not available in bgl land */ +extern int bg_reboot_block(bg_record_t *bg_record) +{ +#ifdef HAVE_BG_FILES + int rc; +#endif + if(!bg_record) { + error("bg_reboot_block: there was no bg_record"); + return SLURM_ERROR; + } + + while (1) { + if(!bg_record) { + error("bg_reboot_block: there was no bg_record"); + return SLURM_ERROR; + } + + slurm_mutex_lock(&block_state_mutex); + if (bg_record->state != NO_VAL + && bg_record->state != RM_PARTITION_REBOOTING) { +#ifdef HAVE_BG_FILES + debug2("bridge_reboot %s", bg_record->bg_block_id); + + rc = bridge_reboot_block(bg_record->bg_block_id); + if (rc != STATUS_OK) { + if(rc == PARTITION_NOT_FOUND) { + debug("block %s is not found", + bg_record->bg_block_id); + break; + } else if(rc == INCOMPATIBLE_STATE) { + debug2("bridge_reboot_partition" + "(%s): %s State = %d", + bg_record->bg_block_id, + bg_err_str(rc), + bg_record->state); + } else { + error("bridge_reboot_partition" + "(%s): %s State = %d", + bg_record->bg_block_id, + bg_err_str(rc), + bg_record->state); + } + } +#else + bg_record->state = RM_PARTITION_READY; + break; +#endif + } + + if ((bg_record->state == RM_PARTITION_CONFIGURING) + || (bg_record->state == RM_PARTITION_ERROR)) { + break; + } + slurm_mutex_unlock(&block_state_mutex); + sleep(3); + } + slurm_mutex_unlock(&block_state_mutex); + + return SLURM_SUCCESS; +} +#endif + /* Free multiple blocks in parallel */ extern void *mult_free_block(void *args) { diff --git 
a/src/plugins/select/bluegene/plugin/bluegene.h b/src/plugins/select/bluegene/plugin/bluegene.h index d80c26dd54f8e346f5632f070672b9103a64cf75..f06c08aca34b6aa0b7161b847bdfa7b6f290db2c 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.h +++ b/src/plugins/select/bluegene/plugin/bluegene.h @@ -60,6 +60,8 @@ extern char *default_linuximage; extern char *default_mloaderimage; extern char *default_ramdiskimage; extern char *bridge_api_file; +extern char *bg_slurm_user_name; +extern char *bg_slurm_node_prefix; extern bg_layout_t bluegene_layout_mode; extern uint16_t bluegene_numpsets; extern uint16_t bluegene_bp_node_cnt; @@ -136,6 +138,11 @@ extern void sort_bg_record_inc_size(List records); extern void *bluegene_agent(void *args); extern int bg_free_block(bg_record_t *bg_record); + +#ifndef HAVE_BGL +extern int bg_reboot_block(bg_record_t *bg_record); +#endif + extern int remove_from_bg_list(List my_bg_list, bg_record_t *bg_record); extern bg_record_t *find_and_remove_org_from_bg_list(List my_list, bg_record_t *bg_record); diff --git a/src/plugins/select/bluegene/plugin/defined_block.c b/src/plugins/select/bluegene/plugin/defined_block.c index 258098db47ecaa95d443ad40db5eab93c6f16c71..610ef356fc4fb61f60c9719f58dc363468126fef 100644 --- a/src/plugins/select/bluegene/plugin/defined_block.c +++ b/src/plugins/select/bluegene/plugin/defined_block.c @@ -161,11 +161,11 @@ extern int create_defined_blocks(bg_layout_t overlapped, &block_state_mutex); return SLURM_ERROR; } - slurm_conf_lock(); + snprintf(temp, sizeof(temp), "%s%s", - slurmctld_conf.node_prefix, + bg_slurm_node_prefix, name); - slurm_conf_unlock(); + xfree(name); if(strcmp(temp, bg_record->nodes)) { fatal("given list of %s " @@ -270,18 +270,18 @@ extern int create_full_system_block(List bg_found_block_list) /* geo[Y] = max_dim[Y]; */ /* geo[Z] = max_dim[Z]; */ /* #endif */ - slurm_conf_lock(); - i = (10+strlen(slurmctld_conf.node_prefix)); + + i = (10+strlen(bg_slurm_node_prefix)); name = xmalloc(i); if((geo[X] == 0) && (geo[Y] == 0) && (geo[Z] == 0)) snprintf(name, i, "%s000", - slurmctld_conf.node_prefix); + bg_slurm_node_prefix); else snprintf(name, i, "%s[000x%c%c%c]", - slurmctld_conf.node_prefix, + bg_slurm_node_prefix, alpha_num[geo[X]], alpha_num[geo[Y]], alpha_num[geo[Z]]); - slurm_conf_unlock(); + if(bg_found_block_list) { itr = list_iterator_create(bg_found_block_list); diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c index 06e0f00a1cf20db530c8dbc9ec3e0ae207e46460..85903f995d1fd940403cc8a04004add2a340979c 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -218,6 +218,16 @@ extern int fini ( void ) */ extern int select_p_block_init(List part_list) { + xfree(bg_slurm_user_name); + xfree(bg_slurm_node_prefix); + + slurm_conf_lock(); + xassert(slurmctld_conf.slurm_user_name); + xassert(slurmctld_conf.node_prefix); + bg_slurm_user_name = xstrdup(slurmctld_conf.slurm_user_name); + bg_slurm_node_prefix = xstrdup(slurmctld_conf.node_prefix); + slurm_conf_unlock(); + #ifdef HAVE_BG if(read_bg_conf() == SLURM_ERROR) { fatal("Error, could not read the file"); @@ -297,12 +307,15 @@ extern int select_p_state_save(char *dir_name) \* unlock_slurmctld(part_read_lock); - see below */ /* write the buffer to file */ + slurm_conf_lock(); old_file = xstrdup(slurmctld_conf.state_save_location); xstrcat(old_file, "/block_state.old"); reg_file = xstrdup(slurmctld_conf.state_save_location); 
xstrcat(reg_file, "/block_state"); new_file = xstrdup(slurmctld_conf.state_save_location); xstrcat(new_file, "/block_state.new"); + slurm_conf_unlock(); + log_fd = creat(new_file, 0600); if (log_fd == 0) { error("Can't save state, error creating file %s, %m", @@ -539,12 +552,12 @@ extern int select_p_state_restore(char *dir_name) process_nodes(bg_record, true); - slurm_conf_lock(); + bg_record->target_name = - xstrdup(slurmctld_conf.slurm_user_name); + xstrdup(bg_slurm_user_name); bg_record->user_name = - xstrdup(slurmctld_conf.slurm_user_name); - slurm_conf_unlock(); + xstrdup(bg_slurm_user_name); + my_uid = uid_from_string(bg_record->user_name); if (my_uid == (uid_t) -1) { error("uid_from_strin(%s): %m", @@ -581,11 +594,11 @@ extern int select_p_state_restore(char *dir_name) continue; } - slurm_conf_lock(); + snprintf(temp, sizeof(temp), "%s%s", - slurmctld_conf.node_prefix, + bg_slurm_node_prefix, name); - slurm_conf_unlock(); + xfree(name); if(strcmp(temp, bg_record->nodes)) { diff --git a/src/plugins/select/bluegene/plugin/state_test.c b/src/plugins/select/bluegene/plugin/state_test.c index 93db7cae8a0a95d09075a048e042a48c4f47b192..b6c9a9881400bbba6f0acb1c06d11e323f001879 100644 --- a/src/plugins/select/bluegene/plugin/state_test.c +++ b/src/plugins/select/bluegene/plugin/state_test.c @@ -119,12 +119,12 @@ static void _configure_node_down(rm_bp_id_t bp_id, my_bluegene_t *bg) error("bridge_get_data(RM_BPLoc): %s", bg_err_str(rc)); continue; } - slurm_conf_lock(); + snprintf(bg_down_node, sizeof(bg_down_node), "%s%c%c%c", - slurmctld_conf.node_prefix, + bg_slurm_node_prefix, alpha_num[bp_loc.X], alpha_num[bp_loc.Y], alpha_num[bp_loc.Z]); - slurm_conf_unlock(); + if (node_already_down(bg_down_node)) break; @@ -212,12 +212,12 @@ static void _test_down_nodes(my_bluegene_t *bg) continue; } - slurm_conf_lock(); + snprintf(bg_down_node, sizeof(bg_down_node), "%s%c%c%c", - slurmctld_conf.node_prefix, + bg_slurm_node_prefix, alpha_num[bp_loc.X], alpha_num[bp_loc.Y], alpha_num[bp_loc.Z]); - slurm_conf_unlock(); + if (node_already_down(bg_down_node)) continue; @@ -425,12 +425,12 @@ extern int check_block_bp_states(char *bg_block_id) "BP ID %s", (char *) bpid); } free(bpid); - slurm_conf_lock(); + snprintf(bg_down_node, sizeof(bg_down_node), "%s%c%c%c", - slurmctld_conf.node_prefix, + bg_slurm_node_prefix, alpha_num[coord[X]], alpha_num[coord[Y]], alpha_num[coord[Z]]); - slurm_conf_unlock(); + if (node_already_down(bg_down_node)) continue; diff --git a/src/scontrol/update_job.c b/src/scontrol/update_job.c index 6a68a73bc67542fe4f4e08e1eb7960f542521277..27df3af0c6003f429b935675c3cf99eb4cd32ed6 100644 --- a/src/scontrol/update_job.c +++ b/src/scontrol/update_job.c @@ -258,6 +258,7 @@ scontrol_update_job (int argc, char *argv[]) (char **) NULL, 10); update_cnt++; } + /* MinNodes was replaced by ReqNodes in SLURM version 1.2 */ else if ((strncasecmp(argv[i], "MinNodes=", 9) == 0) || (strncasecmp(argv[i], "ReqNodes=", 9) == 0)) { char *tmp; diff --git a/src/slurmctld/acct_policy.c b/src/slurmctld/acct_policy.c index d84209867636101f0580f3878d951879c0c6e023..3f4d7df4bb7b1ed759b43c482dec33b0603a7038 100644 --- a/src/slurmctld/acct_policy.c +++ b/src/slurmctld/acct_policy.c @@ -234,6 +234,12 @@ extern bool acct_policy_job_runnable(struct job_record *job_ptr) (assoc_ptr->used_jobs >= assoc_ptr->grp_jobs)) { job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT; xfree(job_ptr->state_desc); + debug2("job %u being held, " + "assoc %u is at or exceeds " + "group max jobs limit %u with %u for account %s", + 
job_ptr->job_id, assoc_ptr->id, + assoc_ptr->grp_jobs, + assoc_ptr->used_jobs, assoc_ptr->acct); rc = false; goto end_it; } @@ -255,6 +261,16 @@ extern bool acct_policy_job_runnable(struct job_record *job_ptr) job_ptr->state_reason = WAIT_ASSOC_RESOURCE_LIMIT; xfree(job_ptr->state_desc); + debug2("job %u being held, " + "assoc %u is at or exceeds " + "group max node limit %u " + "with already used %u + requested %u " + "for account %s", + job_ptr->job_id, assoc_ptr->id, + assoc_ptr->grp_nodes, + assoc_ptr->grp_used_nodes, + job_ptr->details->min_nodes, + assoc_ptr->acct); rc = false; goto end_it; } @@ -307,6 +323,12 @@ extern bool acct_policy_job_runnable(struct job_record *job_ptr) (assoc_ptr->used_jobs >= assoc_ptr->max_jobs)) { job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT; xfree(job_ptr->state_desc); + debug2("job %u being held, " + "assoc %u is at or exceeds " + "max jobs limit %u with %u for account %s", + job_ptr->job_id, assoc_ptr->id, + assoc_ptr->max_jobs, + assoc_ptr->used_jobs, assoc_ptr->acct); rc = false; goto end_it; } diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 208efb4deb6ac355a9fbd14dbf24ba05c9ec131e..43ab8238e848257728b36313bd8e074a086390c3 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -2995,7 +2995,7 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, if (job_desc->task_dist != (uint16_t) NO_VAL) detail_ptr->task_dist = job_desc->task_dist; if (job_desc->cpus_per_task != (uint16_t) NO_VAL) - detail_ptr->cpus_per_task = MIN(job_desc->cpus_per_task, 1); + detail_ptr->cpus_per_task = MAX(job_desc->cpus_per_task, 1); if (job_desc->ntasks_per_node != (uint16_t) NO_VAL) detail_ptr->ntasks_per_node = job_desc->ntasks_per_node; if (job_desc->requeue != (uint16_t) NO_VAL) diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 242b7f6011f933fb17b95e2a4bb0beb9df0d1662..32e0694b312c07a4b855b23ffdbf547f74ee6acc 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -838,7 +838,7 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) if ((sep_ptr == NULL) && (job_id == 0)) { job_id = strtol(tok, &sep_ptr, 10); if ((sep_ptr == NULL) || (sep_ptr[0] != '\0') || - (job_id <= 0) || (job_id == job_ptr->job_id)) { + (job_id < 0) || (job_id == job_ptr->job_id)) { rc = EINVAL; break; } @@ -874,7 +874,7 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) while (rc == SLURM_SUCCESS) { job_id = strtol(sep_ptr, &sep_ptr2, 10); if ((sep_ptr2 == NULL) || - (job_id <= 0) || (job_id == job_ptr->job_id) || + (job_id < 0) || (job_id == job_ptr->job_id) || ((sep_ptr2[0] != '\0') && (sep_ptr2[0] != ',') && (sep_ptr2[0] != ':'))) { rc = EINVAL; diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 291975de4b5aa15e8873b0ed710bc1611a654ec6..b035063953c980d3f7f3dcc25c3b6b0aee95ebd3 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -1406,6 +1406,7 @@ extern int step_create ( job_step_create_request_msg_t *step_specs, * IN step_node_list - node list of hosts in step * IN node_count - count of nodes in step allocation * IN num_tasks - number of tasks in step + * IN cpus_per_task - number of cpus per task * IN task_dist - type of task distribution * IN plane_size - size of plane (only needed for the plane distribution) * RET - NULL or slurm_step_layout_t * @@ -1416,6 +1417,7 @@ extern slurm_step_layout_t *step_layout_create(struct step_record *step_ptr, char *step_node_list, uint32_t node_count, uint32_t 
num_tasks, + uint16_t cpus_per_task, uint16_t task_dist, uint32_t plane_size); /* diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index b3f7094f57cdbde83231835bf952fec2dba7c285..8d48d6811679ddba83af36d67f2fbb4df91ed50e 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -1191,11 +1191,20 @@ step_create(job_step_create_request_msg_t *step_specs, /* a batch script does not need switch info */ if (!batch_step) { + /* we can figure out the cpus_per_task here by + reversing what happens in srun */ + int cpus_per_task = step_specs->cpu_count / + step_specs->num_tasks; +/* info(" we have %u / %u = %u", step_specs->cpu_count, */ +/* step_specs->num_tasks, cpus_per_task); */ + if(cpus_per_task < 1) + cpus_per_task = 1; step_ptr->step_layout = step_layout_create(step_ptr, step_node_list, step_specs->node_count, step_specs->num_tasks, + (uint16_t)cpus_per_task, step_specs->task_dist, step_specs->plane_size); if (!step_ptr->step_layout) { @@ -1228,6 +1237,7 @@ extern slurm_step_layout_t *step_layout_create(struct step_record *step_ptr, char *step_node_list, uint32_t node_count, uint32_t num_tasks, + uint16_t cpus_per_task, uint16_t task_dist, uint32_t plane_size) { @@ -1299,11 +1309,13 @@ extern slurm_step_layout_t *step_layout_create(struct step_record *step_ptr, break; } } - + /* layout the tasks on the nodes */ return slurm_step_layout_create(step_node_list, cpus_per_node, cpu_count_reps, - node_count, num_tasks, task_dist, + node_count, num_tasks, + cpus_per_task, + task_dist, plane_size); }
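
Note on the cpus_per_task handling above: env_array_for_batch_job() and step_create() now derive cpus_per_task by reversing what sbatch/srun did (total allocated CPUs divided by the task count, clamped to at least 1), and _init_task_layout() divides each node's CPU count by that value when laying out tasks. The following is a minimal, self-contained sketch of that arithmetic; the helper names (compute_cpus_per_task, fill_task_slots) and the sample allocation are illustrative only, not SLURM code.

/* Standalone sketch of the cpus_per_task back-calculation in this patch. */
#include <stdint.h>
#include <stdio.h>

/* Derive cpus_per_task by reversing the submission-side math: total CPUs
 * allocated divided by the number of tasks, clamped to at least 1. */
static uint16_t compute_cpus_per_task(uint32_t num_cpus, uint32_t num_tasks)
{
	int cpus_per_task = num_cpus / num_tasks;
	if (cpus_per_task < 1)
		cpus_per_task = 1;
	return (uint16_t) cpus_per_task;
}

/* Per-node task capacity, mirroring how _init_task_layout() now divides
 * cpus_per_node[] by cpus_per_task while walking cpu_count_reps[]. */
static void fill_task_slots(uint16_t *slots, int node_cnt,
			    const uint16_t *cpus_per_node,
			    const uint32_t *cpu_count_reps,
			    uint16_t cpus_per_task)
{
	int i, cpu_inx = 0;
	uint32_t cpu_cnt = 0;

	if (cpus_per_task < 1)	/* zero or NO_VAL falls back to 1 */
		cpus_per_task = 1;
	for (i = 0; i < node_cnt; i++) {
		slots[i] = cpus_per_node[cpu_inx] / cpus_per_task;
		if (++cpu_cnt >= cpu_count_reps[cpu_inx]) {
			cpu_inx++;	/* move to next record */
			cpu_cnt = 0;
		}
	}
}

int main(void)
{
	/* Hypothetical allocation: two nodes with 8 CPUs, one node with 4,
	 * running 10 tasks, so cpus_per_task = 20 / 10 = 2. */
	uint16_t cpus_per_node[] = { 8, 4 };
	uint32_t cpu_count_reps[] = { 2, 1 };
	uint16_t slots[3];
	uint16_t cpt = compute_cpus_per_task(20, 10);

	fill_task_slots(slots, 3, cpus_per_node, cpu_count_reps, cpt);
	printf("cpus_per_task=%u slots per node: %u %u %u\n",
	       cpt, slots[0], slots[1], slots[2]);
	return 0;
}

With 20 CPUs and 10 tasks the sketch reports cpus_per_task=2 and 4, 4, 2 task slots on the three nodes, matching how the patch rounds per-node capacity down by cpus_per_task.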
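A second note on the recurring slurm_conf_lock()/slurm_conf_unlock() removals in the BlueGene plugin: select_p_block_init() now copies SlurmUser and the node prefix into bg_slurm_user_name and bg_slurm_node_prefix once, and fini_bg() frees them, so the many call sites read cached strings instead of locking the configuration for every lookup. The sketch below illustrates that cache-once pattern under assumed names (cache_conf_strings, plugin_user_name, release_conf_strings); it is not the plugin's actual code.

/* Sketch of the "copy config strings once, read them lock-free" pattern. */
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t conf_mutex = PTHREAD_MUTEX_INITIALIZER;
static char *cached_user_name = NULL;
static char *cached_node_prefix = NULL;

/* Called once at plugin init (analogous to select_p_block_init()):
 * duplicate the strings while holding the lock, then never lock again. */
void cache_conf_strings(const char *user_name, const char *node_prefix)
{
	pthread_mutex_lock(&conf_mutex);
	free(cached_user_name);
	free(cached_node_prefix);
	cached_user_name = strdup(user_name);
	cached_node_prefix = strdup(node_prefix);
	pthread_mutex_unlock(&conf_mutex);
}

/* Consumers read the cached copies without taking the config lock. */
const char *plugin_user_name(void)   { return cached_user_name; }
const char *plugin_node_prefix(void) { return cached_node_prefix; }

/* Called at plugin fini (analogous to fini_bg()). */
void release_conf_strings(void)
{
	free(cached_user_name);
	free(cached_node_prefix);
	cached_user_name = cached_node_prefix = NULL;
}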