diff --git a/NEWS b/NEWS
index 489f9ccc784028aa27fb93953007833186f7536c..74c286026f6d814ca5ba60e9bd8cef2b098db708 100644
--- a/NEWS
+++ b/NEWS
@@ -14,7 +14,8 @@ documents those changes that are of interest to users and admins.
 -- New srun task distribution options to -m: plane
 -- Multi-core support in sinfo, squeue, and scontrol.
 -- Memory can be treated as a consumable resource.
- -- New srun options --ntasks_per_[node|socket|core].
+ -- New srun options --ntasks-per-[node|socket|core].
+ -- CR_CORE consumable resource and srun --hint support (HP patch patch.1.2.0.pre4.061017.crcore_hints).

 * Changes in SLURM 1.2.0-pre3
 =============================
diff --git a/doc/html/mc_support.shtml b/doc/html/mc_support.shtml
index 8c838d2fb60e8de00a91c8a4648053781f10dbd6..3618f1ad8229e1204fefd4a71d3143b2d3ae43ee 100644
--- a/doc/html/mc_support.shtml
+++ b/doc/html/mc_support.shtml
@@ -113,7 +113,7 @@ to dedicate to a job (minimum or range)
 <td> maximum amount of real memory per node required by the job.
 </td></tr>
 <tr><td colspan=2>
-<b><a href="#srun_ntasks">Task invocation as a function of logical processors</a></b>
+<b><a href="#srun_ntasks">Task invocation control</a></b>
 </td></tr>
 <tr>
   <td> --ntasks-per-node=<i>ntasks</i></td>
@@ -125,6 +125,19 @@ to dedicate to a job (minimum or range)
   <td> --ntasks-per-core=<i>ntasks</i></td>
   <td> number of tasks to invoke on each core
 </td></tr>
+<tr><td colspan=2>
+<b><a href="#srun_hints">Application hints</a></b>
+</td></tr>
+<tr>
+  <td> --hint=compute_bound</td>
+  <td> use all cores in each physical CPU
+</td></tr>
+<tr>
+  <td> --hint=memory_bound</td>
+  <td> use only one core in each physical CPU
+</td></tr>
+<tr>
+  <td> --hint=[no]multithread</td>
+  <td> [don't] use extra threads with in-core multi-threading
+</td></tr>
 </table>

 <p>
@@ -394,6 +407,54 @@ behavior of these flags:

 <p>See also 'srun --help' and 'man srun'</p>

+<a name="srun_hints">
+<h3>Application hints</h3></a>
+
+Different applications have different resource requirements. Some
+applications are computationally intensive but require little or no
+inter-process communication. Some applications are memory bound,
+saturating the memory bandwidth of a processor before exhausting its
+computational capabilities. Other applications are highly
+communication intensive, causing processes to block while awaiting
+messages from other processes. Applications with these different
+properties all tend to run well on a multi-core system, given the
+right task-to-processor mapping.
+
+For computationally intensive applications, all cores in a multi-core
+system would normally be used. For memory bound applications, using
+only a single core on each CPU yields the highest per-core memory
+bandwidth. For communication intensive applications, using in-core
+multi-threading (e.g. hyperthreading, SMT, or TMT) may also improve
+performance.
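<p>
Internally each hint collapses to nothing more than min/max bounds on
the sockets, cores, and threads requested per node, plus a CPU binding
preference. The fragment below is an editorial condensation of the
hint handling this patch adds to srun's option parser (error handling
and the help text omitted):
</p>

<pre>
    /* condensed sketch of srun's --hint token handling */
    if (strcasecmp(tok, "compute_bound") == 0) {
            min_sockets = 1;  max_sockets = INT_MAX;  /* all sockets      */
            min_cores   = 1;  max_cores   = INT_MAX;  /* all cores        */
            cpu_bind_type |= CPU_BIND_TO_CORES;
    } else if (strcasecmp(tok, "memory_bound") == 0) {
            min_cores = 1;    max_cores = 1;          /* one core per CPU */
            cpu_bind_type |= CPU_BIND_TO_CORES;
    } else if (strcasecmp(tok, "multithread") == 0) {
            min_threads = 1;  max_threads = INT_MAX;  /* use SMT threads  */
            cpu_bind_type |= CPU_BIND_TO_THREADS;
    } else if (strcasecmp(tok, "nomultithread") == 0) {
            min_threads = 1;  max_threads = 1;        /* skip SMT threads */
            cpu_bind_type |= CPU_BIND_TO_THREADS;
    }
</pre>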
+The following command line flags can be used to communicate these
+types of application hints to the SLURM multi-core support:
+
+<PRE>
+    --hint=             Bind tasks according to application hints
+        compute_bound   use all cores in each physical CPU
+        memory_bound    use only one core in each physical CPU
+        [no]multithread [don't] use extra threads with in-core multi-threading
+        help            show this help message
+</PRE>
+
+For example, given a cluster with nodes containing two sockets,
+each containing two cores, the following commands illustrate the
+behavior of these flags:
+<pre>
+    % srun -n 4 --hint=compute_bound --cpu_bind=verbose sleep 1
+    setting affinity of task 0 pid 15425 on host hydra12 to mask 0x1
+    setting affinity of task 2 pid 15427 on host hydra12 to mask 0x2
+    setting affinity of task 1 pid 15426 on host hydra12 to mask 0x4
+    setting affinity of task 3 pid 15428 on host hydra12 to mask 0x8
+
+    % srun -n 4 --hint=memory_bound --cpu_bind=verbose sleep 1
+    setting affinity of task 1 pid 15551 on host hydra12 to mask 0x4
+    setting affinity of task 0 pid 15550 on host hydra12 to mask 0x1
+    setting affinity of task 2 pid 14974 on host <b>hydra13</b> to mask 0x1
+    setting affinity of task 3 pid 14975 on host <b>hydra13</b> to mask 0x4
+</pre>
+
+<p>See also 'srun --hint=help' and 'man srun'</p>
 <!-------------------------------------------------------------------------->
 <a name=motivation>
 <h2>Motivation behind high-level srun flags</h2></a>
@@ -987,4 +1048,3 @@ using NodeName:

 <!--#include virtual="footer.txt"-->
-
diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index 9c552c32406aa9330d430d39970295012739290a..ed9063fb8e63cc3fd08c4ce1f75a1622c6ec56d2 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -565,14 +565,33 @@ instead of the node level.
 Masks will automatically be generated to bind the tasks to specific
 core unless \fB\-\-cpu_bind=none\fR is specified.
 .TP
+\fB\-\-hint\fR=\fItype\fR
+Bind tasks according to application hints
+.RS
+.TP
+.B compute_bound
+Select settings for compute bound applications:
+use all cores in each physical CPU
+.TP
+.B memory_bound
+Select settings for memory bound applications:
+use only one core in each physical CPU
+.TP
+.B [no]multithread
+[don't] use extra threads with in-core multi-threading,
+which can benefit communication intensive applications
+.TP
+.B help
+show this help message
+.RE
+.TP
 \fB\-\-cpu_bind\fR=[{\fIquiet,verbose\fR},]\fItype\fR
 Bind tasks to CPUs
 .RS
 .TP
-.B q[uiet],
+.B q[uiet]
 quietly bind before task runs (default)
 .TP
-.B v[erbose],
+.B v[erbose]
 verbosely report binding before task runs
 .TP
 .B no[ne]
@@ -590,8 +609,9 @@ with '0x' in which case they interpreted as hexadecimal values.
 .B mask_cpu:<list>
 bind by setting CPU masks on tasks as specified where <list> is
 <mask1>,<mask2>,...<maskN>.
-CPU masks are \fBalways\fR interpreted as hexadecimal values but can be
-preceded with an optional '0x'.
+CPU masks are \fBalways\fR interpreted as hexadecimal values.
+Note that masks must be preceded with a '0x' if they don't begin
+with [0-9] so they are seen as numerical values by srun.
 .TP
 .B sockets
 auto\-generated masks bind to sockets
@@ -655,10 +675,10 @@ options "\-\-cpu_bind=verbose,none \-\-mem_bind=verbose,none" to determine
 the specific configuration.
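.\" Editorial aside: the '0x' note above (and its \-\-mem_bind twin
.\" below) follows from the parser treating a token as a numeric mask
.\" only when it starts with a digit. A hedged sketch of such a parse,
.\" illustrative rather than the verbatim srun source:
.\"
.\"     if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
.\"             s += 2;                      /* optional 0x prefix */
.\"     mask = strtoul(s, NULL, 16);         /* always base 16     */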
.RS .TP -.B q[uiet], +.B q[uiet] quietly bind before task runs (default) .TP -.B v[erbose], +.B v[erbose] verbosely report binding before task runs .TP .B no[ne] @@ -680,8 +700,9 @@ with '0x' in which case they interpreted as hexadecimal values .B mask_mem:<list> bind by setting memory masks on tasks as specified where <list> is <mask1>,<mask2>,...<maskN>. -memory masks are \fBalways\fR interpreted as hexadecimal values but can be -preceded with an optional '0x' (not recommended) +memory masks are \fBalways\fR interpreted as hexadecimal values. +Note that masks must be preceded with a '0x' if they don't begin +with [0-9] so they are seen as numerical values by srun. .TP .B help show this help message diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 3aba2e85a278e73baae6b0c16cd9d999f44a5788..d3599f1bed426573ed005e0b0926a91b41282e7f 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -467,7 +467,7 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ char *account; /* charge to specified account */ char *network; /* network use spec */ char *comment; /* arbitrary comment (used by Moab scheduler) */ - uint16_t task_dist; /* see enum task_dist_state */ + uint16_t task_dist; /* see enum task_dist_state */ uint32_t plane_size; /* plane size when task_dist = SLURM_DIST_PLANE */ time_t begin_time; /* delay initiation until this time */ diff --git a/src/api/init_msg.c b/src/api/init_msg.c index c534e9c56716c59825507db269bd28414bb5c825..89c1bbc01155141d2da47df6af0e2538a9a52f1f 100644 --- a/src/api/init_msg.c +++ b/src/api/init_msg.c @@ -61,6 +61,11 @@ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg) job_desc_msg->comment = NULL; job_desc_msg->contiguous = (uint16_t) NO_VAL; job_desc_msg->cpus_per_task = (uint16_t) NO_VAL; + job_desc_msg->ntasks_per_node = (uint16_t) NO_VAL; + job_desc_msg->ntasks_per_socket = (uint16_t) NO_VAL; + job_desc_msg->ntasks_per_core = (uint16_t) NO_VAL; + job_desc_msg->task_dist = SLURM_DIST_CYCLIC; + job_desc_msg->plane_size = NO_VAL; job_desc_msg->dependency = NO_VAL; job_desc_msg->environment = ((char **) NULL); job_desc_msg->env_size = 0; diff --git a/src/common/slurm_resource_info.c b/src/common/slurm_resource_info.c index 9a95843391bf0bdeeb7a9eccf263af2cdbe07c84..3b6e654073e8d28367b22dd172ad9ea3cd8f1142 100644 --- a/src/common/slurm_resource_info.c +++ b/src/common/slurm_resource_info.c @@ -70,25 +70,28 @@ * Note: used in both the select/{linear,cons_res} plugins. 
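 * Editorial example (illustrative numbers): with CR_CORE on a
 * 2-socket x 2-core x 2-thread node where one core of socket 0 is
 * already allocated (alloc_cores = {1,0}, alloc_lps = 2), the CR_CORE
 * branch below first deducts the allocated lps (*cpus: 8 -> 6), then
 * caps availability at ((2-1) + (2-0)) free cores * 2 threads = 6.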
*/ int slurm_get_avail_procs(const int mxsockets, - const int mxcores, - const int mxthreads, - const int cpuspertask, - const int ntaskspernode, - const int ntaskspersocket, - const int ntaskspercore, - int *cpus, - int *sockets, - int *cores, - int *threads, - const int alloc_sockets, - const int alloc_lps, - const select_type_plugin_info_t cr_type) + const int mxcores, + const int mxthreads, + const int cpuspertask, + const int ntaskspernode, + const int ntaskspersocket, + const int ntaskspercore, + int *cpus, + int *sockets, + int *cores, + int *threads, + const int alloc_sockets, + const int *alloc_cores, + const int alloc_lps, + const select_type_plugin_info_t cr_type) { int avail_cpus = 0, max_cpus = 0; + int max_avail_cpus = INT_MAX; /* for alloc_* accounting */ int max_sockets = mxsockets; int max_cores = mxcores; int max_threads = mxthreads; int cpus_per_task = cpuspertask; + int i; /* pick defaults for any unspecified items */ if (cpus_per_task <= 0) @@ -113,15 +116,19 @@ int slurm_get_avail_procs(const int mxsockets, *sockets, *cores, *threads); info("get_avail_procs Ntask node %d sockets %d core %d ", ntaskspernode, ntaskspersocket, ntaskspercore); - info("get_avail_procs cr_type %d Allocated sockets %d lps %d ", - cr_type, alloc_sockets, alloc_lps); + info("get_avail_procs cr_type %d cpus %d Allocated sockets %d lps %d ", + cr_type, *cpus, alloc_sockets, alloc_lps); + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (i = 0; i < *sockets; i++) + info("get_avail_procs alloc_cores[%d] = %d", i, alloc_cores[i]); + } #endif if ((*threads <= 0) || (*cores <= 0) || (*sockets <= 0)) fatal(" ((threads <= 0) || (cores <= 0) || (sockets <= 0))"); - switch(cr_type) { - /* Nodes have no notion of socket, core, threads. Only one level - of logical processors */ + switch(cr_type) { + /* For the following CR types, nodes have no notion of socket, core, + and thread. 
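(Editorial: in the CR_CPU, CR_CPU_MEMORY and CR_MEMORY branch the cap
appears to come only from cpus_per_task and --ntasks-per-node; the
socket/core/thread maxima matter only for the second group.)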
Only one level of logical processors */ case CR_CPU: case CR_CPU_MEMORY: case CR_MEMORY: @@ -142,7 +149,7 @@ int slurm_get_avail_procs(const int mxsockets, max_cpus = MIN(max_cpus, ntaskspernode); } break; - /* Nodes contain sockets, cores, threads */ + /* For all other types, nodes contain sockets, cores, and threads */ case CR_SOCKET: case CR_SOCKET_MEMORY: case CR_CORE: @@ -161,7 +168,16 @@ int slurm_get_avail_procs(const int mxsockets, break; case CR_CORE: case CR_CORE_MEMORY: - /* Not yet implemented */ + *cpus -= alloc_lps; + if (*cpus < 0) + error(" cons_res: *cpus < 0"); + + if (alloc_lps > 0) { + max_avail_cpus = 0; + for (i=0; i<*sockets; i++) + max_avail_cpus += *cores - alloc_cores[i]; + max_avail_cpus *= *threads; + } break; default: break; @@ -175,16 +191,19 @@ int slurm_get_avail_procs(const int mxsockets, /*** compute an overall maximum cpu count honoring ntasks* ***/ max_cpus = *threads; if (ntaskspercore > 0) { - max_threads = MIN(max_cpus, ntaskspercore); + max_cpus = MIN(max_cpus, ntaskspercore); } max_cpus *= *cores; if (ntaskspersocket > 0) { - max_cpus = MIN(max_cpus, ntaskspersocket); + max_cpus = MIN(max_cpus, ntaskspersocket); } max_cpus *= *sockets; if (ntaskspernode > 0) { max_cpus = MIN(max_cpus, ntaskspernode); } + + /*** honor any availability maximum ***/ + max_cpus = MIN(max_cpus, max_avail_cpus); break; } @@ -196,6 +215,11 @@ int slurm_get_avail_procs(const int mxsockets, avail_cpus = MIN(avail_cpus, max_cpus); +#if(0) + info("get_avail_procs return cpus %d sockets %d cores %d threads %d ", + *cpus, *sockets, *cores, *threads); + info("get_avail_procs avail_cpus %d", avail_cpus); +#endif return(avail_cpus); } @@ -208,7 +232,7 @@ int slurm_get_avail_procs(const int mxsockets, */ void slurm_sprint_cpu_bind_type(char *str, cpu_bind_type_t cpu_bind_type) { - if (!str) + if (!str) return; str[0] = '\0'; @@ -246,7 +270,7 @@ void slurm_sprint_cpu_bind_type(char *str, cpu_bind_type_t cpu_bind_type) */ void slurm_sprint_mem_bind_type(char *str, mem_bind_type_t mem_bind_type) { - if (!str) + if (!str) return; str[0] = '\0'; diff --git a/src/common/slurm_resource_info.h b/src/common/slurm_resource_info.h index b9533f67e21807b5c389236b2f750e8dca367480..f74e0aadfde282495496dce754a571f656cce6b8 100644 --- a/src/common/slurm_resource_info.h +++ b/src/common/slurm_resource_info.h @@ -61,6 +61,7 @@ int slurm_get_avail_procs(const int mxsockets, int *cores, int *threads, const int alloc_sockets, + const int *alloc_cores, const int alloc_lps, const select_type_plugin_info_t cr_type); diff --git a/src/common/slurm_selecttype_info.c b/src/common/slurm_selecttype_info.c index 153cd4ec8e4a86134d564282d39a04a8c549d999..0c3fcd530d445c13e8205a7feaa32c405f2acb6c 100644 --- a/src/common/slurm_selecttype_info.c +++ b/src/common/slurm_selecttype_info.c @@ -46,7 +46,7 @@ * * Return SLURM_SUCCESS on success, or SLURM_ERROR otherwise */ -extern int parse_select_type_param(char *select_type_parameters, +int parse_select_type_param(char *select_type_parameters, select_type_plugin_info_t *param) { int rc = SLURM_SUCCESS; @@ -56,7 +56,7 @@ extern int parse_select_type_param(char *select_type_parameters, char *st_str = xstrdup(select_type_parameters); if ((str_parameters = strtok(st_str,",")) != NULL) { do { - if (strcasecmp(str_parameters, "CR_SOCKET") == 0) { + if (strcasecmp(str_parameters, "CR_Socket") == 0) { *param = CR_SOCKET; } else if (strcasecmp(str_parameters, "CR_Socket_Memory") == 0) { *param = CR_SOCKET_MEMORY; diff --git a/src/common/slurm_selecttype_info.h 
b/src/common/slurm_selecttype_info.h index d07efd7c6d356fd42346414072f939c063b0b5fd..e8156d78d2b00589d0a053e1fa3814bf18f5b4aa 100644 --- a/src/common/slurm_selecttype_info.h +++ b/src/common/slurm_selecttype_info.h @@ -43,7 +43,7 @@ #include <string.h> #include <slurm/slurm.h> -extern int parse_select_type_param(char *select_type_parameters, +int parse_select_type_param(char *select_type_parameters, select_type_plugin_info_t *param); #endif /*__SLURM_SELECTTYPE_INFO_H__*/ diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index 25276c7b02f9135b3dc7654138b4c3efc0f8cf35..600778407903a5461d341342159b6a7e8eac8aff 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -55,8 +55,6 @@ * The advantage of the consumable resource scheduling policy is that * the job throughput can increase dramatically. * - * $Id$ - * ***************************************************************************** * Copyright (C) 2005-2006 Hewlett-Packard Development Company, L.P. * Written by Susanne M. Balle <susanne.balle@hp.com>, who borrowed heavily @@ -115,6 +113,10 @@ #include "src/common/slurm_resource_info.h" #include "src/slurmctld/slurmctld.h" +#if 0 +#define CR_DEBUG 1 +#endif + /* * These variables are required by the generic plugin interface. If they * are not found in the plugin, the plugin loader will ignore it. @@ -155,7 +157,8 @@ struct node_cr_record { struct node_record *node_ptr; /* ptr to the node that own these resources */ uint32_t alloc_lps; /* cpu count reserved by already scheduled jobs */ uint32_t alloc_sockets; /* socket count reserved by already scheduled jobs */ - uint32_t *alloc_cores; /* core count reserved by already scheduled jobs */ + uint32_t *alloc_cores; /* core count per socket reserved by + * already scheduled jobs */ uint32_t alloc_memory; /* real memory reserved by already scheduled jobs */ struct node_cr_record *node_next;/* next entry with same hash index */ }; @@ -169,10 +172,14 @@ struct select_cr_job { int nhosts; /* number of hosts allocated to job */ char **host; /* hostname vector */ int *cpus; /* number of processors on each host */ - int *alloc_lps; /* number of allocated threads/lps on each host */ - int *alloc_sockets; /* number of allocated sockets on each host */ - int **alloc_cores; /* Allocated cores per socket on each host */ - int *alloc_memory; /* number of allocated MB of real memory on each host */ + int *alloc_lps; /* number of allocated threads/lps on + * each host */ + int *alloc_sockets; /* number of allocated sockets on each + * host */ + int **alloc_cores; /* number of allocated cores on each + * host */ + int *alloc_memory; /* number of allocated MB of real + * memory on each host */ int max_sockets; int max_cores; int max_threads; @@ -349,14 +356,13 @@ static void _get_resources_this_node(int *cpus, } *alloc_sockets = this_cr_node->alloc_sockets; *alloc_lps = this_cr_node->alloc_lps; -#if(0) - info("cons_res %d _get_resources host %s HW_ cpus %d sockets %d cores %d threads %d ", + + debug3("cons_res %d _get_resources host %s HW_ cpus %d sockets %d cores %d threads %d ", *jobid, this_cr_node->node_ptr->name, *cpus, *sockets, *cores, *threads); - info("cons_res %d _get_resources host %s Alloc_ sockets %d lps %d ", + debug3("cons_res %d _get_resources host %s Alloc_ sockets %d lps %d ", *jobid, this_cr_node->node_ptr->name, *alloc_sockets, *alloc_lps); -#endif } /* @@ -405,7 +411,7 @@ static int _get_avail_lps(struct job_record *job_ptr, 
int max_sockets = 0, max_cores = 0, max_threads = 0; int ntasks_per_node = 0, ntasks_per_socket = 0, ntasks_per_core = 0; int cpus, sockets, cores, threads; - int alloc_sockets = 0, alloc_lps = 0; + int alloc_sockets = 0, alloc_lps = 0; struct node_cr_record *this_cr_node; if (job_ptr->details && job_ptr->details->cpus_per_task) @@ -416,6 +422,12 @@ static int _get_avail_lps(struct job_record *job_ptr, max_cores = job_ptr->details->max_cores; if (job_ptr->details && job_ptr->details->max_threads) max_threads = job_ptr->details->max_threads; + if (job_ptr->details && job_ptr->details->ntasks_per_node) + ntasks_per_node = job_ptr->details->ntasks_per_node; + if (job_ptr->details && job_ptr->details->ntasks_per_socket) + ntasks_per_socket = job_ptr->details->ntasks_per_socket; + if (job_ptr->details && job_ptr->details->ntasks_per_core) + ntasks_per_core = job_ptr->details->ntasks_per_core; this_cr_node = _find_cr_node_record (select_node_ptr[index].node_ptr->name); if (this_cr_node == NULL) { @@ -425,7 +437,7 @@ static int _get_avail_lps(struct job_record *job_ptr, return avail_cpus; } _get_resources_this_node(&cpus, &sockets, &cores, &threads, - this_cr_node, &alloc_sockets, + this_cr_node, &alloc_sockets, &alloc_lps, &job_ptr->job_id); if (all_available) { alloc_sockets = 0; @@ -441,6 +453,7 @@ static int _get_avail_lps(struct job_record *job_ptr, ntasks_per_core, &cpus, &sockets, &cores, &threads, alloc_sockets, + (int *)this_cr_node->alloc_cores, alloc_lps, cr_type); return(avail_cpus); } @@ -500,29 +513,59 @@ static int _compute_c_b_task_dist(struct select_cr_job *job) int alloc_lps = 0; _get_resources_this_node(&cpus, &sockets, &cores, &threads, - this_node, &alloc_sockets, - &alloc_lps, &job->job_id); - - avail_cpus = slurm_get_avail_procs(job->max_sockets, - job->max_cores, - job->max_threads, - job->cpus_per_task, - job->ntasks_per_node, - job->ntasks_per_socket, - job->ntasks_per_core, - &cpus, - &sockets, - &cores, - &threads, - alloc_sockets, - alloc_lps, - cr_type); + this_node, + &alloc_sockets, + &alloc_lps, + &job->job_id); + + avail_cpus = slurm_get_avail_procs( + job->max_sockets, + job->max_cores, + job->max_threads, + job->cpus_per_task, + job->ntasks_per_node, + job->ntasks_per_socket, + job->ntasks_per_core, + &cpus, + &sockets, + &cores, + &threads, + alloc_sockets, + (int *)this_node->alloc_cores, + alloc_lps, + cr_type); break; } case CR_CORE: case CR_CORE_MEMORY: - /* Not implemented yet */ + { + int alloc_sockets = 0; + int alloc_lps = 0; + _get_resources_this_node(&cpus, &sockets, + &cores, &threads, + this_node, + &alloc_sockets, + &alloc_lps, + &job->job_id); + + avail_cpus = slurm_get_avail_procs( + job->max_sockets, + job->max_cores, + job->max_threads, + job->cpus_per_task, + job->ntasks_per_node, + job->ntasks_per_socket, + job->ntasks_per_core, + &cpus, + &sockets, + &cores, + &threads, + alloc_sockets, + (int *)this_node->alloc_cores, + alloc_lps, + cr_type); break; + } default: /* We should never get in here. If we do it is a bug */ @@ -569,12 +612,15 @@ static int _compute_c_b_task_dist(struct select_cr_job *job) * avoiding interference between co-allocated running jobs. * * In the consumable resources environment we need to determine the - * CPU or core layout schema within slurmctld. + * layout schema within slurmctld. 
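 * Editorial gloss: "block" fills every usable core of one socket
 * before moving to the next, while "cyclic" deals tasks out across
 * the sockets in turn; for CR_CORE and CR_CORE_MEMORY both paths now
 * also tally, per socket, how many cores the job takes so that
 * _clear_select_jobinfo() can return them when the job finishes.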
*/ static int _cr_dist(struct select_cr_job *job, const int cyclic) { - int rc = SLURM_SUCCESS; - int taskcount = 0; +#if(CR_DEBUG) + int i; +#endif + int j, rc = SLURM_SUCCESS; + int taskcount = 0; int maxtasks = job->nprocs; int host_index; int usable_cpus = 0; @@ -604,7 +650,7 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) if (bit_test(job->node_bitmap, host_index) == 0) continue; job_index++; - + this_cr_node = _find_cr_node_record( node_record_table_ptr[host_index].name); if (this_cr_node == NULL) { @@ -613,7 +659,7 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) return SLURM_ERROR; } - _get_resources_this_node(&usable_cpus, &usable_sockets, + _get_resources_this_node(&usable_cpus, &usable_sockets, &usable_cores, &usable_threads, this_cr_node, &alloc_sockets, &alloc_lps, &job->job_id); @@ -630,13 +676,22 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) &usable_cores, &usable_threads, alloc_sockets, + (int *)this_cr_node-> + alloc_cores, alloc_lps, cr_type); -#if(0) - info("_cr_dist %u avail_s %d _c %d _t %d alloc_s %d alloc_lps %d ", + +#if(CR_DEBUG) + info("cons_res: _cr_dist %u avail_s %d _c %d _t %d " + "alloc_s %d lps %d ", job->job_id, usable_sockets, usable_cores, usable_threads, alloc_sockets, alloc_lps); -#endif + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) + for(i=0; i<usable_sockets;i++) + info("cons_res: _cr_dist alloc_cores %d = %d", + i, this_cr_node->alloc_cores[i]); +#endif + if (avail_cpus == 0) { error(" cons_res: no available cpus on node %s", node_record_table_ptr[host_index].name); @@ -644,10 +699,19 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) maxtasks = job->alloc_lps[job_index]; taskcount = 0; job->alloc_sockets[job_index] = 0; + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (j = 0; + j < node_record_table_ptr[host_index].cores; + j++) + job->alloc_cores[job_index][j] = 0; + } if (cyclic == 0) { /* block lllp distribution */ int s, c, t; + int c_ok = 0; last_socket_index = -1; + int socket_cnt = -1; + int core_cnt = 0; for (s=0; s < usable_sockets; s++) { last_core_index = -1; @@ -662,21 +726,40 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) if (maxtasks <= taskcount) continue; if (last_socket_index != s) { - job->alloc_sockets[job_index]++; -#if(0) - info("block jid %u s %d c %d t %d tc %d", - job->job_id, s, c, t, - taskcount); -#endif + job->alloc_sockets + [job_index]++; last_socket_index = s; } + if (((cr_type == CR_CORE) + || (cr_type + == CR_CORE_MEMORY)) + && (c_ok == 0)) { + if (socket_cnt >= + node_record_table_ptr[host_index].sockets) + continue; + core_cnt = job->alloc_cores[job_index][socket_cnt] + + this_cr_node->alloc_cores[socket_cnt]; + if (core_cnt >= + node_record_table_ptr[host_index].cores) { + socket_cnt++; + } + job->alloc_cores[job_index][socket_cnt]++; + info("cons_res %u BLOCK job->alloc_cores[%d][%d] = %d", + job->job_id, job_index, socket_cnt, + job->alloc_cores[job_index][socket_cnt]); + if (c == (usable_cores-1)) + c_ok = 1; + } taskcount++; } } + socket_cnt++; } } else if (cyclic == 1) { /* cyclic lllp distribution */ int s, c, t; int max_s = 0; + int socket_cnt = -1; + int core_cnt = 0; for (t=0; t < usable_threads; t++) { if (maxtasks <= taskcount) @@ -694,20 +777,40 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) if(s == (usable_sockets-1)) max_s = 1; } + if (((cr_type == CR_CORE) + || (cr_type == CR_CORE_MEMORY)) + && (t == 0)) { + socket_cnt++; + if (socket_cnt >= + 
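/* editorial: once socket_cnt has walked past the node's last
 * socket there is nothing left to account against, so skip */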
node_record_table_ptr[host_index].sockets) + continue; + core_cnt = job->alloc_cores[job_index][socket_cnt] + + this_cr_node->alloc_cores[socket_cnt]; + if (core_cnt >= + node_record_table_ptr[host_index].cores) { + socket_cnt++; + } + job->alloc_cores[job_index][socket_cnt]++; + info("cons_res %u CYCLIC job->alloc_cores[%d][%d] = %d", + job->job_id, job_index, socket_cnt, + job->alloc_cores[job_index][socket_cnt]); + } + taskcount++; -#if(0) - info("cyclic jid %u s %d c %d t %d tc %d", - job->job_id, s, c, t, taskcount); -#endif } } } } -#if(0) +#if(CR_DEBUG) info("cons_res _cr_dist %u cyclic %d host %d %s alloc_ " "sockets %d lps %d ", job->job_id, cyclic, host_index, this_cr_node->node_ptr->name, job->alloc_sockets[job_index], job->alloc_lps[job_index]); + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) + for(i=0; i<usable_sockets;i++) + info("cons_res _cr_dist: %u alloc_cores[%d][%d] = %d", + job->job_id, i, job_index, + job->alloc_cores[job_index][i]); #endif } return rc; @@ -720,7 +823,7 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) */ static int _cr_exclusive_dist(struct select_cr_job *job) { - int i; + int i, j; int host_index = 0; for (i = 0; i < node_record_count; i++) { @@ -729,6 +832,11 @@ static int _cr_exclusive_dist(struct select_cr_job *job) job->alloc_lps[host_index] = node_record_table_ptr[i].cpus; job->alloc_sockets[host_index] = node_record_table_ptr[i].sockets; + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (j = 0; j < node_record_table_ptr[i].sockets; j++) + job->alloc_cores[host_index][j] = + node_record_table_ptr[i].cores; + } host_index++; } return SLURM_SUCCESS; @@ -744,9 +852,12 @@ static int _cr_plane_dist(struct select_cr_job *job, const int plane_size) int i, j, k, l, m, host_index; int usable_cpus, usable_sockets, usable_cores, usable_threads; int taskcount=0, last_socket_index; - int socket_index, core_index, thread_index; + int socket_index, core_index, thread_index; + int c_ok = 0; int job_index = -1; - + int socket_cnt = 0; + int core_cnt = 0; + debug3("cons_res _cr_plane_dist plane_size %d ", plane_size); debug3("cons_res _cr_plane_dist maxtasks %d num_hosts %d", maxtasks, num_hosts); @@ -770,12 +881,13 @@ static int _cr_plane_dist(struct select_cr_job *job, const int plane_size) } } } -#if(0) +#if(CR_DEBUG) for (i = 0; i < job->nhosts; i++) { info("cons_res _cr_plane_dist %u host %s alloc_ lps %d ", job->job_id, job->host[i], job->alloc_lps[i]); } #endif + for (host_index = 0; ((host_index < node_record_count) && (taskcount < job->nprocs)); host_index++) { @@ -801,27 +913,40 @@ static int _cr_plane_dist(struct select_cr_job *job, const int plane_size) &alloc_lps, &job->job_id); avail_cpus = slurm_get_avail_procs(job->max_sockets, - job->max_cores, - job->max_threads, - job->cpus_per_task, - job->ntasks_per_node, - job->ntasks_per_socket, - job->ntasks_per_core, - &usable_cpus, - &usable_sockets, - &usable_cores, - &usable_threads, - alloc_sockets, - alloc_lps, - cr_type); + job->max_cores, + job->max_threads, + job->cpus_per_task, + job->ntasks_per_node, + job->ntasks_per_socket, + job->ntasks_per_core, + &usable_cpus, + &usable_sockets, + &usable_cores, + &usable_threads, + alloc_sockets, + (int *)this_cr_node-> + alloc_cores, + alloc_lps, + cr_type); + if (avail_cpus == 0) { error(" cons_res: no available cpus on node %s", node_record_table_ptr[host_index].name); } + job->alloc_sockets[job_index] = 0; + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (j = 0; + j < 
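/* review note (editorial): this loop bound is the node's core count,
 * yet alloc_cores[] is xmalloc'd with one entry per socket in
 * select_p_job_test() below; if cores > sockets the reset overruns
 * the array, so a per-socket bound looks like the intent */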
node_record_table_ptr[host_index].cores; + j++) + job->alloc_cores[job_index][j] = 0; + } + maxtasks = job->alloc_lps[job_index]; last_socket_index = -1; next = 0; + socket_cnt = 0; + core_cnt = 0; for (j=0; next<maxtasks; j++) { for (socket_index=0; ((socket_index<usable_sockets) @@ -852,16 +977,43 @@ static int _cr_plane_dist(struct select_cr_job *job, const int plane_size) last_socket_index = socket_index; } + if (((cr_type == CR_CORE) || + (cr_type == CR_CORE_MEMORY)) + && (c_ok == 0)) { + if (socket_cnt >= + node_record_table_ptr[host_index].sockets) + continue; + core_cnt = job->alloc_cores[job_index][socket_cnt] + + this_cr_node->alloc_cores[socket_cnt]; + if (core_cnt >= + node_record_table_ptr[host_index].cores) { + socket_cnt++; + } + job->alloc_cores[job_index][socket_cnt]++; + info("cons_res %u PLANE job->alloc_cores[%d][%d] = %d", + job->job_id, job_index, socket_cnt, + job->alloc_cores[job_index][socket_cnt]); + if (m == (usable_cores-1)) + c_ok = 1; + } next++; } } + socket_cnt++; } } -#if(0) +#if(CR_DEBUG) info("cons_res _cr_plane_dist %u host %d %s alloc_ " - "sockets %d lps %d ", + "s %d c %d lps %d ", job->job_id, host_index, this_cr_node->node_ptr->name, job->alloc_sockets[job_index], job->alloc_lps[job_index]); + int i = 0; + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (i = 0; i < this_cr_node->node_ptr->sockets; i++) + info("cons_res _cr_plane_dist %u host %d %s alloc_cores %d", + job->job_id, host_index, this_cr_node->node_ptr->name, + job->alloc_cores[job_index][i]); + } #endif } return SLURM_SUCCESS; @@ -870,11 +1022,18 @@ static int _cr_plane_dist(struct select_cr_job *job, const int plane_size) /* xfree a select_cr_job job */ static void _xfree_select_cr_job(struct select_cr_job *job) { + int i; + xfree(job->host); xfree(job->cpus); - xfree(job->alloc_lps); + xfree(job->alloc_lps); xfree(job->alloc_sockets); xfree(job->alloc_memory); + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (i = 0; i < job->nhosts; i++) + xfree(job->alloc_cores[i]); + xfree(job->alloc_cores); + } FREE_NULL_BITMAP(job->node_bitmap); xfree(job); } @@ -960,7 +1119,20 @@ static void _count_cpus(unsigned *bitmap, int sum) case CR_CORE: case CR_CORE_MEMORY: { - /* FIXME */ + int core_cnt = 0; + for (i = 0; i < this_node->node_ptr->sockets; i++) + core_cnt += this_node->alloc_cores[i]; + if (slurmctld_conf.fast_schedule) { + sum += ((node_record_table_ptr[i].config_ptr->sockets + * node_record_table_ptr[i].config_ptr->cores) + - core_cnt) + * node_record_table_ptr[i].config_ptr->threads; + } else { + sum += ((node_record_table_ptr[i].sockets + * node_record_table_ptr[i].cores) + - core_cnt) + * node_record_table_ptr[i].threads; + } break; } case CR_MEMORY: @@ -1030,7 +1202,7 @@ static int _synchronize_bitmaps(bitstr_t ** partially_idle_bitmap) static int _clear_select_jobinfo(struct job_record *job_ptr) { - int rc = SLURM_SUCCESS, i, nodes, job_id; + int rc = SLURM_SUCCESS, i, j, nodes, job_id; struct select_cr_job *job = NULL; ListIterator iterator; @@ -1059,13 +1231,11 @@ static int _clear_select_jobinfo(struct job_record *job_ptr) goto out; } + /* Updating this node allocated resources */ switch(cr_type) { case CR_SOCKET: case CR_SOCKET_MEMORY: - case CR_CORE: - case CR_CORE_MEMORY: - /* Updating this node allocated resources */ - this_node->alloc_lps -= job->alloc_lps[i]; + this_node->alloc_lps -= job->alloc_lps[i]; this_node->alloc_sockets -= job->alloc_sockets[i]; if ((this_node->alloc_lps < 0) || (this_node->alloc_sockets < 0)) { error(" alloc_lps < 
0 %d on %s", @@ -1076,8 +1246,44 @@ static int _clear_select_jobinfo(struct job_record *job_ptr) rc = SLURM_ERROR; goto out; } - if ((cr_type == CR_SOCKET) || (cr_type == CR_CORE)) - break; + this_node->alloc_memory -= job->alloc_memory[i]; + if (this_node->alloc_memory < 0) { + error(" alloc_memory < 0 %d on %s", + this_node->alloc_memory, + this_node->node_ptr->name); + this_node->alloc_memory = 0; + rc = SLURM_ERROR; + goto out; + } + break; + case CR_CORE: + case CR_CORE_MEMORY: + this_node->alloc_lps -= job->alloc_lps[i]; + for (j =0; j < this_node->node_ptr->sockets; j++) + this_node->alloc_cores[j] -= job->alloc_cores[i][j]; + for (j =0; j < this_node->node_ptr->sockets; j++) { + if ((this_node->alloc_lps >= 0) || (this_node->alloc_cores[j] >= 0)) { + continue; + } else { + error(" alloc_lps < 0 %d on %s", + this_node->alloc_lps, + this_node->node_ptr->name); + this_node->alloc_lps = 0; + this_node->alloc_cores = 0; + rc = SLURM_ERROR; + goto out; + } + } + this_node->alloc_memory -= job->alloc_memory[i]; + if (this_node->alloc_memory < 0) { + error(" alloc_memory < 0 %d on %s", + this_node->alloc_memory, + this_node->node_ptr->name); + this_node->alloc_memory = 0; + rc = SLURM_ERROR; + goto out; + } + break; case CR_MEMORY: this_node->alloc_memory -= job->alloc_memory[i]; if (this_node->alloc_memory < 0) { @@ -1116,10 +1322,16 @@ static int _clear_select_jobinfo(struct job_record *job_ptr) default: break; } -#if(1) - info("cons_res %u _clear_select_jobinfo (-) node %s alloc_ lps %d sockets %d ", - job->job_id, this_node->node_ptr->name, this_node->alloc_lps, - this_node->alloc_sockets); +#if(CR_DEBUG) + info("cons_res %u _clear_select_jobinfo (-) node %s alloc_ s %d lps %d", + job->job_id, this_node->node_ptr->name, + this_node->alloc_sockets, + this_node->alloc_lps); + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) + for (j =0; j < this_node->node_ptr->sockets; j++) + info("cons_res %u _clear_select_jobinfo (-) node %s alloc_ c %d", + job->job_id, this_node->node_ptr->name, + this_node->alloc_cores[j]); #endif } out: @@ -1209,7 +1421,7 @@ extern int select_p_job_init(List job_list) extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) { - int i; + int i, j; if (node_ptr == NULL) { error("select_g_node_init: node_ptr == NULL"); @@ -1228,9 +1440,16 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) for (i = 0; i < select_node_cnt; i++) { select_node_ptr[i].node_ptr = &node_ptr[i]; - select_node_ptr[i].alloc_lps = 0; - select_node_ptr[i].alloc_sockets = 0; - select_node_ptr[i].alloc_memory = 0; + select_node_ptr[i].alloc_lps = 0; + select_node_ptr[i].alloc_sockets = 0; + select_node_ptr[i].alloc_memory = 0; + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + select_node_ptr[i].alloc_cores = + xmalloc(sizeof(int) * select_node_ptr->node_ptr->sockets); + for (j = 0; j < select_node_ptr->node_ptr->sockets; j++) { + select_node_ptr[i].alloc_cores[j] = 0; + } + } } select_fast_schedule = slurm_get_fast_schedule(); @@ -1488,7 +1707,7 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, goto cleanup; if (!test_only) { - int jobid, job_nodecnt, j; + int jobid, job_nodecnt, j, k; bitoff_t size; static struct select_cr_job *job; job = xmalloc(sizeof(struct select_cr_job)); @@ -1515,13 +1734,17 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, bit_set(job->node_bitmap, i); } - job->host = (char **) xmalloc(job->nhosts * sizeof(char *)); - job->cpus = (int *) 
xmalloc(job->nhosts * sizeof(int)); - - /* Build number of needed lps for each hosts for this job */ - job->alloc_lps = (int *) xmalloc(job->nhosts * sizeof(int)); + job->host = (char **) xmalloc(job->nhosts * sizeof(char *)); + job->cpus = (int *) xmalloc(job->nhosts * sizeof(int)); + job->alloc_lps = (int *) xmalloc(job->nhosts * sizeof(int)); job->alloc_sockets = (int *) xmalloc(job->nhosts * sizeof(int)); job->alloc_memory = (int *) xmalloc(job->nhosts * sizeof(int)); + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + job->alloc_cores = (int **) xmalloc(job->nhosts * sizeof(int *)); + for (i = 0; i < job->nhosts; i++) + job->alloc_cores[i] = (int *) xmalloc( + node_record_table_ptr[i].sockets * sizeof(int)); + } j = 0; for (i = 0; i < node_record_count; i++) { @@ -1529,13 +1752,16 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, continue; job->host[j] = node_record_table_ptr[i].name; job->cpus[j] = node_record_table_ptr[i].cpus; - job->alloc_memory[j] = job_ptr->details->job_max_memory; - job->alloc_lps[j] = 0; + job->alloc_lps[j] = 0; job->alloc_sockets[j] = 0; + job->alloc_memory[j] = job_ptr->details->job_max_memory; + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (k = 0; k < node_record_table_ptr[i].sockets; k++) + job->alloc_cores[j][k] = 0; + } j++; } - - /* check for error SMB Fixme */ + debug3("cons_res %u task_dist %d", job_ptr->job_id, job_ptr->details->task_dist); if (job_ptr->details->shared == 0) { /* Nodes need to be allocated in dedicated @@ -1576,14 +1802,6 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, } if (error_code != SLURM_SUCCESS) goto cleanup; -#if(0) - /* debugging only Remove */ - for (i = 0; i < job->nhosts; i++) { - debug3("cons_res: job: %u after _cr_dist host %s cpus %u alloc_lps %d alloc_sockets %d", - job->job_id, job->host[i], job->cpus[i], - job->alloc_lps[i], job->alloc_sockets[i]); - } -#endif _append_to_job_list(job); } @@ -1751,6 +1969,7 @@ extern int select_p_get_extra_jobinfo(struct node_record *node_ptr, { int rc = SLURM_SUCCESS, i, avail = 0; uint32_t *tmp_32 = (uint32_t *) data; + *tmp_32 = 0; xassert(job_ptr); xassert(job_ptr->magic == JOB_MAGIC); @@ -1816,22 +2035,13 @@ extern int select_p_get_extra_jobinfo(struct node_record *node_ptr, * given job for a specific node --> based * on the output from _cr_dist */ switch(cr_type) { + case CR_MEMORY: + *tmp_32 = node_ptr->cpus; + break; case CR_SOCKET: case CR_SOCKET_MEMORY: - /* Number of hardware resources allocated - for this job. 
This might be more than - what the job requires since we - only allocated whole sockets at - this level */ - *tmp_32 = job->alloc_lps[i]; - break; case CR_CORE: case CR_CORE_MEMORY: - /* Not yet implemented */ - break; - case CR_MEMORY: - *tmp_32 = node_ptr->cpus; - break; case CR_CPU: case CR_CPU_MEMORY: default: @@ -1845,6 +2055,7 @@ extern int select_p_get_extra_jobinfo(struct node_record *node_ptr, } if (!job) { debug3("cons_res: job %d not active", job_ptr->job_id); + *tmp_32 = 0; } cleanup: list_iterator_destroy(iterator); @@ -1863,7 +2074,7 @@ extern int select_p_get_select_nodeinfo(struct node_record *node_ptr, enum select_data_info dinfo, void *data) { - int rc = SLURM_SUCCESS; + int rc = SLURM_SUCCESS, i; struct node_cr_record *this_cr_node; xassert(node_ptr); @@ -1917,7 +2128,10 @@ extern int select_p_get_select_nodeinfo(struct node_record *node_ptr, break; case CR_CORE: case CR_CORE_MEMORY: - /* FIXME */ + *tmp_32 = 0; + for (i = 0; i < this_cr_node->node_ptr->sockets; i++) + *tmp_32 += this_cr_node->alloc_cores[i] * + node_ptr->threads; break; case CR_MEMORY: *tmp_32 = 0; @@ -1939,7 +2153,7 @@ extern int select_p_get_select_nodeinfo(struct node_record *node_ptr, extern int select_p_update_nodeinfo(struct job_record *job_ptr) { - int rc = SLURM_SUCCESS, i, job_id, nodes; + int rc = SLURM_SUCCESS, i, j, job_id, nodes; struct select_cr_job *job = NULL; ListIterator iterator; @@ -1965,29 +2179,39 @@ extern int select_p_update_nodeinfo(struct job_record *job_ptr) rc = SLURM_ERROR; goto cleanup; } + /* Updating this node's allocated resources */ switch (cr_type) { - case CR_SOCKET: - case CR_CORE: case CR_SOCKET_MEMORY: - case CR_CORE_MEMORY: - /* Updating this node's allocated resources */ + this_node->alloc_memory += job->alloc_memory[i]; + case CR_SOCKET: this_node->alloc_lps += job->alloc_lps[i]; this_node->alloc_sockets += job->alloc_sockets[i]; if (this_node->alloc_sockets > this_node->node_ptr->sockets) error("Job %u Host %s too many allocated sockets %d", job->job_id, this_node->node_ptr->name, this_node->alloc_sockets); - if ((cr_type == CR_SOCKET) - || (cr_type == CR_CORE)) - break; - case CR_MEMORY: this_node->alloc_memory += job->alloc_memory[i]; break; - case CR_CPU: + case CR_CORE_MEMORY: + this_node->alloc_memory += job->alloc_memory[i]; + case CR_CORE: + this_node->alloc_lps += job->alloc_lps[i]; + for (j = 0; j < this_node->node_ptr->sockets; j++) + this_node->alloc_cores[j] += job->alloc_cores[i][j]; + for (j = 0; j < this_node->node_ptr->sockets; j++) + if (this_node->alloc_cores[j] <= this_node->node_ptr->cores) + continue; + else + error("Job %u Host %s too many allocated cores %d for socket %d ", + job->job_id, this_node->node_ptr->name, + this_node->alloc_cores[j], j); + break; case CR_CPU_MEMORY: + this_node->alloc_memory += job->alloc_memory[i]; + case CR_CPU: this_node->alloc_lps += job->alloc_lps[i]; - if (cr_type == CR_CPU) - break; + break; + case CR_MEMORY: this_node->alloc_memory += job->alloc_memory[i]; break; default: @@ -1995,11 +2219,16 @@ extern int select_p_update_nodeinfo(struct job_record *job_ptr) rc = SLURM_ERROR; break; } -#if(1) +#if(CR_DEBUG) /* Remove debug only */ info("cons_res %u update_nodeinfo (+) node %s alloc_ lps %d sockets %d mem %d ", job->job_id, this_node->node_ptr->name, this_node->alloc_lps, this_node->alloc_sockets, this_node->alloc_memory); + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) + for (j = 0; j < this_node->node_ptr->sockets; j++) + info("cons_res %u update_nodeinfo (+) node %s alloc_ cores %d ", + 
job->job_id, this_node->node_ptr->name, + this_node->alloc_cores[j]); #endif } } diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index 2721fd01f5817113d65bd0c1e429654889df1e7f..8ddc29b6b924c027cff3972e58601569c30ec074 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -355,7 +355,7 @@ int get_avail_cpus(struct job_record *job_ptr, int index) max_sockets, max_cores, max_threads, cpus_per_task, ntasks_per_node, ntasks_per_socket, ntasks_per_core, &cpus, &sockets, &cores, &threads, - 0, 0, SELECT_TYPE_INFO_NONE); + 0, NULL, 0, SELECT_TYPE_INFO_NONE); #if 0 debug3("avail_cpus index %d = %d (out of %d %d %d %d)", diff --git a/src/plugins/task/affinity/Makefile.in b/src/plugins/task/affinity/Makefile.in index 48aed618c9fdf61f1c7ab74da57835eec74adabd..0f924784eaaa37d7ef8b5f1ed14e73dc5a9e935e 100644 --- a/src/plugins/task/affinity/Makefile.in +++ b/src/plugins/task/affinity/Makefile.in @@ -306,6 +306,7 @@ INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common @HAVE_SCHED_SETAFFINITY_FALSE@ schedutils.c \ @HAVE_SCHED_SETAFFINITY_FALSE@ task_affinity.c +task_affinity_la_LIBADD = $(top_builddir)/src/common/libcommon.la all: all-am .SUFFIXES: diff --git a/src/plugins/task/affinity/dist_tasks.c b/src/plugins/task/affinity/dist_tasks.c index c4dbf4036c32fb9c2e790e67ce609aa3da9762b3..ec38c81730fcac0ed03bbe118bc3e209e606940c 100644 --- a/src/plugins/task/affinity/dist_tasks.c +++ b/src/plugins/task/affinity/dist_tasks.c @@ -89,7 +89,8 @@ static void _get_resources_this_node(int *cpus, int *sockets, int *cores, int *threads, - int *alloc_sockets, + int *alloc_sockets, + int *alloc_cores, int *alloc_lps, unsigned int *jobid); static void _cr_update_reservation(int reserve, uint32_t *reserved, @@ -145,8 +146,7 @@ void lllp_distribution(launch_tasks_request_msg_t *req, uint32_t node_id) return; } - /* SMB. We are still thinking about this. Does this make - sense? + /* We are still thinking about this. Does this make sense? 
if (req->task_dist == SLURM_DIST_ARBITRARY) { req->cpu_bind_type >= CPU_BIND_NONE; info("lllp_distribution jobid [%u] -m hostfile - auto binding off ", @@ -533,11 +533,11 @@ static int _task_layout_lllp_init(launch_tasks_request_msg_t *req, int *hw_sockets, int *hw_cores, int *hw_threads, - int *alloc_sockets, - int *alloc_lps, int *avail_cpus) { int i; + int alloc_sockets = 0, alloc_lps = 0; + int alloc_cores[conf->sockets]; if (req->cpu_bind_type & CPU_BIND_TO_THREADS) { /* Default: in here in case we decide to change the @@ -552,11 +552,9 @@ static int _task_layout_lllp_init(launch_tasks_request_msg_t *req, info ("task_layout cpu_bind_type CPU_BIND_TO_SOCKETS"); } - *alloc_sockets = 0; - *alloc_lps = 0; _get_resources_this_node(usable_cpus, usable_sockets, usable_cores, - usable_threads, alloc_sockets, alloc_lps, - &req->job_id); + usable_threads, &alloc_sockets, alloc_cores, + &alloc_lps, &req->job_id); *hw_sockets = *usable_sockets; *hw_cores = *usable_cores; @@ -569,8 +567,8 @@ static int _task_layout_lllp_init(launch_tasks_request_msg_t *req, req->ntasks_per_core, usable_cpus, usable_sockets, usable_cores, usable_threads, - *alloc_sockets, *alloc_lps, conf->cr_type); - + alloc_sockets, alloc_cores, + alloc_lps, conf->cr_type); /* Allocate masks array */ *masks_p = xmalloc(maxtasks * sizeof(bitstr_t*)); for (i = 0; i < maxtasks; i++) { @@ -593,13 +591,14 @@ static void _get_resources_this_node(int *cpus, int *sockets, int *cores, int *threads, - int *alloc_sockets, + int *alloc_sockets, + int *alloc_cores, int *alloc_lps, unsigned int *jobid) { int bit_index = 0; int i, j , k; - int this_socket = 0; + int this_socket = 0, cr_core_enabled = 0; /* FIX for heterogeneous socket/core/thread count per system * in future releases */ @@ -609,10 +608,13 @@ static void _get_resources_this_node(int *cpus, *threads = conf->threads; switch(conf->cr_type) { - case CR_SOCKET: - case CR_SOCKET_MEMORY: case CR_CORE: case CR_CORE_MEMORY: + for(i = 0; i < *sockets; i++) + alloc_cores[i] = 0; + cr_core_enabled = 1; + case CR_SOCKET: + case CR_SOCKET_MEMORY: case CR_CPU: case CR_CPU_MEMORY: for(i = 0; i < *sockets; i++) { @@ -623,6 +625,9 @@ static void _get_resources_this_node(int *cpus, *jobid, bit_index, lllp_reserved[bit_index]); if(lllp_reserved[bit_index] > 0) { *alloc_lps += 1; + if ((k == 0) && (cr_core_enabled)) { + alloc_cores[i]++; + } this_socket++; } bit_index++; @@ -639,8 +644,14 @@ static void _get_resources_this_node(int *cpus, break; } +#if(0) info("_get_resources jobid %d hostname %s alloc_sockets %d alloc_lps %d ", - jobid, conf->hostname, *alloc_sockets, *alloc_lps); + *jobid, conf->hostname, *alloc_sockets, *alloc_lps); + if (cr_core_enabled) + for (i = 0; i < *sockets; i++) + info("_get_resources %d hostname %s socket id %d cores %d ", + *jobid, conf->hostname, i, alloc_cores[i]); +#endif } /* @@ -676,7 +687,6 @@ static int _task_layout_lllp_cyclic(launch_tasks_request_msg_t *req, int hw_sockets = 0, hw_cores = 0, hw_threads = 0; int usable_cpus = 0, avail_cpus = 0; int usable_sockets = 0, usable_cores = 0, usable_threads = 0; - int alloc_sockets = 0, alloc_lps = 0; bitstr_t **masks = NULL; bool bind_to_exact_socket = true; @@ -696,8 +706,6 @@ static int _task_layout_lllp_cyclic(launch_tasks_request_msg_t *req, &hw_sockets, &hw_cores, &hw_threads, - &alloc_sockets, - &alloc_lps, &avail_cpus); if (retval != SLURM_SUCCESS) { return retval; @@ -782,7 +790,6 @@ static int _task_layout_lllp_block(launch_tasks_request_msg_t *req, int hw_sockets = 0, hw_cores = 0, hw_threads = 0; int 
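/* editorial: the alloc_sockets/alloc_lps locals disappear from the
 * three lllp layout routines because _task_layout_lllp_init() now
 * derives them (plus the per-socket alloc_cores array) internally
 * via _get_resources_this_node() before calling
 * slurm_get_avail_procs() */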
usable_cpus = 0, avail_cpus = 0; int usable_sockets = 0, usable_cores = 0, usable_threads = 0; - int alloc_sockets = 0, alloc_lps = 0; bitstr_t **masks = NULL; bool bind_to_exact_socket = true; @@ -802,8 +809,6 @@ static int _task_layout_lllp_block(launch_tasks_request_msg_t *req, &hw_sockets, &hw_cores, &hw_threads, - &alloc_sockets, - &alloc_lps, &avail_cpus); if (retval != SLURM_SUCCESS) { return retval; @@ -906,9 +911,6 @@ static int _task_layout_lllp_block(launch_tasks_request_msg_t *req, * in srun. The second distribution "plane|block|cyclic" is computed * locally by each slurmd. * - * Restriction: Any restrictions? what about plane:cyclic or - * plane:block? Only plane:plane? FIXME!!! SMB!!! - * * The input to the lllp distribution algorithms is the gids * (tasksids) generated for the local node. * @@ -928,7 +930,6 @@ static int _task_layout_lllp_plane(launch_tasks_request_msg_t *req, int usable_sockets = 0, usable_cores = 0, usable_threads = 0; int plane_size = req->plane_size; int max_plane_size = 0; - int alloc_sockets = 0, alloc_lps = 0; bitstr_t **masks = NULL; bool bind_to_exact_socket = true; @@ -948,8 +949,6 @@ static int _task_layout_lllp_plane(launch_tasks_request_msg_t *req, &hw_sockets, &hw_cores, &hw_threads, - &alloc_sockets, - &alloc_lps, &avail_cpus); if (retval != SLURM_SUCCESS) { return retval; diff --git a/src/slaunch/opt.c b/src/slaunch/opt.c index 66ae47d6c91fbef1e4139a55aa4d78a2c602ad6d..1cd525586abd3101553148f23da8538e27a3ff4d 100644 --- a/src/slaunch/opt.c +++ b/src/slaunch/opt.c @@ -256,20 +256,22 @@ static int _verify_cpu_bind(const char *arg, char **cpu_bind, p = buf; while ((tok = strsep(&p, ";"))) { - if (!strcasecmp(tok, "help")) { - printf("CPU bind options:\n" - "\tq[uiet], quietly bind before task runs (default)\n" - "\tv[erbose], verbosely report binding before task runs\n" - "\tno[ne] don't bind tasks to CPUs (default)\n" - "\trank bind by task rank\n" - "\tmap_cpu:<list> specify a CPU ID binding for each task\n" - "\t where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" - "\tmask_cpu:<list> specify a CPU ID binding mask for each task\n" - "\t where <list> is <mask1>,<mask2>,...<maskN>\n" - "\tsockets auto-generated masks bind to sockets\n" - "\tcores auto-generated masks bind to cores\n" - "\tthreads auto-generated masks bind to threads\n" - "\thelp show this help message\n"); + if (strcasecmp(tok, "help") == 0) { + printf( +"CPU bind options:\n" +" --cpu_bind= Bind tasks to CPUs\n" +" q[uiet] quietly bind before task runs (default)\n" +" v[erbose] verbosely report binding before task runs\n" +" no[ne] don't bind tasks to CPUs (default)\n" +" rank bind by task rank\n" +" map_cpu:<list> specify a CPU ID binding for each task\n" +" where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" +" mask_cpu:<list> specify a CPU ID binding mask for each task\n" +" where <list> is <mask1>,<mask2>,...<maskN>\n" +" sockets auto-generated masks bind to sockets\n" +" cores auto-generated masks bind to cores\n" +" threads auto-generated masks bind to threads\n" +" help show this help message\n"); return 1; } else if ((strcasecmp(tok, "q") == 0) || (strcasecmp(tok, "quiet") == 0)) { @@ -381,19 +383,21 @@ static int _verify_mem_bind(const char *arg, char **mem_bind, } p = buf; - while((tok = strsep(&p, ";"))) { - if(!strcasecmp(tok, "help")) { - printf("Memory bind options:\n" - "\tq[uiet], quietly bind before task runs (default)\n" - "\tv[erbose], verbosely report binding before task runs\n" - "\tno[ne] don't bind tasks to memory (default)\n" - "\trank bind by task rank\n" - 
"\tlocal bind to memory local to processor\n" - "\tmap_mem:<list> specify a memory binding for each task\n" - "\t where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" - "\tmask_mem:<list> specify a memory binding mask for each tasks\n" - "\t where <list> is <mask1>,<mask2>,...<maskN>\n" - "\thelp show this help message\n"); + while ((tok = strsep(&p, ";"))) { + if (strcasecmp(tok, "help") == 0) { + printf( +"Memory bind options:\n" +" --mem_bind= Bind memory to locality domains (ldom)\n" +" q[uiet] quietly bind before task runs (default)\n" +" v[erbose] verbosely report binding before task runs\n" +" no[ne] don't bind tasks to memory (default)\n" +" rank bind by task rank\n" +" local bind to memory local to processor\n" +" map_mem:<list> specify a memory binding for each task\n" +" where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" +" mask_mem:<list> specify a memory binding mask for each tasks\n" +" where <list> is <mask1>,<mask2>,...<maskN>\n" +" help show this help message\n"); return 1; } else if ((strcasecmp(tok, "q") == 0) || diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 785fa0228cee3f37256f7470a5f08d7202f1494c..f0485e490eaad192b3efb93687dea78b1ebcb57b 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1037,8 +1037,12 @@ static int _load_step_state(struct job_record *job_ptr, Buf buffer) xfree(bit_fmt); } - switch_g_job_step_allocated(switch_tmp, + if (step_ptr->step_layout && step_ptr->step_layout->node_list) { + switch_g_job_step_allocated(switch_tmp, step_ptr->step_layout->node_list); + } else { + switch_g_job_step_allocated(switch_tmp, NULL); + } info("recovered job step %u.%u", job_ptr->job_id, step_id); return SLURM_SUCCESS; @@ -2693,6 +2697,13 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, job_desc_msg->min_threads = 1; /* default thread count of 1 */ if (job_desc_msg->min_nodes == NO_VAL) job_desc_msg->min_nodes = 1; /* default node count of 1 */ + if (job_desc_msg->min_sockets == NO_VAL) + job_desc_msg->min_sockets = 1; /* default socket count of 1 */ + if (job_desc_msg->min_cores == NO_VAL) + job_desc_msg->min_cores = 1; /* default core count of 1 */ + if (job_desc_msg->min_threads == NO_VAL) + job_desc_msg->min_threads = 1; /* default thread count of 1 */ + if (job_desc_msg->job_min_procs == NO_VAL) job_desc_msg->job_min_procs = 1; /* default 1 cpu per node */ if (job_desc_msg->job_min_sockets == NO_VAL) @@ -3454,7 +3465,6 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) } } - if (job_specs->job_min_threads != NO_VAL && detail_ptr) { if (super_user || (detail_ptr->job_min_threads > job_specs->job_min_threads)) { diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 73982d420cf9b96ba5c9b5a45042b3a7f7fe26b5..3c3891dc408b48d2bbe13f8a5421cf7a5caad86a 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -1386,9 +1386,12 @@ extern void build_node_details(struct job_record *job_ptr) error_code = select_g_get_extra_jobinfo( node_ptr, job_ptr, SELECT_AVAIL_CPUS, &usable_lps); - if (cr_enabled) - job_ptr->alloc_lps[cr_count++] = usable_lps; - if(error_code != SLURM_SUCCESS) { + if (error_code == SLURM_SUCCESS) { + if (cr_enabled && job_ptr->alloc_lps) { + job_ptr->alloc_lps[cr_count++] = + usable_lps; + } + } else { if (cr_enabled) { xfree(job_ptr->alloc_lps); job_ptr->alloc_lps = NULL; diff --git a/src/srun/opt.c b/src/srun/opt.c index ddc346f0407b458eeedf8a87299745de152c99a1..ce62ca63b330e3beefcfdde28c977a8ad0126ed0 100644 --- a/src/srun/opt.c +++ 
b/src/srun/opt.c @@ -153,6 +153,7 @@ #define LONG_OPT_NTASKSPERCORE 0x138 #define LONG_OPT_PRINTREQ 0x139 #define LONG_OPT_JOBMEM 0x13a +#define LONG_OPT_HINT 0x13b /*---- global variables, defined in opt.h ----*/ char **remote_argv; @@ -211,7 +212,12 @@ static bool _under_parallel_debugger(void); static void _usage(void); static bool _valid_node_list(char **node_list_pptr); static task_dist_states_t _verify_dist_type(const char *arg, uint32_t *psize); -static bool _verify_cpu_core_thread_count(const char *arg, +static bool _verify_socket_core_thread_count(const char *arg, + int *min_sockets, int *max_sockets, + int *min_cores, int *max_cores, + int *min_threads, int *max_threads, + cpu_bind_type_t *cpu_bind_type); +static bool _verify_hint(const char *arg, int *min_sockets, int *max_sockets, int *min_cores, int *max_cores, int *min_threads, int *max_threads, @@ -447,19 +453,21 @@ static int _verify_cpu_bind(const char *arg, char **cpu_bind, p = buf; while ((tok = strsep(&p, ";"))) { if (strcasecmp(tok, "help") == 0) { - printf("CPU bind options:\n" - "\tq[uiet], quietly bind before task runs (default)\n" - "\tv[erbose], verbosely report binding before task runs\n" - "\tno[ne] don't bind tasks to CPUs (default)\n" - "\trank bind by task rank\n" - "\tmap_cpu:<list> specify a CPU ID binding for each task\n" - "\t where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" - "\tmask_cpu:<list> specify a CPU ID binding mask for each task\n" - "\t where <list> is <mask1>,<mask2>,...<maskN>\n" - "\tsockets auto-generated masks bind to sockets\n" - "\tcores auto-generated masks bind to cores\n" - "\tthreads auto-generated masks bind to threads\n" - "\thelp show this help message\n"); + printf( +"CPU bind options:\n" +" --cpu_bind= Bind tasks to CPUs\n" +" q[uiet] quietly bind before task runs (default)\n" +" v[erbose] verbosely report binding before task runs\n" +" no[ne] don't bind tasks to CPUs (default)\n" +" rank bind by task rank\n" +" map_cpu:<list> specify a CPU ID binding for each task\n" +" where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" +" mask_cpu:<list> specify a CPU ID binding mask for each task\n" +" where <list> is <mask1>,<mask2>,...<maskN>\n" +" sockets auto-generated masks bind to sockets\n" +" cores auto-generated masks bind to cores\n" +" threads auto-generated masks bind to threads\n" +" help show this help message\n"); return 1; } else if ((strcasecmp(tok, "q") == 0) || (strcasecmp(tok, "quiet") == 0)) { @@ -573,17 +581,19 @@ static int _verify_mem_bind(const char *arg, char **mem_bind, p = buf; while ((tok = strsep(&p, ";"))) { if (strcasecmp(tok, "help") == 0) { - printf("Memory bind options:\n" - "\tq[uiet], quietly bind before task runs (default)\n" - "\tv[erbose], verbosely report binding before task runs\n" - "\tno[ne] don't bind tasks to memory (default)\n" - "\trank bind by task rank\n" - "\tlocal bind to memory local to processor\n" - "\tmap_mem:<list> specify a memory binding for each task\n" - "\t where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" - "\tmask_mem:<list> specify a memory binding mask for each tasks\n" - "\t where <list> is <mask1>,<mask2>,...<maskN>\n" - "\thelp show this help message\n"); + printf( +"Memory bind options:\n" +" --mem_bind= Bind memory to locality domains (ldom)\n" +" q[uiet] quietly bind before task runs (default)\n" +" v[erbose] verbosely report binding before task runs\n" +" no[ne] don't bind tasks to memory (default)\n" +" rank bind by task rank\n" +" local bind to memory local to processor\n" +" map_mem:<list> specify a memory binding for 
each task\n" +" where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" +" mask_mem:<list> specify a memory binding mask for each tasks\n" +" where <list> is <mask1>,<mask2>,...<maskN>\n" +" help show this help message\n"); return 1; } else if ((strcasecmp(tok, "q") == 0) || @@ -667,7 +677,7 @@ static int _verify_mem_bind(const char *arg, char **mem_bind, * RET true if valid */ static bool -_verify_cpu_core_thread_count(const char *start_ptr, +_verify_socket_core_thread_count(const char *arg, int *min_sockets, int *max_sockets, int *min_cores, int *max_cores, int *min_threads, int *max_threads, @@ -675,7 +685,7 @@ _verify_cpu_core_thread_count(const char *start_ptr, { bool tmp_val,ret_val; int i,j; - const char *cur_ptr = start_ptr; + const char *cur_ptr = arg; char buf[3][48]; /* each can hold INT64_MAX - INT64_MAX */ buf[0][0] = '\0'; buf[1][0] = '\0'; @@ -721,6 +731,74 @@ _verify_cpu_core_thread_count(const char *start_ptr, return ret_val; } +/* + * verify that a hint is valid and convert it into the implied settings + * RET true if valid + */ +static bool +_verify_hint(const char *arg, + int *min_sockets, int *max_sockets, + int *min_cores, int *max_cores, + int *min_threads, int *max_threads, + cpu_bind_type_t *cpu_bind_type) +{ + char *buf, *p, *tok; + if (!arg) { + return true; + } + + buf = xstrdup(arg); + p = buf; + /* change all ',' delimiters not followed by a digit to ';' */ + /* simplifies parsing tokens while keeping map/mask together */ + while (*p) { + if (*p == ',') { + if (!isdigit(*(p+1))) + *p = ';'; + } + *p++; + } + + p = buf; + while ((tok = strsep(&p, ";"))) { + if (strcasecmp(tok, "help") == 0) { + printf( +"Application hint options:\n" +" --hint= Bind tasks according to application hints\n" +" compute_bound use all cores in each physical CPU\n" +" memory_bound use only one core in each physical CPU\n" +" [no]multithread [don't] use extra threads with in-core multi-threading\n" +" help show this help message\n"); + return 1; + } else if (strcasecmp(tok, "compute_bound") == 0) { + *min_sockets = 1; + *max_sockets = INT_MAX; + *min_cores = 1; + *max_cores = INT_MAX; + *cpu_bind_type |= CPU_BIND_TO_CORES; + } else if (strcasecmp(tok, "memory_bound") == 0) { + *min_cores = 1; + *max_cores = 1; + *cpu_bind_type |= CPU_BIND_TO_CORES; + } else if (strcasecmp(tok, "multithread") == 0) { + *min_threads = 1; + *max_threads = INT_MAX; + *cpu_bind_type |= CPU_BIND_TO_THREADS; + } else if (strcasecmp(tok, "nomultithread") == 0) { + *min_threads = 1; + *max_threads = 1; + *cpu_bind_type |= CPU_BIND_TO_THREADS; + } else { + error("unrecognized --hint argument \"%s\", see --hint=help", tok); + xfree(buf); + return 1; + } + } + + xfree(buf); + return 0; +} + /* return command name from its full path name */ static char * _base_name(char* command) { @@ -843,16 +921,15 @@ static void _opt_default() opt.cpus_set = false; opt.min_nodes = 1; opt.max_nodes = 0; - opt.min_sockets_per_node = 0; /* request, not constraint (mincpus) */ - opt.max_sockets_per_node = 0; - opt.min_cores_per_socket = 0; /* request, not constraint (mincores) */ - opt.max_cores_per_socket = 0; - opt.min_threads_per_core = 0; /* request, not constraint - * (minthreads */ - opt.max_threads_per_core = 0; - opt.ntasks_per_node = 0; - opt.ntasks_per_socket = 0; - opt.ntasks_per_core = 0; + opt.min_sockets_per_node = NO_VAL; /* requested min/maxsockets */ + opt.max_sockets_per_node = NO_VAL; + opt.min_cores_per_socket = NO_VAL; /* requested min/maxcores */ + opt.max_cores_per_socket = NO_VAL; + opt.min_threads_per_core = 
+	opt.max_threads_per_core = NO_VAL;
+	opt.ntasks_per_node = NO_VAL; /* ntask max limits */
+	opt.ntasks_per_socket = NO_VAL;
+	opt.ntasks_per_core = NO_VAL;
 	opt.nodes_set = false;
 	opt.cpu_bind_type = 0;
 	opt.cpu_bind = NULL;
@@ -1281,6 +1358,7 @@ void set_options(const int argc, char **argv, int first)
 		{"minthreads",       required_argument, 0, LONG_OPT_MINTHREADS},
 		{"mem",              required_argument, 0, LONG_OPT_MEM},
 		{"job-mem",          required_argument, 0, LONG_OPT_JOBMEM},
+		{"hint",             required_argument, 0, LONG_OPT_HINT},
 		{"mpi",              required_argument, 0, LONG_OPT_MPI},
 		{"no-shell",         no_argument,       0, LONG_OPT_NOSHELL},
 		{"tmp",              required_argument, 0, LONG_OPT_TMP},
@@ -1392,7 +1470,7 @@ void set_options(const int argc, char **argv, int first)
 			if(!first && opt.extra_set)
 				break;
 
-			opt.extra_set = _verify_cpu_core_thread_count(
+			opt.extra_set = _verify_socket_core_thread_count(
 						optarg,
 						&opt.min_sockets_per_node,
 						&opt.max_sockets_per_node,
@@ -1840,6 +1918,18 @@ void set_options(const int argc, char **argv, int first)
 					&opt.min_threads_per_core,
 					&opt.max_threads_per_core, true );
 			break;
+		case LONG_OPT_HINT:
+			if (_verify_hint(optarg,
+					&opt.min_sockets_per_node,
+					&opt.max_sockets_per_node,
+					&opt.min_cores_per_socket,
+					&opt.max_cores_per_socket,
+					&opt.min_threads_per_core,
+					&opt.max_threads_per_core,
+					&opt.cpu_bind_type)) {
+				exit(1);
+			}
+			break;
 		case LONG_OPT_NTASKSPERNODE:
 			opt.ntasks_per_node = _get_int(optarg,
 				"ntasks-per-node", true);
@@ -2615,6 +2705,8 @@ static void _help(void)
 	if (conf->task_plugin != NULL
 	    && strcasecmp(conf->task_plugin, "task/affinity") == 0) {
 		printf(
+"      --hint=             Bind tasks according to application hints\n"
+"                          (see \"--hint=help\" for options)\n"
 "      --cpu_bind=         Bind tasks to CPUs\n"
 "                          (see \"--cpu_bind=help\" for options)\n"
 "      --mem_bind=         Bind memory to locality domains (ldom)\n"
diff --git a/testsuite/expect/globals b/testsuite/expect/globals
index 34f7e35fa908edb9f407f5e0b56039feb5f9ef20..1eadf38a4ffe913497532741686d2eba39dba9f9 100755
--- a/testsuite/expect/globals
+++ b/testsuite/expect/globals
@@ -86,7 +86,7 @@ cset mpirun "mpirun"
 cset totalviewcli "/usr/local/bin/totalviewcli"
 
 # Pattern to match your shell prompt
-cset prompt {(%|#|\$) *$}
+cset prompt {(%|#|\$|\]) *$}
 
 #
 # Specify locations of other executable files used
diff --git a/testsuite/expect/test1.89 b/testsuite/expect/test1.89
index 22fc50ca5118c61ab00fb7cd26e13433677542ba..73750e5acf05152467d8ccd9b0b6399d63d2d77c 100755
--- a/testsuite/expect/test1.89
+++ b/testsuite/expect/test1.89
@@ -102,6 +102,7 @@ expect {
 #
 # Run a job step with affinity
 #
+set expected_mask [ expr ((1 << $task_cnt) - 1) ]
 set task_mask 0
 send "$srun -c1 --cpu_bind=rank $file_prog\n"
 expect {
@@ -121,8 +122,8 @@ expect {
 	}
 	-re $prompt
 }
-if {$task_mask != $mask} {
-	send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$mask)\n"
+if {$task_mask != $expected_mask} {
+	send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$expected_mask)\n"
 	set exit_code 1
 }
 
diff --git a/testsuite/expect/test18.36 b/testsuite/expect/test18.36
index f9f652d5c583d7f6cbc50b57e0409501ec6e6768..50bd11a20db2e44d6d8d7c3e74a8deb946537400 100755
--- a/testsuite/expect/test18.36
+++ b/testsuite/expect/test18.36
@@ -121,6 +121,7 @@ expect {
 #
 # Run a job step with affinity
 #
+set expected_mask [ expr ((1 << $task_cnt) - 1) ]
 set task_mask 0
 send "$slaunch -n $available_cpus --cpu_bind=rank $file_prog\n"
 expect {
@@ -139,7 +140,7 @@ expect {
 	}
 	-re $prompt
 }
-if {$task_mask != $mask} {
-	send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$mask)\n"
send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$mask)\n" set exit_code 1 }