diff --git a/doc/html/configurator.html.in b/doc/html/configurator.html.in index 37d95bbbe866a0299e66ae11b647509d0397c390..6273e459a1423e685c617510fdd0edba08bc448d 100644 --- a/doc/html/configurator.html.in +++ b/doc/html/configurator.html.in @@ -505,7 +505,6 @@ CPU affinity support <DT><B>TaskPluginParam</B> (As used by <I>TaskPlugin=Affinity</I> only): <DT><input type="radio" name="task_plugin_param" value="Cpusets"> <B>Cpusets</B>: Use <I>cpusets</I> to control task binding. - To become available in the Spring of 2007. <DT><input type="radio" name="task_plugin_param" value="Sched" checked> <B>Sched</B>: Use <I>sched_setaffinity</I> or <I>plpa_sched_setaffinity</I> (if available) to bind tasks to processors. This is the default mode of @@ -645,6 +644,6 @@ before terminating all remaining tasks. A value of zero indicates unlimited wait </FORM> <HR> <p class="footer">UCRL-WEB-225274<br> -Last modified 19 December 2006</p> +Last modified 22 March 2007</p> </BODY> diff --git a/src/plugins/task/affinity/affinity.h b/src/plugins/task/affinity/affinity.h index e46fc8fb27fcc28349dc2421005691861820aaf7..446e98dfc2bdd99f071cc5f6ecc0bf4a2cfa0e90 100644 --- a/src/plugins/task/affinity/affinity.h +++ b/src/plugins/task/affinity/affinity.h @@ -88,14 +88,22 @@ #include "src/common/util-net.h" #include "src/common/slurm_resource_info.h" +#define CPUSET_DIR "/etc/cpuset" + /*** from affinity.c ***/ void slurm_chkaffinity(cpu_set_t *mask, slurmd_job_t *job, int statval); int get_cpuset(cpu_set_t *mask, slurmd_job_t *job); int slurm_setaffinity(pid_t pid, size_t size, const cpu_set_t *mask); int slurm_getaffinity(pid_t pid, size_t size, cpu_set_t *mask); + +/*** from cpuset.c ***/ +#ifdef HAVE_NUMA +int slurm_set_memset(char *path, nodemask_t *new_mask); +int slurm_memset_available(void); +#endif +int slurm_get_cpuset(char *path, pid_t pid, size_t size, cpu_set_t *mask); int slurm_set_cpuset(char *path, pid_t pid, size_t size, const cpu_set_t *mask); -int slurm_get_cpuset(char 
*path, pid_t pid, size_t size, cpu_set_t *mask); /*** from numa.c ***/ #ifdef HAVE_NUMA diff --git a/src/plugins/task/affinity/cpuset.c b/src/plugins/task/affinity/cpuset.c index 97decac63fddfe824b51a0bb362ff18464c88bc7..ab2a2791db353aeb3654bb83267afc8e83ad5149 100644 --- a/src/plugins/task/affinity/cpuset.c +++ b/src/plugins/task/affinity/cpuset.c @@ -38,6 +38,22 @@ #include "affinity.h" +static void _cpuset_to_cpustr(const cpu_set_t *mask, char *str) +{ + int i; + char tmp[16]; + + str[0] = '\0'; + for (i=0; i<CPU_SETSIZE; i++) { + if (!CPU_ISSET(i, mask)) + continue; + snprintf(tmp, sizeof(tmp), "%d", i); + if (str[0]) + strcat(str, ","); + strcat(str, tmp); + } +} + int slurm_set_cpuset(char *path, pid_t pid, size_t size, const cpu_set_t *mask) { @@ -51,17 +67,17 @@ int slurm_set_cpuset(char *path, pid_t pid, size_t size, } snprintf(file_path, sizeof(file_path), "%s/notify_on_release", path); - fd = open(file_path, O_CREAT | O_WRONLY); + fd = open(file_path, O_CREAT | O_WRONLY, 0700); if (fd < 0) { error("open(%s): %m", file_path); return -1; } - rc = write(fd, "1", 2); /* not sure if this is right */ + rc = write(fd, "1", 2); close(fd); snprintf(file_path, sizeof(file_path), "%s/cpus", path); - cpuset_to_str(mask, mstr); - fd = open(file_path, O_CREAT | O_WRONLY); + _cpuset_to_cpustr(mask, mstr); + fd = open(file_path, O_CREAT | O_WRONLY, 0700); if (fd < 0) { error("open(%s): %m", file_path); return -1; @@ -75,15 +91,15 @@ int slurm_set_cpuset(char *path, pid_t pid, size_t size, snprintf(file_path, sizeof(file_path), "%s/tasks", path); snprintf(mstr, sizeof(mstr), "%d", pid); - fd = open(file_path, O_CREAT); + fd = open(file_path, O_CREAT | O_WRONLY, 0700); if (fd < 0) { error("open(%s): %m", file_path); return -1; } - rc = write(fd, mstr, strlen(mstr)); + rc = write(fd, mstr, strlen(mstr)+1); close(fd); if (rc < 1) { - error("write(%s): %m", file_path); + error("write(%s, %s): %m", file_path, mstr); return -1; } @@ -111,7 +127,7 @@ int slurm_get_cpuset(char 
*path, pid_t pid, size_t size, cpu_set_t *mask) str_to_cpuset(mask, mstr); snprintf(file_path, sizeof(file_path), "%s/tasks", path); - fd = open(file_path, O_RDONLY); + fd = open(file_path, O_CREAT | O_RDONLY, 0700); if (fd < 0) { error("open(%s): %m", file_path); return -1; @@ -122,177 +138,55 @@ int slurm_get_cpuset(char *path, pid_t pid, size_t size, cpu_set_t *mask) error("read(%s): %m", file_path); return -1; } - /* verify that pid is in mstr */ + + /* FIXME: verify that pid is in mstr */ return 0; } -#if 0 -int get_memset_mask(slurm_memmask_t *mem_mask, slurm_cpumask_t *cpu_mask, - slurmd_job_t *job) +#ifdef HAVE_NUMA +int slurm_memset_available(void) { - slurm_cpumask_t cur_mask, tst_mask; - int nbr_nodes, nummasks, i, j; - char *curstr, *selstr; - char mstr[1 + CPU_SETSIZE / 4]; - int local_id = job->envtp->localid; - - debug2("get_memset_mask bind_type = %d, bind_list = %s\n", - job->mem_bind_type, job->mem_bind); - - /* If "not specified" or "None" or "Rank", - * do not set a new memory mask in the CPUset */ - if ((!job->mem_bind_type) - || (job->mem_bind_type & (MEM_BIND_NONE | MEM_BIND_RANK))) - return false; - - /* For now, make LOCAL and MAP_CPU the same */ - if (job->mem_bind_type & (MEM_BIND_LOCAL | MEM_BIND_MAP)) { - nbr_nodes = cs_nr_nodes(); - for (i=0; i<nbr_nodes; i++) { - cs_get_node_cpus(i, &cur_mask); - cs_cpumask_and(&tst_mask, &cur_mask, cpu_mask); - if (!cs_cpumask_empty(&tst_mask)) { - cs_memmask_add(mem_mask, i); - debug2("added node = %d to mem mask %08x \n", - i,*mem_mask); - } - } - - return true; - } - - /* allow user to set specific memory masks */ - if (job->mem_bind_type & MEM_BIND_MASK) { - /* find mask entry for this task */ - nummasks = 0; - selstr = NULL; + char file_path[PATH_MAX]; + struct stat buf; - curstr = job->mem_bind; - while (*curstr) { - if (nummasks == local_id) { - selstr = curstr; - break; - } - if (*curstr == ',') - nummasks++; - curstr++; - } - /* check if we found a mem entry */ - if (!selstr){ - error("not 
enough entries in mask_mem:<list>"); - return false; - } - /* extract the selected mask from the list */ - j = 0; - curstr = mstr; - while ((*selstr && *selstr != ',') && - (j++ < (CPU_SETSIZE/4))) { - *curstr++ = *selstr++; - } - *curstr = '\0'; - /* convert mask string into cpu_set_t mask */ - if (str_to_cpuset( (cpu_set_t *) mem_mask, mstr) < 0) { - error("str_to_cpuset %s", mstr); - return false; - } - return true; - } - return false; + snprintf(file_path, sizeof(file_path), "%s/mems", CPUSET_DIR); + return stat(file_path, &buf); } -int make_task_cpuset(slurmd_job_t *job, slurm_cpumask_t *cpu_mask, - slurm_memmask_t *mem_mask) +int slurm_set_memset(char *path, nodemask_t *new_mask) { + char file_path[PATH_MAX]; + char mstr[1 + CPU_SETSIZE / 4], tmp[10]; + int fd, i, max_node, wrote; + ssize_t rc; + static nodemask_t mem_mask; - char path[PATH_MAX]; - char *current_cs = NULL; - int retval = 0; - int success = 0; - - info("cpuset - cs_init called"); - cs_init(); - - current_cs = cs_get_current(); - if (!current_cs) - return -ENOMEM; - - int l = snprintf(path, PATH_MAX, "%sslurm%u_%d", current_cs, job->jobid, - job->envtp->localid); - if (l > PATH_MAX) { - retval = -ENAMETOOLONG; - goto out; - } - - debug("cpuset path = %s",path); - - retval = cs_create(path); - if (retval < 0) - goto out; - retval = cs_set_autoclean(path, CS_AUTOCLEAN); - if (retval < 0) - goto out; - - cs_lock_libcpuset(); - retval = cs_set_cpus(path, *cpu_mask); - if (retval < 0) { - debug3("cpuset - error on cs_set_cpus = %d %s", - retval, cs_strerror(retval)); + snprintf(file_path, sizeof(file_path), "%s/mems", path); + fd = open(file_path, O_CREAT | O_WRONLY, 0700); + if (fd < 0) { + error("open(%s): %m", file_path); + return -1; } - success = 1; - - if (cs_supports_mem()) { - /* Check for mem_bind options */ - if (get_memset_mask(mem_mask, cpu_mask, job)) { - debug("cpuset - mem_mask = %d (decimal) and %08x (hex)", - *mem_mask, *mem_mask); - retval = cs_set_mems(path, *mem_mask); - 
if (retval < 0) { - debug3("cpuset - error on cs_set_mems = %d %s", - retval,cs_strerror(retval)); - success = 0; - goto out_created; - } - } else { - /* Copy parent of new cpuset (i.e current) mems mask */ - retval = cs_get_mems(current_cs, mem_mask); - if (retval < 0) { - debug3("cpuset - error on cs_get_mems = %d %s", - retval,cs_strerror(retval)); - success = 0; - goto out_created; - } - retval = cs_set_mems(path, *mem_mask); - if (retval < 0) { - debug3("cpuset - error on cs_set_mems = %d %s", - retval, cs_strerror(retval)); - success = 0; - goto out_created; - } - } + mstr[0] = '\0'; + max_node = numa_max_node(); + for (i=0; i<=max_node; i++) { + if (!nodemask_isset(new_mask, i)) + continue; + snprintf(tmp, sizeof(tmp), "%d", i); + if (mstr[0]) + strcat(mstr, ","); + strcat(mstr, tmp); } - retval = cs_add_task(path, job->envtp->task_pid); - if (retval < 0) { - debug3("cpuset - error on cs_add_task = %d %s", - retval, cs_strerror(retval)); + i = strlen(mstr) + 1; + rc = write(fd, mstr, i); + close(fd); + if (rc < i) { + error("write(%s): %m", file_path); + return -1; } - - out_created: - if (!success) - cs_destroy(path); - cs_unlock_libcpuset(); - - out: - free(current_cs); - - current_cs = cs_get_current(); - debug("cpuset - exit make_task_cpuset retval = %d cpuset = %s", - retval, current_cs); - free(current_cs); - - return retval; + return 0; } - #endif diff --git a/src/plugins/task/affinity/task_affinity.c b/src/plugins/task/affinity/task_affinity.c index 0a27e81ef33e657bdeecdf7bc0a1a6cc390ef041..144a1bc0f2194459a940dcbed5221ff26efba56c 100644 --- a/src/plugins/task/affinity/task_affinity.c +++ b/src/plugins/task/affinity/task_affinity.c @@ -48,8 +48,6 @@ #include "affinity.h" #include "dist_tasks.h" -#define CPUSET_DIR "/dev/cpuset" - /* * These variables are required by the generic plugin interface. If they * are not found in the plugin, the plugin loader will ignore it. 
@@ -155,12 +153,20 @@ int task_slurmd_release_resources ( uint32_t job_id ) */ int task_pre_launch ( slurmd_job_t *job ) { + char path[PATH_MAX]; + debug("affinity task_pre_launch: %u.%u, task %d", job->jobid, job->stepid, job->envtp->procid); - if (conf->use_cpusets) + if (conf->use_cpusets) { info("Using cpuset affinity for tasks"); - else + if (snprintf(path, PATH_MAX, "%s/slurm%u.%u_%d", + CPUSET_DIR, job->jobid, job->stepid, + job->envtp->localid) >= PATH_MAX) { + error("cpuset path too long"); + return SLURM_ERROR; + } + } else info("Using sched_affinity for tasks"); /*** CPU binding support ***/ @@ -174,14 +180,6 @@ int task_pre_launch ( slurmd_job_t *job ) if (get_cpuset(&new_mask, job) && (!(job->cpu_bind_type & CPU_BIND_NONE))) { if (conf->use_cpusets) { - char path[PATH_MAX]; - if (snprintf(path, PATH_MAX, "%sslurm%u_%d", - CPUSET_DIR, job->jobid, - job->envtp->localid) > - PATH_MAX) { - error("cpuset path too long"); - return SLURM_ERROR; - } setval = slurm_set_cpuset(path, mypid, sizeof(new_mask), &new_mask); @@ -202,15 +200,24 @@ int task_pre_launch ( slurmd_job_t *job ) } #ifdef HAVE_NUMA - if (job->mem_bind_type && (numa_available() >= 0)) { + if (conf->use_cpusets && (slurm_memset_available() >= 0)) { nodemask_t new_mask, cur_mask; cur_mask = numa_get_membind(); - if (get_memset(&new_mask, job)) { - if (!(job->mem_bind_type & MEM_BIND_NONE)) { - numa_set_membind(&new_mask); - cur_mask = new_mask; - } + if (get_memset(&new_mask, job) + && (!(job->mem_bind_type & MEM_BIND_NONE))) { + slurm_set_memset(path, &new_mask); + cur_mask = new_mask; + } + slurm_chk_memset(&cur_mask, job); + } else if (job->mem_bind_type && (numa_available() >= 0)) { + nodemask_t new_mask, cur_mask; + + cur_mask = numa_get_membind(); + if (get_memset(&new_mask, job) + && (!(job->mem_bind_type & MEM_BIND_NONE))) { + numa_set_membind(&new_mask); + cur_mask = new_mask; } slurm_chk_memset(&cur_mask, job); }