Skip to content
Snippets Groups Projects
Commit d55edbe9 authored by Moe Jette
Browse files

Needed to set "mems" and "cpus" before any tasks could be added to a cpuset.

parent ddc48a62
No related branches found
No related tags found
No related merge requests found
...@@ -103,7 +103,7 @@ int slurm_memset_available(void); ...@@ -103,7 +103,7 @@ int slurm_memset_available(void);
#endif #endif
int slurm_build_cpuset(char *base, char *path, uid_t uid, gid_t gid); int slurm_build_cpuset(char *base, char *path, uid_t uid, gid_t gid);
int slurm_get_cpuset(char *path, pid_t pid, size_t size, cpu_set_t *mask); int slurm_get_cpuset(char *path, pid_t pid, size_t size, cpu_set_t *mask);
int slurm_set_cpuset(char *path, pid_t pid, size_t size, int slurm_set_cpuset(char *base, char *path, pid_t pid, size_t size,
const cpu_set_t *mask); const cpu_set_t *mask);
/*** from numa.c ***/ /*** from numa.c ***/
......
...@@ -59,6 +59,14 @@ int slurm_build_cpuset(char *base, char *path, uid_t uid, gid_t gid) ...@@ -59,6 +59,14 @@ int slurm_build_cpuset(char *base, char *path, uid_t uid, gid_t gid)
char file_path[PATH_MAX], mstr[16]; char file_path[PATH_MAX], mstr[16];
int fd, rc; int fd, rc;
if (mkdir(path, 0700) && (errno != EEXIST)) {
error("mkdir(%s): %m", path);
return -1;
}
if (chown(path, uid, gid))
error("chown(%s): %m", path);
/* copy "cpus" contents from parent directory */
snprintf(file_path, sizeof(file_path), "%s/cpus", base); snprintf(file_path, sizeof(file_path), "%s/cpus", base);
fd = open(file_path, O_RDONLY); fd = open(file_path, O_RDONLY);
if (fd < 0) { if (fd < 0) {
...@@ -71,21 +79,51 @@ int slurm_build_cpuset(char *base, char *path, uid_t uid, gid_t gid) ...@@ -71,21 +79,51 @@ int slurm_build_cpuset(char *base, char *path, uid_t uid, gid_t gid)
error("read(%s): %m", file_path); error("read(%s): %m", file_path);
return -1; return -1;
} }
snprintf(file_path, sizeof(file_path), "%s/cpus", path);
if (mkdir(path, 0700) && (errno != EEXIST)) { fd = open(file_path, O_CREAT | O_WRONLY, 0700);
error("mkdir(%s): %m", path); if (fd < 0) {
error("open(%s): %m", file_path);
return -1;
}
rc = write(fd, mstr, rc);
close(fd);
if (rc < 1) {
error("write(%s): %m", file_path);
return -1; return -1;
} }
snprintf(file_path, sizeof(file_path), "%s/cpus", path); /* copy "mems" contents from parent directory, if it exists */
snprintf(file_path, sizeof(file_path), "%s/mems", base);
fd = open(file_path, O_RDONLY);
if (fd < 0) {
error("open(%s): %m", file_path);
} else {
rc = read(fd, mstr, sizeof(mstr));
close(fd);
if (rc < 1)
error("read(%s): %m", file_path);
snprintf(file_path, sizeof(file_path), "%s/mems", path);
fd = open(file_path, O_CREAT | O_WRONLY, 0700);
if (fd < 0) {
error("open(%s): %m", file_path);
return -1;
}
rc = write(fd, mstr, rc);
close(fd);
if (rc < 1)
error("write(%s): %m", file_path);
}
snprintf(file_path, sizeof(file_path), "%s/notify_on_release", path);
fd = open(file_path, O_CREAT | O_WRONLY, 0700); fd = open(file_path, O_CREAT | O_WRONLY, 0700);
if (fd < 0) { if (fd < 0) {
error("open(%s): %m", file_path); error("open(%s): %m", file_path);
return -1; return -1;
} }
rc = write(fd, mstr, rc); rc = write(fd, "1", 2);
close(fd); close(fd);
/* only now can we add tasks */
snprintf(file_path, sizeof(file_path), "%s/tasks", path); snprintf(file_path, sizeof(file_path), "%s/tasks", path);
snprintf(mstr, sizeof(mstr), "%d", getpid()); snprintf(mstr, sizeof(mstr), "%d", getpid());
fd = open(file_path, O_CREAT | O_WRONLY, 0700); fd = open(file_path, O_CREAT | O_WRONLY, 0700);
...@@ -100,22 +138,10 @@ int slurm_build_cpuset(char *base, char *path, uid_t uid, gid_t gid) ...@@ -100,22 +138,10 @@ int slurm_build_cpuset(char *base, char *path, uid_t uid, gid_t gid)
return -1; return -1;
} }
snprintf(file_path, sizeof(file_path), "%s/notify_on_release", path);
fd = open(file_path, O_CREAT | O_WRONLY, 0700);
if (fd < 0) {
error("open(%s): %m", file_path);
return -1;
}
rc = write(fd, "1", 2);
close(fd);
if (chown(path, uid, gid))
error("chown(%s): %m", path);
return 0; return 0;
} }
int slurm_set_cpuset(char *path, pid_t pid, size_t size, int slurm_set_cpuset(char *base, char *path, pid_t pid, size_t size,
const cpu_set_t *mask) const cpu_set_t *mask)
{ {
int fd, rc; int fd, rc;
...@@ -127,15 +153,6 @@ int slurm_set_cpuset(char *path, pid_t pid, size_t size, ...@@ -127,15 +153,6 @@ int slurm_set_cpuset(char *path, pid_t pid, size_t size,
return -1; return -1;
} }
snprintf(file_path, sizeof(file_path), "%s/notify_on_release", path);
fd = open(file_path, O_CREAT | O_WRONLY, 0700);
if (fd < 0) {
error("open(%s): %m", file_path);
return -1;
}
rc = write(fd, "1", 2);
close(fd);
snprintf(file_path, sizeof(file_path), "%s/cpus", path); snprintf(file_path, sizeof(file_path), "%s/cpus", path);
_cpuset_to_cpustr(mask, mstr); _cpuset_to_cpustr(mask, mstr);
fd = open(file_path, O_CREAT | O_WRONLY, 0700); fd = open(file_path, O_CREAT | O_WRONLY, 0700);
...@@ -150,6 +167,37 @@ int slurm_set_cpuset(char *path, pid_t pid, size_t size, ...@@ -150,6 +167,37 @@ int slurm_set_cpuset(char *path, pid_t pid, size_t size,
return -1; return -1;
} }
/* copy "mems" contents from parent directory, if it exists */
snprintf(file_path, sizeof(file_path), "%s/mems", base);
fd = open(file_path, O_RDONLY);
if (fd < 0) {
error("open(%s): %m", file_path);
} else {
rc = read(fd, mstr, sizeof(mstr));
close(fd);
if (rc < 1)
error("read(%s): %m", file_path);
snprintf(file_path, sizeof(file_path), "%s/mems", path);
fd = open(file_path, O_CREAT | O_WRONLY, 0700);
if (fd < 0) {
error("open(%s): %m", file_path);
return -1;
}
rc = write(fd, mstr, rc);
close(fd);
if (rc < 1)
error("write(%s): %m", file_path);
}
snprintf(file_path, sizeof(file_path), "%s/notify_on_release", path);
fd = open(file_path, O_CREAT | O_WRONLY, 0700);
if (fd < 0) {
error("open(%s): %m", file_path);
return -1;
}
rc = write(fd, "1", 2);
close(fd);
snprintf(file_path, sizeof(file_path), "%s/tasks", path); snprintf(file_path, sizeof(file_path), "%s/tasks", path);
snprintf(mstr, sizeof(mstr), "%d", pid); snprintf(mstr, sizeof(mstr), "%d", pid);
fd = open(file_path, O_CREAT | O_WRONLY, 0700); fd = open(file_path, O_CREAT | O_WRONLY, 0700);
......
...@@ -174,15 +174,20 @@ int task_pre_setuid ( slurmd_job_t *job ) ...@@ -174,15 +174,20 @@ int task_pre_setuid ( slurmd_job_t *job )
*/ */
int task_pre_launch ( slurmd_job_t *job ) int task_pre_launch ( slurmd_job_t *job )
{ {
char path[PATH_MAX]; char base[PATH_MAX], path[PATH_MAX];
debug("affinity task_pre_launch: %u.%u, task %d", debug("affinity task_pre_launch: %u.%u, task %d",
job->jobid, job->stepid, job->envtp->procid); job->jobid, job->stepid, job->envtp->procid);
if (conf->use_cpusets) { if (conf->use_cpusets) {
info("Using cpuset affinity for tasks"); info("Using cpuset affinity for tasks");
if (snprintf(path, PATH_MAX, "%s/slurm%u/slurm%u.%u_%d", if (snprintf(base, PATH_MAX, "%s/slurm%u",
CPUSET_DIR, job->jobid, job->jobid, job->stepid, CPUSET_DIR, job->jobid) > PATH_MAX) {
error("cpuset path too long");
return SLURM_ERROR;
}
if (snprintf(path, PATH_MAX, "%s/slurm%u.%u_%d",
base, job->jobid, job->stepid,
job->envtp->localid) > PATH_MAX) { job->envtp->localid) > PATH_MAX) {
error("cpuset path too long"); error("cpuset path too long");
return SLURM_ERROR; return SLURM_ERROR;
...@@ -201,7 +206,7 @@ int task_pre_launch ( slurmd_job_t *job ) ...@@ -201,7 +206,7 @@ int task_pre_launch ( slurmd_job_t *job )
if (get_cpuset(&new_mask, job) if (get_cpuset(&new_mask, job)
&& (!(job->cpu_bind_type & CPU_BIND_NONE))) { && (!(job->cpu_bind_type & CPU_BIND_NONE))) {
if (conf->use_cpusets) { if (conf->use_cpusets) {
setval = slurm_set_cpuset(path, mypid, setval = slurm_set_cpuset(base, path, mypid,
sizeof(new_mask), sizeof(new_mask),
&new_mask); &new_mask);
slurm_get_cpuset(path, mypid, slurm_get_cpuset(path, mypid,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment