From 0857ad1b77236a4585fea8261dfa9adde0f7376c Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Wed, 11 Feb 2009 18:03:46 +0000 Subject: [PATCH] Add DebugFlag configuration option of "CPU_Bind" for detailed CPU binding information to be logged. --- NEWS | 2 ++ doc/man/man5/slurm.conf.5 | 7 +++++-- slurm/slurm.h.in | 2 +- src/api/config_info.c | 8 ++++---- src/common/read_config.c | 9 ++++++++- src/common/select_job_res.c | 7 ++++--- src/common/select_job_res.h | 3 ++- src/plugins/select/cons_res/select_cons_res.c | 12 +++++++++++- 8 files changed, 37 insertions(+), 13 deletions(-) diff --git a/NEWS b/NEWS index affccfed6fe..40f3edb3082 100644 --- a/NEWS +++ b/NEWS @@ -16,6 +16,8 @@ documents those changes that are of interest to users and admins. Andersson, National Supercomputer Centre, Sweden. -- Add support for the scontrol command to be able change the Weight associated with nodes. Patch from Krishnakumar Ravi[KK] (HP). + -- Add DebugFlag configuration option of "CPU_Bind" for detailed CPU + binding information to be logged. * Changes in SLURM 1.4.0-pre7 ============================= diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 42d717a3fb4..8d20987265d 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -240,11 +240,14 @@ Multiple subsystems can be specified with comma separators. Valid subsystems available today (with more to come) include: .RS .TP 15 -\fBTriggers\fR -Slurmctld triggers +\fBCPU_Bind\fR +CPU binding details for jobs and steps .TP \fBSteps\fR Slurmctld resource allocation for job steps +.TP +\fBTriggers\fR +Slurmctld triggers .RE .TP diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 30227885684..6581b888a55 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -1024,7 +1024,7 @@ typedef struct reservation_name_msg { #define DEBUG_FLAG_SELECT_TYPE 0x00000001 /* SelectType plugin */ #define DEBUG_FLAG_STEPS 0x00000002 /* slurmctld steps */ #define DEBUG_FLAG_TRIGGERS 0x00000004 /* slurmctld triggers */ - +#define DEBUG_FLAG_CPU_BIND 0x00000008 /* CPU binding */ typedef struct slurm_ctl_conf { time_t last_update; /* last update time of the build parameters */ diff --git a/src/api/config_info.c b/src/api/config_info.c index 21ddeae1aae..cd4255d9bd1 100644 --- a/src/api/config_info.c +++ b/src/api/config_info.c @@ -148,6 +148,10 @@ void slurm_print_ctl_conf ( FILE* out, fprintf(out, "CryptoType = %s\n", slurm_ctl_conf_ptr->crypto_type); + xbuf = debug_flags2str(slurm_ctl_conf_ptr->debug_flags); + fprintf(out, "DebugFlags = %s\n", xbuf); + xfree(xbuf); + if (slurm_ctl_conf_ptr->def_mem_per_task & MEM_PER_CPU) { fprintf(out, "DefMemPerCPU = %u MB\n", slurm_ctl_conf_ptr->def_mem_per_task & @@ -158,10 +162,6 @@ void slurm_print_ctl_conf ( FILE* out, } else fprintf(out, "DefMemPerCPU = UNLIMITED\n"); - xbuf = debug_flags2str(slurm_ctl_conf_ptr->debug_flags); - fprintf(out, "DebugFlags = %s\n", xbuf); - xfree(xbuf); - if (slurm_ctl_conf_ptr->disable_root_jobs) fprintf(out, "DisableRootJobs = YES\n"); else diff --git a/src/common/read_config.c b/src/common/read_config.c index beb52b6fbe2..f9d744f1736 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -2265,6 +2265,11 @@ extern char * debug_flags2str(uint32_t debug_flags) { char *rc = NULL; + if (debug_flags & DEBUG_FLAG_CPU_BIND) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "CPU_Bind"); + } if (debug_flags & DEBUG_FLAG_SELECT_TYPE) { if (rc) xstrcat(rc, ","); @@ -2299,7 +2304,9 @@ extern uint32_t debug_str2flags(char *debug_flags) tmp_str = xstrdup(debug_flags); tok = strtok_r(tmp_str, ",", &last); while (tok) { - if (strcasecmp(tok, "SelectType") == 0) + if (strcasecmp(tok, "CPU_Bind") == 0) + rc |= DEBUG_FLAG_CPU_BIND; + else if (strcasecmp(tok, "SelectType") == 0) rc |= DEBUG_FLAG_SELECT_TYPE; else if (strcasecmp(tok, "Steps") == 0) rc |= DEBUG_FLAG_STEPS; diff --git a/src/common/select_job_res.c b/src/common/select_job_res.c index e2f13715bf0..0c988a27113 100644 --- a/src/common/select_job_res.c +++ b/src/common/select_job_res.c @@ -409,7 +409,8 @@ extern void free_select_job_res(select_job_res_t *select_job_res_pptr) } /* Log the contents of a select_job_res data structure using info() */ -extern void log_select_job_res(select_job_res_t select_job_res_ptr) +extern void log_select_job_res(uint32_t job_id, + select_job_res_t select_job_res_ptr) { int bit_inx = 0, bit_reps, i; int array_size, node_inx; @@ -421,8 +422,8 @@ extern void log_select_job_res(select_job_res_t select_job_res_ptr) } info("===================="); - info("nhosts:%u nprocs:%u node_req:%u", - select_job_res_ptr->nhosts, select_job_res_ptr->nprocs, + info("job_id:%u nhosts:%u nprocs:%u node_req:%u", + job_id, select_job_res_ptr->nhosts, select_job_res_ptr->nprocs, select_job_res_ptr->node_req); if (select_job_res_ptr->cpus == NULL) { diff --git a/src/common/select_job_res.h b/src/common/select_job_res.h index aa462c644c8..719ad16a2b0 100644 --- a/src/common/select_job_res.h +++ b/src/common/select_job_res.h @@ -170,7 +170,8 @@ extern select_job_res_t copy_select_job_res(select_job_res_t extern void free_select_job_res(select_job_res_t *select_job_res_pptr); /* Log the contents of a select_job_res data structure using info() */ -extern void log_select_job_res(select_job_res_t select_job_res_ptr); +extern void log_select_job_res(uint32_t job_id, + select_job_res_t select_job_res_ptr); /* Un/pack full select_job_res data structure */ extern void pack_select_job_res(select_job_res_t select_job_res_ptr, diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index a954440eaae..f590832f0cf 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -1207,9 +1207,16 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, { int rc; uint16_t job_node_req; + bool debug_cpu_bind = false, debug_check = false; xassert(bitmap); + if (!debug_check) { + debug_check = true; + if (slurm_get_debug_flags() & DEBUG_FLAG_CPU_BIND) + debug_cpu_bind = true; + } + if (!job_ptr->details) return EINVAL; @@ -1237,10 +1244,13 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, #if (CR_DEBUG) if (job_ptr->select_job) - log_select_job_res(job_ptr->select_job); + log_select_job_res(job_ptr->job_id, job_ptr->select_job); else info("no select_job_res info for job %u", job_ptr->job_id); +#else + if (debug_cpu_bind) + log_select_job_res(job_ptr->job_id, job_ptr->select_job); #endif return rc; -- GitLab