From 1fd5eaf54a3959c6c6941f4f54d35d39d5051b67 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Thu, 16 Sep 2004 16:06:34 +0000 Subject: [PATCH] Add squeue options to print BGL specific options (geometry, node_use, conn_type, and rotate). slurmctld to set min_nodes for job based upon geometry (if supplied). This avoids trying to schedule jobs when insufficient resources are available. --- doc/man/man1/squeue.1 | 14 +++++- doc/man/man1/srun.1 | 12 +++++ src/api/job_info.c | 6 +-- src/slurmctld/job_mgr.c | 54 +++++++++++++++++++-- src/slurmctld/node_mgr.c | 2 +- src/slurmctld/node_scheduler.c | 2 +- src/squeue/opts.c | 20 ++++++++ src/squeue/print.c | 88 ++++++++++++++++++++++++++++++++++ src/squeue/print.h | 16 +++++++ src/srun/opt.c | 50 +++++++++++++++---- 10 files changed, 245 insertions(+), 19 deletions(-) diff --git a/doc/man/man1/squeue.1 b/doc/man/man1/squeue.1 index 7cf82414095..81c41a8cc18 100644 --- a/doc/man/man1/squeue.1 +++ b/doc/man/man1/squeue.1 @@ -1,4 +1,4 @@ -.TH SQUEUE "1" "August 2004" "squeue 0.4" "Slurm components" +.TH SQUEUE "1" "September 2004" "squeue 0.4" "Slurm components" .SH "NAME" squeue \- view information about jobs located in the SLURM scheduling queue. @@ -185,6 +185,18 @@ User ID \fB%x\fR List of node names explicitly excluded by the job .TP +\fB%y\fR +Geometry requirement of resource allocation (X,Y,Z dimensions). +.TP +\fB%Y\fR +Connection type: TORUS, MESH, or NAV (torus else mesh). +.TP +\fB%z\fR +Permit rotation of geometry (yes or no). +.TP +\fB%Z\fR +Node use: VIRTUAL or COPROCESSOR. 
+.TP \fB%.<*>\fR right justification of the field .TP diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 93ea9514682..484df3a85a3 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -603,6 +603,9 @@ SLURM_ACCOUNT SLURM_CPUS_PER_TASK \fB\-c, \-\-ncpus\-per\-task\fR=\fIn\fR .TP +SLURM_CONN_TYPE +\fB\-\-conn\-type\fR=(\fImesh|nav|torus\fR) +.TP SLURM_CORE_FORMAT \fB\-\-core\fR=\fIformat\fR .TP @@ -615,12 +618,21 @@ SLURMD_DEBUG SLURM_DISTRIBUTION \fB\-m, \-\-distribution\fR=(\fIblock|cyclic\fR) .TP +SLURM_GEOMETRY +\fB\-g, \-\-geometry\fR=\fIX,Y,Z\fR +.TP SLURM_LABELIO \fB-l, --label\fR .TP SLURM_NNODES \fB\-N, \-\-nodes\fR=(\fIn|min-max\fR) .TP +SLURM_NO_ROTATE +\fB\-\-no\-rotate\fR +.TP +SLURM_NODE_USE +\fB\-\-node\-use\fR=(\fIcoprocessor|virtual\fR) +.TP SLURM_NPROCS \fB\-n, \-\-ntasks\fR=\fIn\fR .TP diff --git a/src/api/job_info.c b/src/api/job_info.c index 0d6b3a1111c..ea9e3fdc279 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -237,10 +237,10 @@ slurm_print_job_info ( FILE* out, job_info_t * job_ptr, int one_liner ) fprintf ( out, "Rotate=YES "); else fprintf ( out, "Rotate=NO "); - if (job_ptr->node_use == RM_VIRTUAL) - fprintf ( out, "NodeUse=VIRTUAL "); - else + if (job_ptr->node_use == RM_COPROCESSOR) fprintf ( out, "NodeUse=COPROCESSOR "); + else + fprintf ( out, "NodeUse=VIRTUAL "); #endif fprintf( out, "\n\n"); diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 17e1a680191..785db65772c 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1147,10 +1147,41 @@ void dump_job_desc(job_desc_msg_t * job_specs) dependency, job_specs->account); #ifdef HAVE_BGL - debug3(" conn_type=%u rotate=%u node_use=%u geometry=%u,%u,%u", - job_specs->conn_type, job_specs->rotate, job_specs->node_use, - job_specs->geometry[0], job_specs->geometry[1], - job_specs->geometry[2]); +{ + char *conn_type, *rotate, *node_use; + int geometry[SYSTEM_DIMENSIONS]; + + if (job_specs->conn_type == RM_MESH) + conn_type = 
"MESH"; + else if (job_specs->conn_type == RM_TORUS) + conn_type = "TORUS"; + else + conn_type = "NAV"; + + if (job_specs->rotate == 0) + rotate = "NO"; + else + rotate = "YES"; + + if (job_specs->node_use == RM_VIRTUAL) + node_use = "VIRTUAL"; + else + node_use = "COPROCESSOR"; + + if (job_specs->geometry[0] == (uint16_t) NO_VAL) { + geometry[0] = -1; + geometry[1] = -1; + geometry[2] = -1; + } else { + geometry[0] = job_specs->geometry[0]; + geometry[1] = job_specs->geometry[1]; + geometry[2] = job_specs->geometry[2]; + } + + debug3(" conn_type=%s rotate=%s node_use=%s geometry=%d,%d,%d", + conn_type, rotate, node_use, + geometry[0], geometry[1], geometry[2]); +} #endif } @@ -1653,6 +1684,21 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, if (job_desc->min_nodes == NO_VAL) job_desc->min_nodes = 1; +#ifdef SYSTEM_DIMENSIONS + if ((job_desc->geometry[0] != (uint16_t) NO_VAL) + && (job_desc->geometry[0] != 0)) { + int i, tot = 1; + for (i=0; i<SYSTEM_DIMENSIONS; i++) + tot *= job_desc->geometry[i]; + if (job_desc->min_nodes > tot) { + info("MinNodes(%d) > GeometryNodes(%d)", + job_desc->min_nodes, tot); + error_code = ESLURM_TOO_MANY_REQUESTED_CPUS; + goto cleanup; + } + job_desc->min_nodes = tot; + } +#endif if (job_desc->max_nodes == NO_VAL) job_desc->max_nodes = 0; if (job_desc->num_procs > part_ptr->total_cpus) { diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 6fb7e8fe4aa..a6a26950583 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -111,7 +111,7 @@ char * bitmap2node_name (bitstr_t *bitmap) hostlist_push(hl, node_record_table_ptr[i].name); } hostlist_uniq(hl); - hostlist_ranged_string(hl, 8192, buf); + hostlist_ranged_string(hl, sizeof(buf), buf); hostlist_destroy(hl); return xstrdup(buf); } diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index dff0575cc22..615d1cd1874 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -386,7 
+386,7 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, #ifdef HAVE_BGL if (shared) { - error("attempt to share Blue Gene nodes"); + error("attempt to share Blue Gene nodes ignored"); shared = 0; } #endif diff --git a/src/squeue/opts.c b/src/squeue/opts.c index e8c0b3a7771..4bd141fff1e 100644 --- a/src/squeue/opts.c +++ b/src/squeue/opts.c @@ -523,6 +523,26 @@ extern int parse_format( char* format ) field_size, right_justify, suffix ); + else if (field[0] == 'y') + job_format_add_geometry( params.format_list, + field_size, + right_justify, + suffix ); + else if (field[0] == 'Y') + job_format_add_conn_type( params.format_list, + field_size, + right_justify, + suffix ); + else if (field[0] == 'z') + job_format_add_rotate( params.format_list, + field_size, + right_justify, + suffix ); + else if (field[0] == 'Z') + job_format_add_node_use( params.format_list, + field_size, + right_justify, + suffix ); else error( "Invalid job format specification: %c", field[0] ); diff --git a/src/squeue/print.c b/src/squeue/print.c index 382277eeccd..59cf99d3575 100644 --- a/src/squeue/print.c +++ b/src/squeue/print.c @@ -792,6 +792,94 @@ int _print_job_dependency(job_info_t * job, int width, bool right_justify, return SLURM_SUCCESS; } +int _print_job_geometry(job_info_t * job, int width, bool right_justify, + char* suffix) +{ + if (job == NULL) /* Print the Header instead */ + _print_str("GEOMETRY", width, right_justify, true); + else { +#ifdef HAVE_BGL + char id[FORMAT_STRING_SIZE]; + snprintf(id, FORMAT_STRING_SIZE, "%u,%u,%u", + job->geometry[0], job->geometry[1], job->geometry[2]); + _print_str(id, width, right_justify, true); +#else + _print_str("n/a", width, right_justify, true); +#endif + } + if (suffix) + printf("%s", suffix); + return SLURM_SUCCESS; +} + +int _print_job_conn_type(job_info_t * job, int width, bool right_justify, + char* suffix) +{ + if (job == NULL) /* Print the Header instead */ + _print_str("CONN_TYPE", width, right_justify, true); + 
else { + char *id; +#ifdef HAVE_BGL + if (job->conn_type == RM_TORUS) + id = "torus"; + else if (job->conn_type == RM_MESH) + id = "mesh"; + else + id = "nav"; +#else + id = "n/a"; +#endif + _print_str(id, width, right_justify, true); + } + if (suffix) + printf("%s", suffix); + return SLURM_SUCCESS; +} + +int _print_job_node_use(job_info_t * job, int width, bool right_justify, + char* suffix) +{ + if (job == NULL) /* Print the Header instead */ + _print_str("NODE_USE", width, right_justify, true); + else { + char *id; +#ifdef HAVE_BGL + if (job->node_use == RM_COPROCESSOR) + id = "coprocessor"; + else + id = "virtual"; +#else + id = "n/a"; +#endif + _print_str(id, width, right_justify, true); + } + if (suffix) + printf("%s", suffix); + return SLURM_SUCCESS; +} + +int _print_job_rotate(job_info_t * job, int width, bool right_justify, + char* suffix) +{ + if (job == NULL) /* Print the Header instead */ + _print_str("ROTATE", width, right_justify, true); + else { + char *id; +#ifdef HAVE_BGL + if (job->rotate == 0) + id = "no"; + else + id = "yes"; +#else + id = "n/a"; +#endif + _print_str(id, width, right_justify, true); + } + if (suffix) + printf("%s", suffix); + return SLURM_SUCCESS; +} + /***************************************************************************** * Job Step Print Functions *****************************************************************************/ diff --git a/src/squeue/print.h b/src/squeue/print.h index 68aea253eea..6728541f22b 100644 --- a/src/squeue/print.h +++ b/src/squeue/print.h @@ -136,6 +136,14 @@ int job_format_add_function(List list, int width, bool right_justify, job_format_add_function(list,wid,right,suffix,_print_job_account) #define job_format_add_dependency(list,wid,right,suffix) \ job_format_add_function(list,wid,right,suffix,_print_job_dependency) +#define job_format_add_geometry(list,wid,right,suffix) \ + job_format_add_function(list,wid,right,suffix,_print_job_geometry) +#define job_format_add_rotate(list,wid,right,suffix) 
\ + job_format_add_function(list,wid,right,suffix,_print_job_rotate) +#define job_format_add_conn_type(list,wid,right,suffix) \ + job_format_add_function(list,wid,right,suffix,_print_job_conn_type) +#define job_format_add_node_use(list,wid,right,suffix) \ + job_format_add_function(list,wid,right,suffix,_print_job_node_use) /***************************************************************************** * Job Line Print Functions @@ -208,6 +216,14 @@ int _print_job_account(job_info_t * job, int width, bool right_justify, char* suffix); int _print_job_dependency(job_info_t * job, int width, bool right_justify, char* suffix); +int _print_job_geometry(job_info_t * job, int width, bool right_justify, + char* suffix); +int _print_job_rotate(job_info_t * job, int width, bool right_justify, + char* suffix); +int _print_job_conn_type(job_info_t * job, int width, bool right_justify, + char* suffix); +int _print_job_node_use(job_info_t * job, int width, bool right_justify, + char* suffix); /***************************************************************************** * Step Print Format Functions diff --git a/src/srun/opt.c b/src/srun/opt.c index 9863c5cbba8..2fc9faee868 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -74,6 +74,10 @@ #define OPT_NODES 0x05 #define OPT_OVERCOMMIT 0x06 #define OPT_CORE 0x07 +#define OPT_CONN_TYPE 0x08 +#define OPT_NODE_USE 0x09 +#define OPT_NO_ROTATE 0x0a +#define OPT_GEOMETRY 0x0b /* generic getopt_long flags, integers and *not* valid characters */ #define LONG_OPT_HELP 0x100 @@ -214,11 +218,12 @@ static bool _valid_node_list(char **node_list_pptr) */ static enum distribution_t _verify_dist_type(const char *arg) { + int len = strlen(arg); enum distribution_t result = SRUN_DIST_UNKNOWN; - if (strncasecmp(arg, "cyclic", strlen(arg)) == 0) + if (strncasecmp(arg, "cyclic", len) == 0) result = SRUN_DIST_CYCLIC; - else if (strncasecmp(arg, "block", strlen(arg)) == 0) + else if (strncasecmp(arg, "block", len) == 0) result = SRUN_DIST_BLOCK; return 
result; @@ -230,11 +235,13 @@ static enum distribution_t _verify_dist_type(const char *arg) */ static int _verify_conn_type(const char *arg) { - if (!strcasecmp(arg, "MESH")) + int len = strlen(arg); + + if (!strncasecmp(arg, "MESH", len)) return RM_MESH; - else if (!strcasecmp(arg, "TORUS")) + else if (!strncasecmp(arg, "TORUS", len)) return RM_TORUS; - else if (!strcasecmp(arg, "NAV")) + else if (!strncasecmp(arg, "NAV", len)) return RM_NAV; error("invalid --conn-type argument %s ignored.", arg); @@ -247,9 +254,11 @@ static int _verify_conn_type(const char *arg) */ static int _verify_node_use(const char *arg) { - if (!strcasecmp(arg, "VIRTUAL")) + int len = strlen(arg); + + if (!strncasecmp(arg, "VIRTUAL", len)) return RM_VIRTUAL; - else if (!strcasecmp(arg, "COPROCESSOR")) + else if (!strncasecmp(arg, "COPROCESSOR", len)) return RM_COPROCESSOR; error("invalid --node-use argument %s ignored.", arg); @@ -543,13 +552,17 @@ env_vars_t env_vars[] = { {"SLURM_ACCOUNT", OPT_STRING, &opt.account, NULL }, {"SLURMD_DEBUG", OPT_INT, &opt.slurmd_debug, NULL }, {"SLURM_CPUS_PER_TASK", OPT_INT, &opt.cpus_per_task, &opt.cpus_set }, - {"SLURM_CORE_FORMAT", OPT_CORE, NULL, NULL, }, + {"SLURM_CONN_TYPE", OPT_CONN_TYPE, NULL, NULL }, + {"SLURM_CORE_FORMAT", OPT_CORE, NULL, NULL }, {"SLURM_DEBUG", OPT_DEBUG, NULL, NULL }, {"SLURM_DISTRIBUTION", OPT_DISTRIB, NULL, NULL }, + {"SLURM_GEOMETRY", OPT_GEOMETRY, NULL, NULL }, {"SLURM_IMMEDIATE", OPT_INT, &opt.immediate, NULL }, {"SLURM_JOBID", OPT_INT, &opt.jobid, NULL }, {"SLURM_LABELIO", OPT_INT, &opt.labelio, NULL }, {"SLURM_NNODES", OPT_NODES, NULL, NULL }, + {"SLURM_NO_ROTATE", OPT_NO_ROTATE, NULL, NULL }, + {"SLURM_NODE_USE", OPT_NODE_USE, NULL, NULL }, {"SLURM_NPROCS", OPT_INT, &opt.nprocs, &opt.nprocs_set}, {"SLURM_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, {"SLURM_PARTITION", OPT_STRING, &opt.partition, NULL }, @@ -639,7 +652,26 @@ _process_env_var(env_vars_t *e, const char *val) case OPT_CORE: opt.core_type = core_format_type 
(val); break; - + + case OPT_CONN_TYPE: + opt.conn_type = _verify_conn_type(val); + break; + + case OPT_NODE_USE: + opt.node_use = _verify_node_use(val); + break; + + case OPT_NO_ROTATE: + opt.no_rotate = true; + break; + + case OPT_GEOMETRY: + if (_verify_geometry(val, opt.geometry)) { + error("\"%s=%s\" -- invalid geometry, ignoring...", + e->var, val); + } + break; + default: /* do nothing */ break; -- GitLab