diff --git a/slurm/slurmdb.h b/slurm/slurmdb.h index 979172d726760b93ffad48579661ff63425ffb84..d7b8cb7369e67030c45a849bf1ac4563fdf99821 100644 --- a/slurm/slurmdb.h +++ b/slurm/slurmdb.h @@ -414,6 +414,7 @@ typedef struct { uint32_t flags; /* set of CLUSTER_FLAG_* */ char *name; char *nodes; + uint32_t plugin_id_select; /* id of the select plugin */ slurmdb_association_rec_t *root_assoc; /* root association for * cluster */ uint16_t rpc_version; /* version of rpc this cluter is running */ diff --git a/src/common/env.c b/src/common/env.c index 8e28e0b3f458def6ee6a2ae14e946e61146790e5..f7038dd3fc7b4c25a37ba1f20da22b5f0433abda 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -63,6 +63,7 @@ #include "src/common/node_select.h" #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_step_layout.h" +#include "src/common/slurmdb_defs.h" /* * Define slurm-specific aliases for use by plugins, see slurm_xlator.h @@ -82,6 +83,66 @@ strong_alias(env_array_overwrite_fmt, slurm_env_array_overwrite_fmt); #define ENV_BUFSIZE (256 * 1024) +static int _setup_perticulars(uint32_t cluster_flags, + char ***dest, + dynamic_plugin_data_t *select_jobinfo) +{ + int rc = SLURM_SUCCESS; + if(cluster_flags & CLUSTER_FLAG_BG) { + char *bg_part_id = NULL; + select_g_select_jobinfo_get(select_jobinfo, + SELECT_JOBDATA_BLOCK_ID, + &bg_part_id); + if (bg_part_id) { + if(!(cluster_flags & CLUSTER_FLAG_BGL)) { /* was #ifndef HAVE_BGL */ + uint16_t conn_type = + (uint16_t)NO_VAL; + select_g_select_jobinfo_get( + select_jobinfo, + SELECT_JOBDATA_CONN_TYPE, + &conn_type); + if(conn_type > SELECT_SMALL) { + env_array_overwrite_fmt(dest, + "SUBMIT_POOL", + "%s", + bg_part_id); + } + } + env_array_overwrite_fmt(dest, "MPIRUN_PARTITION", "%s", + bg_part_id); + env_array_overwrite_fmt(dest, "MPIRUN_NOFREE", + "%d", 1); + env_array_overwrite_fmt(dest, "MPIRUN_NOALLOCATE", + "%d", 1); + xfree(bg_part_id); + } else + rc = SLURM_FAILURE; + + if(rc == SLURM_FAILURE) + error("Can't set MPIRUN_PARTITION " + "environment 
variable"); + } else if(cluster_flags & CLUSTER_FLAG_CRAYXT) { + char *resv_id = NULL; + select_g_select_jobinfo_get(select_jobinfo, + SELECT_JOBDATA_RESV_ID, + &resv_id); + if (resv_id) { + env_array_overwrite_fmt(dest, "BASIL_RESERVATION_ID", + "%s", resv_id); + } else + rc = SLURM_FAILURE; + + if(rc == SLURM_FAILURE) + error("Can't set BASIL_RESERVATION_ID " + "environment variable"); + xfree(resv_id); + } else if(cluster_flags & CLUSTER_FLAG_AIX) { + env_array_overwrite(dest, "LOADLBATCH", "yes"); + } + + return rc; +} + /* * Return pointer to `name' entry in environment if found, or * pointer to the last entry (i.e. NULL) if `name' is not @@ -310,6 +371,7 @@ int setup_env(env_t *env, bool preserve_env) int rc = SLURM_SUCCESS; char *dist = NULL, *lllp_dist = NULL; char addrbuf[INET_ADDRSTRLEN]; + uint32_t cluster_flags = slurmdb_setup_cluster_flags(); if (env == NULL) return SLURM_ERROR; @@ -606,56 +668,8 @@ int setup_env(env_t *env, bool preserve_env) } if(env->select_jobinfo) { -#ifdef HAVE_BG - char *bgl_part_id = NULL; - select_g_select_jobinfo_get(env->select_jobinfo, - SELECT_JOBDATA_BLOCK_ID, &bgl_part_id); - if (bgl_part_id) { -#ifndef HAVE_BGL - uint16_t conn_type = (uint16_t)NO_VAL; - select_g_select_jobinfo_get(env->select_jobinfo, - SELECT_JOBDATA_CONN_TYPE, - &conn_type); - if(conn_type > SELECT_SMALL) { - if(setenvf(&env->env, - "SUBMIT_POOL", "%s", bgl_part_id)) - rc = SLURM_FAILURE; - } -#endif - if(setenvf(&env->env, - "MPIRUN_PARTITION", "%s", bgl_part_id)) - rc = SLURM_FAILURE; - - if(setenvf(&env->env, "MPIRUN_NOFREE", "%d", 1)) - rc = SLURM_FAILURE; - if(setenvf(&env->env, "MPIRUN_NOALLOCATE", "%d", 1)) - rc = SLURM_FAILURE; - xfree(bgl_part_id); - } else - rc = SLURM_FAILURE; - - if(rc == SLURM_FAILURE) - error("Can't set MPIRUN_PARTITION " - "environment variable"); - -#endif - -#ifdef HAVE_CRAY_XT - char *resv_id = NULL; - select_g_select_jobinfo_get(env->select_jobinfo, - SELECT_JOBDATA_RESV_ID, &resv_id); - if (resv_id) { - 
if(setenvf(&env->env, - "BASIL_RESVERATION_ID", "%s", resv_id)) - rc = SLURM_FAILURE; - } else - rc = SLURM_FAILURE; - - if(rc == SLURM_FAILURE) - error("Can't set BASIL_RESVERATION_ID " - "environment variable"); - xfree(resv_id); -#endif + _setup_perticulars(cluster_flags, &env->env, + env->select_jobinfo); } if (env->jobid >= 0) { @@ -740,8 +754,7 @@ int setup_env(env_t *env, bool preserve_env) rc = SLURM_FAILURE; } -#ifdef HAVE_AIX - { + if(cluster_flags & CLUSTER_FLAG_AIX) { char res_env[128]; char *debug_env = (char *)getenv("SLURM_LL_API_DEBUG"); int debug_num = 0; @@ -761,7 +774,6 @@ int setup_env(env_t *env, bool preserve_env) setenvf(&env->env, "LOADLBATCH", "yes"); setenvf(&env->env, "LOADL_ACTIVE", "3.2.0"); } -#endif if (env->pty_port && setenvf(&env->env, "SLURM_PTY_PORT", "%hu", env->pty_port)) { @@ -891,7 +903,7 @@ extern char *uint32_compressed_to_str(uint32_t array_len, * SLURM_JOB_CPUS_PER_NODE * LOADLBATCH (AIX only) * SLURM_BG_NUM_NODES, MPIRUN_PARTITION, MPIRUN_NOFREE, and - * MPIRUN_NOALLOCATE (BGL only) + * MPIRUN_NOALLOCATE (BG only) * * Sets OBSOLETE variables (needed for MPI, do not remove): * SLURM_JOBID @@ -903,24 +915,26 @@ int env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, const job_desc_msg_t *desc) { -#ifdef HAVE_CRAY_XT - char *resv_id = NULL; -#endif char *tmp = NULL; char *dist = NULL, *lllp_dist = NULL; slurm_step_layout_t *step_layout = NULL; uint32_t num_tasks = desc->num_tasks; int rc = SLURM_SUCCESS; uint32_t node_cnt = alloc->node_cnt; + uint32_t cluster_flags = slurmdb_setup_cluster_flags(); -#ifdef HAVE_BG - select_g_select_jobinfo_get(alloc->select_jobinfo, - SELECT_JOBDATA_NODE_CNT, - &node_cnt); - if(!node_cnt) - node_cnt = alloc->node_cnt; - env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES", "%u", node_cnt); -#endif + _setup_perticulars(cluster_flags, dest, alloc->select_jobinfo); + + if(cluster_flags & CLUSTER_FLAG_BG) { + select_g_select_jobinfo_get(alloc->select_jobinfo, + 
SELECT_JOBDATA_NODE_CNT, + &node_cnt); + if(!node_cnt) + node_cnt = alloc->node_cnt; + /* export the count chosen above (plugin value, else alloc->node_cnt) */ + env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES", + "%u", node_cnt); + } env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", alloc->job_id); env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", node_cnt); @@ -946,45 +960,6 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp); xfree(tmp); -#ifdef HAVE_AIX - /* this puts the "poe" command into batch mode */ - env_array_overwrite(dest, "LOADLBATCH", "yes"); -#endif - -#ifdef HAVE_BG - /* BlueGene only */ - select_g_select_jobinfo_get(alloc->select_jobinfo, - SELECT_JOBDATA_BLOCK_ID, - &tmp); - if (tmp) { -#ifndef HAVE_BGL - uint16_t conn_type = (uint16_t)NO_VAL; - select_g_select_jobinfo_get(alloc->select_jobinfo, - SELECT_JOBDATA_CONN_TYPE, &conn_type); - if(conn_type > SELECT_SMALL) { - env_array_overwrite_fmt(dest, "SUBMIT_POOL", "%s", - tmp); - } -#endif - env_array_overwrite_fmt(dest, "MPIRUN_PARTITION", "%s", - tmp); - env_array_overwrite_fmt(dest, "MPIRUN_NOFREE", "%d", 1); - env_array_overwrite_fmt(dest, "MPIRUN_NOALLOCATE", "%d", 1); - - xfree(tmp); - } -#endif - -#ifdef HAVE_CRAY_XT - select_g_select_jobinfo_get(alloc->select_jobinfo, - SELECT_JOBDATA_RESV_ID, - &resv_id); - if (resv_id) { - env_array_overwrite_fmt(dest, "BASIL_RESERVATION_ID", "%s", - resv_id); - } -#endif - /* OBSOLETE, but needed by MPI, do not remove */ env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", alloc->job_id); env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", node_cnt); @@ -1069,6 +1044,9 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, uint32_t num_tasks = batch->ntasks; uint16_t cpus_per_task; uint16_t task_dist; + uint32_t cluster_flags = slurmdb_setup_cluster_flags(); + + _setup_perticulars(cluster_flags, dest, batch->select_jobinfo); /* There is no explicit node count in the batch 
structure, * so we need to calculate the node count. */ @@ -1079,9 +1057,11 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", batch->job_id); env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", num_nodes); -#ifdef HAVE_BG - env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES", "%u", num_nodes); -#endif + if(cluster_flags & CLUSTER_FLAG_BG) { + /* same count as SLURM_JOB_NUM_NODES above */ + env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES", + "%u", num_nodes); + } env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", batch->nodes); tmp = uint32_compressed_to_str(batch->num_cpu_groups, @@ -1093,10 +1073,6 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, env_array_overwrite_fmt(dest, "ENVIRONMENT", "BATCH"); if (node_name) env_array_overwrite_fmt(dest, "HOSTNAME", "%s", node_name); -#ifdef HAVE_AIX - /* this puts the "poe" command into batch mode */ - env_array_overwrite(dest, "LOADLBATCH", "yes"); -#endif /* OBSOLETE, but needed by MPI, do not remove */ env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", batch->job_id); diff --git a/src/common/hostlist.c b/src/common/hostlist.c index 34c0041d9df89aecab40e0458ac60b9aac9a572e..a802d1360857cf3535958ef10f4820d3779e2487 100644 --- a/src/common/hostlist.c +++ b/src/common/hostlist.c @@ -277,9 +277,6 @@ enum {A, B, C, D}; # define SYSTEM_DIMENSIONS 1 #endif -/* largest configured system dimensions */ -#define HIGHEST_DIMENSIONS 4 -#define HIGHEST_BASE 36 /* logic for block node description */ /* to speed things up we will do some calculations once to avoid diff --git a/src/common/hostlist.h b/src/common/hostlist.h index e0031980c8040e385689349c7e741472a8c84fe2..450950eb93aa14039ac9c83bf6bee65b50abfa92 100644 --- a/src/common/hostlist.h +++ b/src/common/hostlist.h @@ -58,6 +58,10 @@ #define HOSTLIST_BASE 10 #endif +/* largest configured system dimensions */ +#define HIGHEST_DIMENSIONS 4 +#define HIGHEST_BASE 36 + extern char 
*alpha_num; /* Notes: diff --git a/src/common/node_select.c b/src/common/node_select.c index dda5874859466131048e5def22f8b89f46cdf221..b89e2fc95a2fab2ae7cea944574f778cea0ea7d1 100644 --- a/src/common/node_select.c +++ b/src/common/node_select.c @@ -329,10 +329,16 @@ static int _unpack_block_info(block_info_t *block_info, Buf buffer, if(protocol_version >= SLURM_2_1_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&block_info->bg_block_id, &uint32_tmp, buffer); + if(working_cluster_rec) { + if(working_cluster_rec->flags & CLUSTER_FLAG_BGL) + safe_unpackstr_xmalloc(&block_info->blrtsimage, + &uint32_tmp, buffer); + } else { #ifdef HAVE_BGL - safe_unpackstr_xmalloc(&block_info->blrtsimage, - &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&block_info->blrtsimage, + &uint32_tmp, buffer); #endif + } safe_unpackstr_xmalloc(&bp_inx_str, &uint32_tmp, buffer); if (bp_inx_str == NULL) { block_info->bp_inx = bitfmt2int(""); @@ -358,9 +364,14 @@ static int _unpack_block_info(block_info_t *block_info, Buf buffer, safe_unpackstr_xmalloc(&(block_info->nodes), &uint32_tmp, buffer); safe_unpack32(&block_info->node_cnt, buffer); + if(working_cluster_rec) { + if(working_cluster_rec->flags & CLUSTER_FLAG_BGL) + safe_unpack16(&block_info->node_use, buffer); + } else { #ifdef HAVE_BGL - safe_unpack16(&block_info->node_use, buffer); + safe_unpack16(&block_info->node_use, buffer); #endif + } safe_unpackstr_xmalloc(&block_info->owner_name, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&block_info->ramdiskimage, @@ -390,9 +401,15 @@ extern void node_select_pack_block_info(block_info_t *block_info, Buf buffer, if(protocol_version >= SLURM_2_1_PROTOCOL_VERSION) { if(!block_info) { packnull(buffer); + if(working_cluster_rec) { + if(working_cluster_rec->flags + & CLUSTER_FLAG_BGL) + packnull(buffer); + } else { #ifdef HAVE_BGL - packnull(buffer); + packnull(buffer); #endif + } pack16((uint16_t)NO_VAL, buffer); packnull(buffer); @@ -405,10 +422,15 @@ extern void node_select_pack_block_info(block_info_t 
*block_info, Buf buffer, packnull(buffer); packnull(buffer); pack32(NO_VAL, buffer); + if(working_cluster_rec) { + if(working_cluster_rec->flags + & CLUSTER_FLAG_BGL) + pack16((uint16_t)NO_VAL, buffer); + } else { #ifdef HAVE_BGL - pack16((uint16_t)NO_VAL, buffer); + pack16((uint16_t)NO_VAL, buffer); #endif - + } packnull(buffer); packnull(buffer); pack16((uint16_t)NO_VAL, buffer); @@ -536,19 +558,22 @@ extern int slurm_select_init(void) select_type = slurm_get_select_type(); + if(working_cluster_rec) { + /* just ignore warnings here */ + } else { #ifdef HAVE_XCPU - if(strcasecmp(select_type, "select/linear")) { - error("%s is incompatible with XCPU use", select_type); - fatal("Use SelectType=select/linear"); - } + if(strcasecmp(select_type, "select/linear")) { + error("%s is incompatible with XCPU use", select_type); + fatal("Use SelectType=select/linear"); + } #endif - #ifdef HAVE_BG - if(strcasecmp(select_type, "select/bluegene")) { - error("%s is incompatible with BlueGene", select_type); - fatal("Use SelectType=select/bluegene"); - } + if(strcasecmp(select_type, "select/bluegene")) { + error("%s is incompatible with BlueGene", select_type); + fatal("Use SelectType=select/bluegene"); + } #endif + } select_context_cnt = 0; names = xstrdup(select_plugin_list); one_name = strtok_r(names, ",", &last); @@ -627,6 +652,30 @@ fini: slurm_mutex_unlock(&select_context_lock); return rc; } +extern int select_get_plugin_id_pos(uint32_t plugin_id) +{ + int i; + + if (slurm_select_init() < 0) + return SLURM_ERROR; + + for (i=0; i<select_context_cnt; i++) { + if(*(select_context[i].ops.plugin_id) == plugin_id) + break; + } + if(i >= select_context_cnt) + return SLURM_ERROR; + return i; +} + +extern int select_get_plugin_id() +{ + if (slurm_select_init() < 0) + return 0; + + return *(select_context[select_context_default].ops.plugin_id); +} + /* * Save any global state information * IN dir_name - directory into which the data can be stored @@ -831,16 +880,23 @@ extern int 
select_g_select_nodeinfo_pack(dynamic_plugin_data_t *nodeinfo, uint16_t protocol_version) { void *data = NULL; + uint32_t plugin_id; if (slurm_select_init() < 0) return SLURM_ERROR; + + if(nodeinfo) { + data = nodeinfo->data; + plugin_id = nodeinfo->plugin_id; + } else + plugin_id = select_context_default; + if(protocol_version >= SLURM_2_2_PROTOCOL_VERSION) - pack32(*(select_context[select_context_default].ops.plugin_id), + pack32(*(select_context[plugin_id].ops.plugin_id), buffer); - if(nodeinfo) - data = nodeinfo->data; - return (*(select_context[select_context_default].ops. - nodeinfo_pack))(nodeinfo->data, buffer, protocol_version); + + return (*(select_context[plugin_id].ops. + nodeinfo_pack))(data, buffer, protocol_version); } extern int select_g_select_nodeinfo_unpack(dynamic_plugin_data_t **nodeinfo, @@ -881,12 +937,17 @@ unpack_error: extern dynamic_plugin_data_t *select_g_select_nodeinfo_alloc(uint32_t size) { dynamic_plugin_data_t *nodeinfo_ptr = NULL; + uint32_t plugin_id; if (slurm_select_init() < 0) return NULL; + + plugin_id = working_cluster_rec ? + working_cluster_rec->plugin_id_select : select_context_default; + nodeinfo_ptr = xmalloc(sizeof(dynamic_plugin_data_t)); - nodeinfo_ptr->plugin_id = select_context_default; - nodeinfo_ptr->data = (*(select_context[select_context_default].ops. + nodeinfo_ptr->plugin_id = plugin_id; + nodeinfo_ptr->data = (*(select_context[plugin_id].ops. 
nodeinfo_alloc))(size); return nodeinfo_ptr; } @@ -931,7 +992,7 @@ extern int select_g_select_nodeinfo_get(dynamic_plugin_data_t *nodeinfo, void *data) { void *nodedata = NULL; - uint32_t plugin_id = select_context_default; + uint32_t plugin_id; if (slurm_select_init() < 0) return SLURM_ERROR; @@ -939,7 +1000,9 @@ extern int select_g_select_nodeinfo_get(dynamic_plugin_data_t *nodeinfo, if(nodeinfo) { nodedata = nodeinfo->data; plugin_id = nodeinfo->plugin_id; - } + } else + plugin_id = select_context_default; + return (*(select_context[plugin_id].ops.nodeinfo_get)) (nodedata, dinfo, state, data); } @@ -1283,11 +1346,17 @@ extern char *select_g_select_jobinfo_xstrdup( extern dynamic_plugin_data_t *select_g_select_jobinfo_alloc() { dynamic_plugin_data_t *jobinfo_ptr = NULL; + uint32_t plugin_id; + if (slurm_select_init() < 0) return NULL; + + plugin_id = working_cluster_rec ? + working_cluster_rec->plugin_id_select : select_context_default; + jobinfo_ptr = xmalloc(sizeof(dynamic_plugin_data_t)); - jobinfo_ptr->plugin_id = select_context_default; - jobinfo_ptr->data = (*(select_context[select_context_default].ops. + jobinfo_ptr->plugin_id = plugin_id; + jobinfo_ptr->data = (*(select_context[plugin_id].ops. 
jobinfo_alloc))(); return jobinfo_ptr; } @@ -1315,7 +1384,7 @@ extern int select_g_select_jobinfo_set(dynamic_plugin_data_t *jobinfo, void *data) { void *jobdata = NULL; - uint32_t plugin_id = select_context_default; + uint32_t plugin_id; if (slurm_select_init() < 0) return SLURM_ERROR; @@ -1323,7 +1392,9 @@ extern int select_g_select_jobinfo_set(dynamic_plugin_data_t *jobinfo, if(jobinfo) { jobdata = jobinfo->data; plugin_id = jobinfo->plugin_id; - } + } else + plugin_id = select_context_default; + return (*(select_context[plugin_id].ops.jobinfo_set)) (jobdata, data_type, data); } @@ -1338,7 +1409,7 @@ extern int select_g_select_jobinfo_get(dynamic_plugin_data_t *jobinfo, void *data) { void *jobdata = NULL; - int plugin_id = select_context_default; + uint32_t plugin_id; if (slurm_select_init() < 0) return SLURM_ERROR; @@ -1346,7 +1417,9 @@ extern int select_g_select_jobinfo_get(dynamic_plugin_data_t *jobinfo, if(jobinfo) { jobdata = jobinfo->data; plugin_id = jobinfo->plugin_id; - } + } else + plugin_id = select_context_default; + return (*(select_context[plugin_id].ops.jobinfo_get)) (jobdata, data_type, data); } @@ -1384,15 +1457,21 @@ extern int select_g_select_jobinfo_pack(dynamic_plugin_data_t *jobinfo, uint16_t protocol_version) { void *data = NULL; + uint32_t plugin_id; + if (slurm_select_init() < 0) return SLURM_ERROR; + if(jobinfo) { + data = jobinfo->data; + plugin_id = jobinfo->plugin_id; + } else + plugin_id = select_context_default; + if(protocol_version >= SLURM_2_2_PROTOCOL_VERSION) - pack32(*(select_context[select_context_default].ops.plugin_id), + pack32(*(select_context[plugin_id].ops.plugin_id), buffer); - if(jobinfo) - data = jobinfo->data; - return (*(select_context[select_context_default].ops. + return (*(select_context[plugin_id].ops. 
jobinfo_pack))(data, buffer, protocol_version); } @@ -1449,12 +1528,17 @@ extern char *select_g_select_jobinfo_sprint(dynamic_plugin_data_t *jobinfo, char *buf, size_t size, int mode) { void *data = NULL; + uint32_t plugin_id; + if (slurm_select_init() < 0) return NULL; - - if(jobinfo) + if(jobinfo) { data = jobinfo->data; - return (*(select_context[select_context_default].ops. + plugin_id = jobinfo->plugin_id; + } else + plugin_id = select_context_default; + + return (*(select_context[plugin_id].ops. jobinfo_sprint)) (data, buf, size, mode); } @@ -1467,12 +1551,18 @@ extern char *select_g_select_jobinfo_xstrdup( dynamic_plugin_data_t *jobinfo, int mode) { void *data = NULL; + uint32_t plugin_id; + if (slurm_select_init() < 0) return NULL; - if(jobinfo) + if(jobinfo) { data = jobinfo->data; - return (*(select_context[select_context_default].ops. + plugin_id = jobinfo->plugin_id; + } else + plugin_id = select_context_default; + + return (*(select_context[plugin_id].ops. jobinfo_xstrdup))(data, mode); } diff --git a/src/common/node_select.h b/src/common/node_select.h index 34d4ec687645cbec8d6cccee467810833337bdb8..8d0e5e036dd8c4c4c03e61ccf9da553b6e4e19b7 100644 --- a/src/common/node_select.h +++ b/src/common/node_select.h @@ -95,6 +95,9 @@ extern int slurm_select_init(void); */ extern int slurm_select_fini(void); +extern int select_get_plugin_id_pos(uint32_t plugin_id); +extern int select_get_plugin_id(); + /* * Save any global state information * IN dir_name - directory into which the data can be stored diff --git a/src/common/proc_args.c b/src/common/proc_args.c index fcf623f9e68b01bad22b86ddffd281d3c92037bf..468bb89f95be190569757bf719a143a34546f6e8 100644 --- a/src/common/proc_args.c +++ b/src/common/proc_args.c @@ -155,8 +155,18 @@ task_dist_states_t verify_dist_type(const char *arg, uint32_t *plane_size) */ uint16_t verify_conn_type(const char *arg) { -#ifdef HAVE_BG uint16_t len = strlen(arg); + bool no_bgl = 1; + + if(working_cluster_rec) { + 
if(working_cluster_rec->flags & CLUSTER_FLAG_BGL) + no_bgl = 0; + } else { +#ifdef HAVE_BGL + no_bgl = 0; +#endif + } + if(!len) { /* no input given */ error("no conn-type argument given."); @@ -167,18 +177,17 @@ uint16_t verify_conn_type(const char *arg) return SELECT_TORUS; else if (!strncasecmp(arg, "NAV", len)) return SELECT_NAV; -#ifndef HAVE_BGL - else if (!strncasecmp(arg, "HTC", len) - || !strncasecmp(arg, "HTC_S", len)) - return SELECT_HTC_S; - else if (!strncasecmp(arg, "HTC_D", len)) - return SELECT_HTC_D; - else if (!strncasecmp(arg, "HTC_V", len)) - return SELECT_HTC_V; - else if (!strncasecmp(arg, "HTC_L", len)) - return SELECT_HTC_L; -#endif -#endif + else if (no_bgl) { + if(!strncasecmp(arg, "HTC", len) + || !strncasecmp(arg, "HTC_S", len)) + return SELECT_HTC_S; + else if (!strncasecmp(arg, "HTC_D", len)) + return SELECT_HTC_D; + else if (!strncasecmp(arg, "HTC_V", len)) + return SELECT_HTC_V; + else if (!strncasecmp(arg, "HTC_L", len)) + return SELECT_HTC_L; + } error("invalid conn-type argument '%s' ignored.", arg); return (uint16_t)NO_VAL; } @@ -193,9 +202,11 @@ int verify_geometry(const char *arg, uint16_t *geometry) int i, rc = 0; char* geometry_tmp = xstrdup(arg); char* original_ptr = geometry_tmp; + int dims = working_cluster_rec ? + working_cluster_rec->dimensions : SYSTEM_DIMENSIONS; token = strtok_r(geometry_tmp, delimiter, &next_ptr); - for (i=0; i<SYSTEM_DIMENSIONS; i++) { + for (i=0; i<dims; i++) { if (token == NULL) { error("insufficient dimensions in --geometry"); rc = -1; @@ -745,12 +756,14 @@ char *print_geometry(const uint16_t *geometry) { int i; char buf[32], *rc = NULL; + int dims = working_cluster_rec ? 
+ working_cluster_rec->dimensions : SYSTEM_DIMENSIONS; - if ((SYSTEM_DIMENSIONS == 0) + if ((dims == 0) || (geometry[0] == (uint16_t)NO_VAL)) return NULL; - for (i=0; i<SYSTEM_DIMENSIONS; i++) { + for (i=0; i<dims; i++) { if (i > 0) snprintf(buf, sizeof(buf), "x%u", geometry[i]); else diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c index 7e775ad1e77ae77f5b6d0f24000fb523d8346350..3e16191f753c8e9ae46912a582654544e83f0add 100644 --- a/src/common/slurm_cred.c +++ b/src/common/slurm_cred.c @@ -169,7 +169,6 @@ struct slurm_job_credential { * default=0 (no limit) */ uint32_t step_mem_limit; -#ifndef HAVE_BG uint16_t core_array_size; /* core/socket array size */ uint16_t *cores_per_socket; uint16_t *sockets_per_node; @@ -179,7 +178,6 @@ struct slurm_job_credential { uint32_t job_nhosts; /* count of nodes allocated to JOB */ char *job_hostlist; /* list of nodes allocated to JOB */ bitstr_t *step_core_bitmap; -#endif time_t ctime; /* time of credential creation */ char *step_hostlist;/* hostnames for which the cred is ok */ @@ -348,53 +346,53 @@ _slurm_crypto_get_ops( slurm_crypto_context_t *c ) int rc = 0; /* Find the correct plugin. */ - c->cur_plugin = plugin_load_and_link(c->crypto_type, n_syms, syms, + c->cur_plugin = plugin_load_and_link(c->crypto_type, n_syms, syms, (void **) &c->ops); - if ( c->cur_plugin != PLUGIN_INVALID_HANDLE ) - return &c->ops; + if ( c->cur_plugin != PLUGIN_INVALID_HANDLE ) + return &c->ops; error("Couldn't find the specified plugin name for %s " "looking at all files", c->crypto_type); /* Get the plugin list, if needed. 
*/ - if ( c->plugin_list == NULL ) { + if ( c->plugin_list == NULL ) { char *plugin_dir; - c->plugin_list = plugrack_create(); - if ( c->plugin_list == NULL ) { - error( "Unable to create a plugin manager" ); - return NULL; - } - - plugrack_set_major_type( c->plugin_list, "crypto" ); - plugrack_set_paranoia( c->plugin_list, + c->plugin_list = plugrack_create(); + if ( c->plugin_list == NULL ) { + error( "Unable to create a plugin manager" ); + return NULL; + } + + plugrack_set_major_type( c->plugin_list, "crypto" ); + plugrack_set_paranoia( c->plugin_list, PLUGRACK_PARANOIA_NONE, 0 ); plugin_dir = slurm_get_plugin_dir(); - plugrack_read_dir( c->plugin_list, plugin_dir ); + plugrack_read_dir( c->plugin_list, plugin_dir ); xfree(plugin_dir); - } + } - /* Find the correct plugin. */ - c->cur_plugin = + /* Find the correct plugin. */ + c->cur_plugin = plugrack_use_by_type( c->plugin_list, c->crypto_type ); - if ( c->cur_plugin == PLUGIN_INVALID_HANDLE ) { - error( "can't find a plugin for type %s", c->crypto_type ); - return NULL; - } + if ( c->cur_plugin == PLUGIN_INVALID_HANDLE ) { + error( "can't find a plugin for type %s", c->crypto_type ); + return NULL; + } - /* Dereference the API. */ - if ( (rc = plugin_get_syms( c->cur_plugin, + /* Dereference the API. 
*/ + if ( (rc = plugin_get_syms( c->cur_plugin, n_syms, syms, (void **) &c->ops )) < n_syms ) { - error( "incomplete crypto plugin detected only " + error( "incomplete crypto plugin detected only " "got %d out of %d", rc, n_syms); - return NULL; - } + return NULL; + } - return &c->ops; + return &c->ops; } static int _slurm_crypto_init(void) @@ -1114,7 +1112,7 @@ slurm_cred_begin_expiration(slurm_cred_ctx_t ctx, uint32_t jobid) j->expiration = time(NULL) + ctx->expiry_window; debug2 ("set revoke expiration for jobid %u to %s", - j->jobid, timestr (&j->expiration, buf, 64) ); + j->jobid, timestr (&j->expiration, buf, 64) ); slurm_mutex_unlock(&ctx->mutex); return SLURM_SUCCESS; @@ -1160,7 +1158,7 @@ static char *_core_format(bitstr_t *core_bitmap) #endif /* - * Retrieve the set of cores that were allocated to the job and step then + * Retrieve the set of cores that were allocated to the job and step then * format them in the List Format (e.g., "0-2,7,12-14"). Also return * job and step's memory limit. 
* @@ -1317,11 +1315,11 @@ slurm_cred_unpack(Buf buffer) xfree(bit_fmt); safe_unpack16( &cred->core_array_size, buffer); if (cred->core_array_size) { - safe_unpack16_array(&cred->cores_per_socket, &len, + safe_unpack16_array(&cred->cores_per_socket, &len, buffer); if (len != cred->core_array_size) goto unpack_error; - safe_unpack16_array(&cred->sockets_per_node, &len, + safe_unpack16_array(&cred->sockets_per_node, &len, buffer); if (len != cred->core_array_size) goto unpack_error; @@ -1569,9 +1567,9 @@ _slurm_cred_sign(slurm_cred_ctx_t ctx, slurm_cred_t *cred) buffer = init_buf(4096); _pack_cred(cred, buffer); rc = (*(g_crypto_context->ops.crypto_sign))(ctx->key, - get_buf_data(buffer), + get_buf_data(buffer), get_buf_offset(buffer), - &cred->signature, + &cred->signature, &cred->siglen); free_buf(buffer); @@ -1594,15 +1592,15 @@ _slurm_cred_verify_signature(slurm_cred_ctx_t ctx, slurm_cred_t *cred) _pack_cred(cred, buffer); rc = (*(g_crypto_context->ops.crypto_verify_sign))(ctx->key, - get_buf_data(buffer), + get_buf_data(buffer), get_buf_offset(buffer), - cred->signature, + cred->signature, cred->siglen); if (rc && _exkey_is_valid(ctx)) { rc = (*(g_crypto_context->ops.crypto_verify_sign))(ctx->exkey, - get_buf_data(buffer), + get_buf_data(buffer), get_buf_offset(buffer), - cred->signature, + cred->signature, cred->siglen); } free_buf(buffer); @@ -1638,13 +1636,13 @@ _pack_cred(slurm_cred_t *cred, Buf buffer) pack_bit_fmt(cred->step_core_bitmap, buffer); pack16(cred->core_array_size, buffer); if (cred->core_array_size) { - pack16_array(cred->cores_per_socket, + pack16_array(cred->cores_per_socket, cred->core_array_size, buffer); - pack16_array(cred->sockets_per_node, + pack16_array(cred->sockets_per_node, cred->core_array_size, buffer); - pack32_array(cred->sock_core_rep_count, + pack32_array(cred->sock_core_rep_count, cred->core_array_size, buffer); } @@ -1756,7 +1754,7 @@ _credential_revoked(slurm_cred_ctx_t ctx, slurm_cred_t *cred) if (cred->ctime <= j->revoked) 
{ char buf[64]; debug ("cred for %u revoked. expires at %s", - j->jobid, timestr (&j->expiration, buf, 64)); + j->jobid, timestr (&j->expiration, buf, 64)); return true; } @@ -1958,7 +1956,7 @@ _job_state_unpack_one(Buf buffer) t3[0] = '\0'; } debug3("cred_unpack: job %u ctime:%s%s%s", - j->jobid, timestr (&j->ctime, t1, 64), t2, t3); + j->jobid, timestr (&j->ctime, t1, 64), t2, t3); if ((j->revoked) && (j->expiration == (time_t) MAX_TIME)) { info ("Warning: revoke on job %u has no expiration", diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index b1dd1afcbcb5265ce1a75734495d945950b744ac..c66283e0a717c75907abb0dc3c50cec59cbc60b7 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -59,9 +59,7 @@ #include "src/common/job_options.h" #include "src/common/forward.h" #include "src/common/slurm_jobacct_gather.h" -#ifdef HAVE_BG #include "src/plugins/select/bluegene/wrap_rm_api.h" -#endif /* ** Define slurm-specific aliases for use by plugins, see slurm_xlator.h @@ -1353,7 +1351,6 @@ extern char *conn_type_string(enum connection_type conn_type) return "Small"; case (SELECT_NAV): return "NAV"; -#ifndef HAVE_BGL case SELECT_HTC_S: return "HTC_S"; case SELECT_HTC_D: @@ -1362,14 +1359,12 @@ extern char *conn_type_string(enum connection_type conn_type) return "HTC_V"; case SELECT_HTC_L: return "HTC_L"; -#endif default: return "n/a"; } return "n/a"; } -#ifdef HAVE_BGL extern char* node_use_string(enum node_use_type node_use) { switch (node_use) { @@ -1382,13 +1377,30 @@ extern char* node_use_string(enum node_use_type node_use) } return ""; } -#endif extern char *bg_block_state_string(uint16_t state) { static char tmp[16]; + /* This needs to happen across clusters, since the enums + * changed. We don't handle BUSY or REBOOTING though; these + * states are extremely rare so it isn't that big of a deal. 
+ */ +#ifdef HAVE_BGL + if(working_cluster_rec) { + if(!(working_cluster_rec->flags & CLUSTER_FLAG_BGL)) { + if(state == RM_PARTITION_BUSY) + state = RM_PARTITION_READY; + } + } +#else + if(working_cluster_rec) { + if(working_cluster_rec->flags & CLUSTER_FLAG_BGL) { + if(state == RM_PARTITION_REBOOTING) + state = RM_PARTITION_READY; + } + } +#endif -#ifdef HAVE_BG switch ((rm_partition_state_t)state) { #ifdef HAVE_BGL case RM_PARTITION_BUSY: @@ -1410,7 +1422,6 @@ extern char *bg_block_state_string(uint16_t state) case RM_PARTITION_READY: return "READY"; } -#endif snprintf(tmp, sizeof(tmp), "%d", state); return tmp; diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index b971ba007daeb18480580c51b52677ec1a896c42..d3e7ec95c2e91549f7773479e43442c50fd6ed2c 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -1059,9 +1059,7 @@ extern void private_data_string(uint16_t private_data, char *str, int str_len); extern void accounting_enforce_string(uint16_t enforce, char *str, int str_len); extern char *conn_type_string(enum connection_type conn_type); -#ifdef HAVE_BGL extern char *node_use_string(enum node_use_type node_use); -#endif /* Translate a state enum to a readable string */ extern char *bg_block_state_string(uint16_t state); diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 7894da1bf248390591b3fa7554012654d8dc0216..5e7a11fd6d3c66908bb588028ab0e5996f971af0 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -4998,13 +4998,12 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, } else { job_desc_ptr->select_jobinfo = select_g_select_jobinfo_alloc(); -#ifdef HAVE_BG + if(job_desc_ptr->geometry[0] != (uint16_t) NO_VAL) select_g_select_jobinfo_set( job_desc_ptr->select_jobinfo, SELECT_JOBDATA_GEOMETRY, job_desc_ptr->geometry); -#endif if (job_desc_ptr->conn_type != (uint16_t) NO_VAL) select_g_select_jobinfo_set( @@ -5315,9 
+5314,7 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, /* These are set so we don't confuse them later for what is * set in the select_jobinfo structure. */ -#ifdef HAVE_BG job_desc_ptr->geometry[0] = (uint16_t)NO_VAL; -#endif job_desc_ptr->conn_type = (uint16_t)NO_VAL; job_desc_ptr->reboot = (uint16_t)NO_VAL; job_desc_ptr->rotate = (uint16_t)NO_VAL; diff --git a/src/common/slurmdb_defs.c b/src/common/slurmdb_defs.c index 946cb481d9445b1ac7d00837420b614dfa09ee98..2b973097ec09d143098f4a067b6a225e4241f755 100644 --- a/src/common/slurmdb_defs.c +++ b/src/common/slurmdb_defs.c @@ -45,6 +45,7 @@ #include "src/common/slurm_strcasestr.h" #include "src/common/slurm_protocol_defs.h" #include "src/common/parse_time.h" +#include "src/common/node_select.h" slurmdb_cluster_rec_t *working_cluster_rec = NULL; @@ -753,6 +754,42 @@ extern void slurmdb_destroy_report_cluster_grouping(void *object) } } +extern uint32_t slurmdb_setup_cluster_flags() +{ + uint32_t cluster_flags = 0; + + if(working_cluster_rec) + return working_cluster_rec->flags; + +#ifdef HAVE_BG + cluster_flags |= CLUSTER_FLAG_BG; +#endif +#ifdef HAVE_BGL + cluster_flags |= CLUSTER_FLAG_BGL; +#endif +#ifdef HAVE_BGP + cluster_flags |= CLUSTER_FLAG_BGP; +#endif +#ifdef HAVE_BGQ + cluster_flags |= CLUSTER_FLAG_BGQ; +#endif +#ifdef HAVE_SUN_CONST + cluster_flags |= CLUSTER_FLAG_SC; +#endif +#ifdef HAVE_XCPU + cluster_flags |= CLUSTER_FLAG_XCPU; +#endif +#ifdef HAVE_AIX + cluster_flags |= CLUSTER_FLAG_AIX; +#endif +#ifdef MULTIPLE_SLURMD + cluster_flags |= CLUSTER_FLAG_MULTSD; +#endif +#ifdef HAVE_CRAY_XT + cluster_flags |= CLUSTER_FLAG_CRAYXT; +#endif + return cluster_flags; +} extern List slurmdb_get_info_cluster(char *cluster_names) { @@ -763,7 +800,7 @@ extern List slurmdb_get_info_cluster(char *cluster_names) void *db_conn = NULL; ListIterator itr, itr2; int err = 0; - + int plugin_id_select = 0; xassert(cluster_names); cluster_name = slurm_get_cluster_name(); @@ -791,6 +828,7 @@ extern 
List slurmdb_get_info_cluster(char *cluster_names) err = 1; goto next; } + if(cluster_rec->rpc_version < 8) { error("Slurmctld on '%s' must be running at least " "SLURM 2.2 for cross-cluster communication.", @@ -800,6 +838,18 @@ extern List slurmdb_get_info_cluster(char *cluster_names) goto next; } + if((plugin_id_select = select_get_plugin_id_pos( + cluster_rec->plugin_id_select)) == SLURM_ERROR) { + error("Cluster '%s' has an unknown select plugin_id %u", + cluster_name, + cluster_rec->plugin_id_select); + list_delete_item(itr); + err = 1; + goto next; + } + + cluster_rec->plugin_id_select = plugin_id_select; + slurm_set_addr(&cluster_rec->control_addr, cluster_rec->control_port, cluster_rec->control_host); diff --git a/src/common/slurmdb_defs.h b/src/common/slurmdb_defs.h index da635054590dc0f8b900cb2bc22fe094a88255bd..ddc19c492c267d3402606ee575df981434d936df 100644 --- a/src/common/slurmdb_defs.h +++ b/src/common/slurmdb_defs.h @@ -100,6 +100,7 @@ extern void slurmdb_destroy_report_cluster_grouping(void *object); extern void slurmdb_init_association_rec(slurmdb_association_rec_t *assoc); extern void slurmdb_init_qos_rec(slurmdb_qos_rec_t *qos); +extern uint32_t slurmdb_setup_cluster_flags(); extern List slurmdb_get_info_cluster(char *cluster_name); extern char *slurmdb_qos_str(List qos_list, uint32_t level); extern uint32_t str_2_slurmdb_qos(List qos_list, char *level); diff --git a/src/common/slurmdb_pack.c b/src/common/slurmdb_pack.c index e0cb7a9534508c65e664f4308d99f3e7ad53f0e0..00d2743cabda82303dc8881275ee9ba8c028a6ce 100644 --- a/src/common/slurmdb_pack.c +++ b/src/common/slurmdb_pack.c @@ -508,6 +508,8 @@ extern void slurmdb_pack_cluster_rec(void *in, uint16_t rpc_version, Buf buffer) packnull(buffer); packnull(buffer); + pack32(NO_VAL, buffer); + slurmdb_pack_association_rec(NULL, rpc_version, buffer); pack16(0, buffer); @@ -539,6 +541,8 @@ extern void slurmdb_pack_cluster_rec(void *in, uint16_t rpc_version, Buf buffer) packstr(object->name, buffer); 
packstr(object->nodes, buffer); + pack32(object->plugin_id_select, buffer); + slurmdb_pack_association_rec(object->root_assoc, rpc_version, buffer); @@ -627,6 +631,8 @@ extern int slurmdb_unpack_cluster_rec(void **object, uint16_t rpc_version, safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&object_ptr->nodes, &uint32_tmp, buffer); + safe_unpack32(&object_ptr->plugin_id_select, buffer); + if(slurmdb_unpack_association_rec( (void **)&object_ptr->root_assoc, rpc_version, buffer) diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c index 35fe4f14bc2232264f53c57705d202d202a3d86f..63a466426f6a05dbec7429e4b5b04ad532bbec22 100644 --- a/src/common/slurmdbd_defs.c +++ b/src/common/slurmdbd_defs.c @@ -3632,6 +3632,7 @@ slurmdbd_pack_register_ctld_msg(dbd_register_ctld_msg_t *msg, if(rpc_version >= 8) { pack16(msg->dimensions, buffer); pack32(msg->flags, buffer); + pack32(msg->plugin_id_select, buffer); pack16(msg->port, buffer); } else { packnull(buffer); @@ -3650,6 +3651,7 @@ slurmdbd_unpack_register_ctld_msg(dbd_register_ctld_msg_t **msg, if(rpc_version >= 8) { safe_unpack16(&msg_ptr->dimensions, buffer); safe_unpack32(&msg_ptr->flags, buffer); + safe_unpack32(&msg_ptr->plugin_id_select, buffer); safe_unpack16(&msg_ptr->port, buffer); } else { char *tmp_char = NULL; diff --git a/src/common/slurmdbd_defs.h b/src/common/slurmdbd_defs.h index 5a0d1715c878bd0c821a65d789fd3662e382d9ba..e5779ae6c14fd35d57d1b959a9cacaf2d75358fc 100644 --- a/src/common/slurmdbd_defs.h +++ b/src/common/slurmdbd_defs.h @@ -333,6 +333,7 @@ typedef struct dbd_rc_msg { typedef struct dbd_register_ctld_msg { uint16_t dimensions; /* dimensions of system */ uint32_t flags; /* flags for cluster */ + uint32_t plugin_id_select; /* the select plugin_id */ uint16_t port; /* slurmctld's comm port */ } dbd_register_ctld_msg_t; diff --git a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c 
b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c index 2c2d71657d46a2464bb83b9173b10b8a897ba7c6..817825c34281ebd9e59b9fbfa96186b482f4cfcb 100644 --- a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c +++ b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c @@ -709,7 +709,6 @@ static int _parse_line(char *f[], void **data, int len) for (i=0; (*job)->nodes[i]; i++) { /* discard trailing <CR> */ if (isspace((*job)->nodes[i])) { (*job)->nodes[i] = '\0'; - info("got here"); } } if (!strcmp((*job)->nodes, "(null)")) { diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index f7accf979ffc25695b9f9d25e7e72831549c8fbc..3835d0c2d0301896c5169dd7ce4c6feb17ee4070 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -447,6 +447,7 @@ static int _as_mysql_acct_check_tables(MYSQL *db_conn) { "rpc_version", "smallint unsigned not null default 0" }, { "classification", "smallint unsigned default 0" }, { "dimensions", "smallint unsigned default 1" }, + { "plugin_id_select", "smallint unsigned default 0" }, { "flags", "int unsigned default 0" }, { NULL, NULL} }; diff --git a/src/plugins/accounting_storage/mysql/as_mysql_cluster.c b/src/plugins/accounting_storage/mysql/as_mysql_cluster.c index 9e73d5c22e2d8952093bd4c761d5c354a03e556a..3f48747656c90dcb6e5ae2ed5a26d222b98f2965 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_cluster.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_cluster.c @@ -279,6 +279,11 @@ extern List as_mysql_modify_clusters(mysql_conn_t *mysql_conn, uint32_t uid, clust_reg = true; } + if(cluster->plugin_id_select) { + xstrfmtcat(vals, ", plugin_id_select=%u", + cluster->plugin_id_select); + clust_reg = true; + } if(cluster->flags != NO_VAL) { xstrfmtcat(vals, ", flags=%u", cluster->flags); clust_reg = true; @@ -551,6 
+556,7 @@ extern List as_mysql_get_clusters(mysql_conn_t *mysql_conn, uid_t uid, "rpc_version", "dimensions", "flags", + "plugin_id_select", }; enum { CLUSTER_REQ_NAME, @@ -560,6 +566,7 @@ extern List as_mysql_get_clusters(mysql_conn_t *mysql_conn, uid_t uid, CLUSTER_REQ_VERSION, CLUSTER_REQ_DIMS, CLUSTER_REQ_FLAGS, + CLUSTER_REQ_PI_SELECT, CLUSTER_REQ_COUNT }; @@ -653,6 +660,7 @@ empty: cluster->rpc_version = atoi(row[CLUSTER_REQ_VERSION]); cluster->dimensions = atoi(row[CLUSTER_REQ_DIMS]); cluster->flags = atoi(row[CLUSTER_REQ_FLAGS]); + cluster->plugin_id_select = atoi(row[CLUSTER_REQ_PI_SELECT]); query = xstrdup_printf( "select cpu_count, cluster_nodes from " diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c index b9725ecf0fdb73cd2ad246ceb7a7daea7f5e285c..e8d7cb2912a3cb4a09696befea4fd21a872890f3 100644 --- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c +++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c @@ -1833,6 +1833,7 @@ extern int clusteracct_storage_p_register_ctld(void *db_conn, uint16_t port, req.port = port; req.dimensions = dims; req.flags = flags; + req.plugin_id_select = select_get_plugin_id(); msg.msg_type = DBD_REGISTER_CTLD; msg.data = &req; diff --git a/src/plugins/select/bluegene/plugin/jobinfo.c b/src/plugins/select/bluegene/plugin/jobinfo.c index 6ad46dd9a43bb6f2d9286555ac60d899eded3105..833e7c8ac49d40637e4059089942d6eabf9c48ef 100644 --- a/src/plugins/select/bluegene/plugin/jobinfo.c +++ b/src/plugins/select/bluegene/plugin/jobinfo.c @@ -346,13 +346,15 @@ extern int pack_select_jobinfo(select_jobinfo_t *jobinfo, Buf buffer, uint16_t protocol_version) { int i; + int dims = working_cluster_rec ? 
+ working_cluster_rec->dimensions : SYSTEM_DIMENSIONS; if(protocol_version >= SLURM_2_2_PROTOCOL_VERSION) { if (jobinfo) { /* NOTE: If new elements are added here, make sure to * add equivalant pack of zeros below for NULL * pointer */ - for (i=0; i<SYSTEM_DIMENSIONS; i++) { + for (i=0; i<dims; i++) { pack16(jobinfo->geometry[i], buffer); } pack16(jobinfo->conn_type, buffer); @@ -365,9 +367,7 @@ extern int pack_select_jobinfo(select_jobinfo_t *jobinfo, Buf buffer, packstr(jobinfo->nodes, buffer); packstr(jobinfo->ionodes, buffer); -#ifdef HAVE_BGL packstr(jobinfo->blrtsimage, buffer); -#endif packstr(jobinfo->linuximage, buffer); packstr(jobinfo->mloaderimage, buffer); packstr(jobinfo->ramdiskimage, buffer); @@ -375,7 +375,7 @@ extern int pack_select_jobinfo(select_jobinfo_t *jobinfo, Buf buffer, /* pack space for 3 positions for geo * then 1 for conn_type, reboot, and rotate */ - for (i=0; i<(SYSTEM_DIMENSIONS+3); i++) + for (i=0; i<(dims+3); i++) pack16((uint16_t) 0, buffer); pack32((uint32_t) 0, buffer); //node_cnt @@ -384,9 +384,7 @@ extern int pack_select_jobinfo(select_jobinfo_t *jobinfo, Buf buffer, packnull(buffer); //nodes packnull(buffer); //ionodes -#ifdef HAVE_BGL packnull(buffer); //blrts -#endif packnull(buffer); //linux packnull(buffer); //mloader packnull(buffer); //ramdisk @@ -474,16 +472,8 @@ extern int unpack_select_jobinfo(select_jobinfo_t **jobinfo_pptr, Buf buffer, safe_unpackstr_xmalloc(&(jobinfo->ionodes), &uint32_tmp, buffer); - if(working_cluster_rec) { - if(working_cluster_rec->flags & CLUSTER_FLAG_BGL) - safe_unpackstr_xmalloc(&(jobinfo->blrtsimage), - &uint32_tmp, buffer); - } else { -#ifdef HAVE_BGL - safe_unpackstr_xmalloc(&(jobinfo->blrtsimage), - &uint32_tmp, buffer); -#endif - } + safe_unpackstr_xmalloc(&(jobinfo->blrtsimage), + &uint32_tmp, buffer); safe_unpackstr_xmalloc(&(jobinfo->linuximage), &uint32_tmp, buffer); safe_unpackstr_xmalloc(&(jobinfo->mloaderimage), &uint32_tmp, @@ -507,17 +497,10 @@ extern int 
unpack_select_jobinfo(select_jobinfo_t **jobinfo_pptr, Buf buffer, safe_unpackstr_xmalloc(&(jobinfo->ionodes), &uint32_tmp, buffer); - if(working_cluster_rec) { - if(working_cluster_rec->flags & CLUSTER_FLAG_BGL) - safe_unpackstr_xmalloc(&(jobinfo->blrtsimage), - &uint32_tmp, buffer); - } else { #ifdef HAVE_BGL - safe_unpackstr_xmalloc(&(jobinfo->blrtsimage), - &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&(jobinfo->blrtsimage), + &uint32_tmp, buffer); #endif - } - safe_unpackstr_xmalloc(&(jobinfo->linuximage), &uint32_tmp, buffer); safe_unpackstr_xmalloc(&(jobinfo->mloaderimage), &uint32_tmp, @@ -553,7 +536,7 @@ extern char *sprint_select_jobinfo(select_jobinfo_t *jobinfo, } if ((mode != SELECT_PRINT_DATA) - && jobinfo && (jobinfo->magic != JOBINFO_MAGIC)) { + && jobinfo && (jobinfo->magic != JOBINFO_MAGIC)) { error("sprint_jobinfo: jobinfo magic bad"); return NULL; } diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 4630ce1fe11fdf0d7990759b3153b6ebc2c89b73..c1efeaa0ffbeb7b38f703d729d20376165342f99 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -1197,7 +1197,8 @@ static void _set_options(int argc, char **argv) } break; case 'M': - list_destroy(opt.clusters); + if(opt.clusters) + list_destroy(opt.clusters); if(!(opt.clusters = slurmdb_get_info_cluster(optarg))) { error("'%s' invalid entry for --cluster", diff --git a/src/sbatch/opt.h b/src/sbatch/opt.h index b498ce1b31c8bbabf705a6c840356cae3709a98f..34d0b56f10e335787cdb6161320b105e63ea0304 100644 --- a/src/sbatch/opt.h +++ b/src/sbatch/opt.h @@ -144,7 +144,7 @@ typedef struct sbatch_options { char *exc_nodes; /* --exclude=node1,node2,... 
-x */ /* BLUEGENE SPECIFIC */ - uint16_t geometry[SYSTEM_DIMENSIONS]; /* --geometry, -g */ + uint16_t geometry[HIGHEST_DIMENSIONS]; /* --geometry, -g */ bool reboot; /* --reboot */ bool no_rotate; /* --no_rotate, -R */ uint16_t conn_type; /* --conn-type */ diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index e8f7fcb25d41e9430a28dd042c1636ed01d33c88..949b590dbaa3dcb22dafe4f568a8ff5b4bb2ce37 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -242,13 +242,16 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc) if (opt.hold) desc->priority = 0; -#ifdef HAVE_BG - if (opt.geometry[0] > 0) { + + if ((int)opt.geometry[0] > 0) { int i; - for (i=0; i<SYSTEM_DIMENSIONS; i++) + int dims = working_cluster_rec ? + working_cluster_rec->dimensions : SYSTEM_DIMENSIONS; + + for (i=0; i<dims; i++) desc->geometry[i] = opt.geometry[i]; } -#endif + if (opt.conn_type != (uint16_t) NO_VAL) desc->conn_type = opt.conn_type; if (opt.reboot) diff --git a/src/sinfo/sinfo.c b/src/sinfo/sinfo.c index 91fad08f71f66ea1e82152dec6e59184809bf7ae..4230b20e6419430e9d9289203bdaabd25d3ad453 100644 --- a/src/sinfo/sinfo.c +++ b/src/sinfo/sinfo.c @@ -133,76 +133,6 @@ int main(int argc, char *argv[]) exit(rc); } -static char *_conn_type_str(int conn_type) -{ - switch (conn_type) { - case (SELECT_MESH): - return "MESH"; - case (SELECT_TORUS): - return "TORUS"; - case (SELECT_SMALL): - return "SMALL"; - } - return "?"; -} - -static char *_node_use_str(int node_use) -{ - switch (node_use) { - case (SELECT_COPROCESSOR_MODE): - return "COPROCESSOR"; - case (SELECT_VIRTUAL_NODE_MODE): - return "VIRTUAL"; - } - return "?"; -} - -static char *_part_state_str(int state) -{ - static char tmp[16]; - /* This is needs to happen cross cluster. Since the enums - * changed. We don't handle BUSY or REBOOTING though, these - * states are extremely rare so it isn't that big of a deal. 
- */ -#ifdef HAVE_BGL - if(working_cluster_rec) { - if(!(working_cluster_rec->flags & CLUSTER_FLAG_BGL)) { - if(state == RM_PARTITION_BUSY) - state = RM_PARTITION_READY; - } - } -#else - if(working_cluster_rec) { - if(working_cluster_rec->flags & CLUSTER_FLAG_BGL) { - if(state == RM_PARTITION_REBOOTING) - state = RM_PARTITION_READY; - } - } -#endif - switch (state) { -#ifdef HAVE_BGL - case RM_PARTITION_BUSY: - return "BUSY"; -#else - case RM_PARTITION_REBOOTING: - return "REBOOTING"; -#endif - case RM_PARTITION_CONFIGURING: - return "CONFIG"; - case RM_PARTITION_DEALLOCATING: - return "DEALLOC"; - case RM_PARTITION_ERROR: - return "ERROR"; - case RM_PARTITION_FREE: - return "FREE"; - case RM_PARTITION_READY: - return "READY"; - } - - snprintf(tmp, sizeof(tmp), "%d", state); - return tmp; -} - /* * _bg_report - download and print current bgblock state information */ @@ -225,11 +155,11 @@ static int _bg_report(block_info_msg_t *block_ptr) block_ptr->block_array[i].bg_block_id, block_ptr->block_array[i].nodes, block_ptr->block_array[i].owner_name, - _part_state_str( + bg_block_state_string( block_ptr->block_array[i].state), - _conn_type_str( + conn_type_string( block_ptr->block_array[i].conn_type), - _node_use_str( + node_use_string( block_ptr->block_array[i].node_use)); } diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index cc857ddb27c454fe64d9f2fa3afd6bebbf41cc8d..6d76440e63fb11087cdbe7f4308e4a293ad040ad 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -221,34 +221,7 @@ int main(int argc, char *argv[]) WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK }; assoc_init_args_t assoc_init_arg; pthread_t assoc_cache_thread; - -#ifdef HAVE_BG - cluster_flags |= CLUSTER_FLAG_BG; -#endif -#ifdef HAVE_BGL - cluster_flags |= CLUSTER_FLAG_BGL; -#endif -#ifdef HAVE_BGP - cluster_flags |= CLUSTER_FLAG_BGP; -#endif -#ifdef HAVE_BGQ - cluster_flags |= CLUSTER_FLAG_BGQ; -#endif -#ifdef HAVE_SUN_CONST - cluster_flags |= CLUSTER_FLAG_SC; 
-#endif -#ifdef HAVE_XCPU - cluster_flags |= CLUSTER_FLAG_XCPU; -#endif -#ifdef HAVE_AIX - cluster_flags |= CLUSTER_FLAG_AIX; -#endif -#ifdef MULTIPLE_SLURMD - cluster_flags |= CLUSTER_FLAG_MULTSD; -#endif -#ifdef HAVE_CRAY_XT - cluster_flags |= CLUSTER_FLAG_CRAYXT; -#endif + cluster_flags = slurmdb_setup_cluster_flags(); /* * Establish initial configuration diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index 468db9c68a1ca0f2f773e25f26152631ecb9d67a..515b7851c6d6c78f01ba1657b06955bbd06b6795 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -1207,11 +1207,11 @@ _rpc_batch_job(slurm_msg_t *msg) */ #ifdef HAVE_BG select_g_select_jobinfo_get(req->select_jobinfo, - SELECT_JOBDATA_BLOCK_ID, &resv_id); + SELECT_JOBDATA_BLOCK_ID, &resv_id); #endif #ifdef HAVE_CRAY_XT select_g_select_jobinfo_get(req->select_jobinfo, - SELECT_JOBDATA_RESV_ID, &resv_id); + SELECT_JOBDATA_RESV_ID, &resv_id); #endif rc = _run_prolog(req->job_id, req->uid, resv_id, req->spank_job_env, req->spank_job_env_size); @@ -3109,12 +3109,13 @@ _rpc_abort_job(slurm_msg_t *msg) save_cred_state(conf->vctx); #ifdef HAVE_BG - select_g_select_jobinfo_get(req->select_jobinfo, SELECT_JOBDATA_BLOCK_ID, - &resv_id); + select_g_select_jobinfo_get(req->select_jobinfo, + SELECT_JOBDATA_BLOCK_ID, + &resv_id); #endif #ifdef HAVE_CRAY_XT select_g_select_jobinfo_get(req->select_jobinfo, SELECT_JOBDATA_RESV_ID, - &resv_id); + &resv_id); #endif _run_epilog(req->job_id, req->job_uid, resv_id, req->spank_job_env, req->spank_job_env_size); @@ -3296,12 +3297,14 @@ _rpc_terminate_job(slurm_msg_t *msg) save_cred_state(conf->vctx); #ifdef HAVE_BG - select_g_select_jobinfo_get(req->select_jobinfo, SELECT_JOBDATA_BLOCK_ID, - &resv_id); + select_g_select_jobinfo_get(req->select_jobinfo, + SELECT_JOBDATA_BLOCK_ID, + &resv_id); #endif #ifdef HAVE_CRAY_XT - select_g_select_jobinfo_get(req->select_jobinfo, SELECT_JOBDATA_RESV_ID, - &resv_id); + select_g_select_jobinfo_get(req->select_jobinfo, + 
SELECT_JOBDATA_RESV_ID, + &resv_id); #endif rc = _run_epilog(req->job_id, req->job_uid, resv_id, req->spank_job_env, req->spank_job_env_size); diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c index a0aae8c868cd7c79d22b35f0e2f1361664cdf0d5..89d8d5a21c3841e79567106b8ec8584c82949917 100644 --- a/src/slurmdbd/proc_req.c +++ b/src/slurmdbd/proc_req.c @@ -2610,6 +2610,7 @@ static int _register_ctld(slurmdbd_conn_t *slurmdbd_conn, cluster.control_port = register_ctld_msg->port; cluster.dimensions = register_ctld_msg->dimensions; cluster.flags = register_ctld_msg->flags; + cluster.plugin_id_select = register_ctld_msg->plugin_id_select; cluster.rpc_version = slurmdbd_conn->rpc_version; list_msg.my_list = acct_storage_g_modify_clusters(