From 38a96fb0b89548931461d3613d2f17dfa0e0cc8a Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Wed, 22 Jul 2009 21:58:57 +0000 Subject: [PATCH] BLUEGENE - better logic to determine the num procs and max nodes when allocation comes in --- src/common/slurm_protocol_api.c | 1 + .../select/bluegene/plugin/bg_job_place.c | 45 +++++-------------- .../select/bluegene/plugin/select_bluegene.c | 41 ++++++++--------- src/slurmctld/job_mgr.c | 2 +- 4 files changed, 33 insertions(+), 56 deletions(-) diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 35dc9e30981..1a35092c989 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -3212,6 +3212,7 @@ List slurm_send_recv_msgs(const char *nodelist, slurm_msg_t *msg, nodelist); return NULL; } +/* info("got %s and %s", nodelist, name); */ hl = hostlist_create(name); free(name); } diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index bf78776879e..8ceb4a517eb 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -1366,26 +1366,13 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, select_g_select_jobinfo_get(job_ptr->select_jobinfo, SELECT_JOBDATA_CONN_TYPE, &conn_type); if(conn_type == SELECT_NAV) { - uint32_t max_procs = (uint32_t)NO_VAL; if(bg_conf->bp_node_cnt == bg_conf->nodecard_node_cnt) conn_type = SELECT_SMALL; - else if(min_nodes > 1) { + else if(min_nodes > 1) conn_type = SELECT_TORUS; - /* make sure the max procs are set to NO_VAL */ - select_g_select_jobinfo_set(job_ptr->select_jobinfo, - SELECT_JOBDATA_MAX_PROCS, - &max_procs); - - } else { - select_g_select_jobinfo_get(job_ptr->select_jobinfo, - SELECT_JOBDATA_MAX_PROCS, - &max_procs); - if((max_procs > bg_conf->procs_per_bp) - || (max_procs == NO_VAL)) - conn_type = SELECT_TORUS; - else - conn_type = SELECT_SMALL; - } + else if(job_ptr->num_procs < bg_conf->procs_per_bp) + conn_type = SELECT_SMALL; + select_g_select_jobinfo_set(job_ptr->select_jobinfo, SELECT_JOBDATA_CONN_TYPE, &conn_type); @@ -1570,26 +1557,14 @@ extern int test_job_list(List req_list) select_g_select_jobinfo_get(will_run->job_ptr->select_jobinfo, SELECT_JOBDATA_CONN_TYPE, &conn_type); if(conn_type == SELECT_NAV) { - uint32_t max_procs = (uint32_t)NO_VAL; - if(will_run->min_nodes > 1) { + if(bg_conf->bp_node_cnt == bg_conf->nodecard_node_cnt) + conn_type = SELECT_SMALL; + else if(will_run->min_nodes > 1) conn_type = SELECT_TORUS; - /* make sure the max procs are set to NO_VAL */ - select_g_select_jobinfo_set( - will_run->job_ptr->select_jobinfo, - SELECT_JOBDATA_MAX_PROCS, - &max_procs); - - } else { - select_g_select_jobinfo_get( - will_run->job_ptr->select_jobinfo, - SELECT_JOBDATA_MAX_PROCS, - &max_procs); - if((max_procs > bg_conf->procs_per_bp) - || (max_procs == NO_VAL)) - conn_type = SELECT_TORUS; - else + else if(will_run->job_ptr->num_procs + < bg_conf->procs_per_bp) conn_type = SELECT_SMALL; - } + select_g_select_jobinfo_set( will_run->job_ptr->select_jobinfo, SELECT_JOBDATA_CONN_TYPE, diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c index 52e1b990274..ca0765e731c 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -1118,8 +1118,8 @@ extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data) SELECT_JOBDATA_ALTERED, &tmp); tmp = NO_VAL; set_select_jobinfo(job_desc->select_jobinfo, - SELECT_JOBDATA_MAX_PROCS, - &tmp); + SELECT_JOBDATA_MAX_PROCS, + &tmp); if(job_desc->min_nodes == (uint32_t) NO_VAL) return SLURM_SUCCESS; @@ -1135,14 +1135,14 @@ extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data) job_desc->min_nodes *= bg_conf->bp_node_cnt; job_desc->max_nodes = job_desc->min_nodes; } + + /* initialize num_procs to the min_nodes */ + job_desc->num_procs = job_desc->min_nodes * bg_conf->proc_ratio; + + if((job_desc->max_nodes == (uint32_t) NO_VAL) + || (job_desc->max_nodes < job_desc->min_nodes)) + job_desc->max_nodes = job_desc->min_nodes; - if(job_desc->num_procs != NO_VAL) { - job_desc->num_procs /= bg_conf->proc_ratio; - if(job_desc->min_nodes < job_desc->num_procs) - job_desc->min_nodes = job_desc->num_procs; - if(job_desc->max_nodes < job_desc->num_procs) - job_desc->max_nodes = job_desc->num_procs; - } /* See if min_nodes is greater than one base partition */ if(job_desc->min_nodes > bg_conf->bp_node_cnt) { /* @@ -1179,8 +1179,8 @@ extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data) bg_conf->bp_node_cnt; set_select_jobinfo(job_desc->select_jobinfo, - SELECT_JOBDATA_NODE_CNT, - &job_desc->min_nodes); + SELECT_JOBDATA_NODE_CNT, + &job_desc->min_nodes); tmp = bg_conf->bp_node_cnt/job_desc->min_nodes; @@ -1197,18 +1197,15 @@ extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data) } set_select_jobinfo(job_desc->select_jobinfo, - SELECT_JOBDATA_NODE_CNT, - &job_desc->min_nodes); + SELECT_JOBDATA_NODE_CNT, + &job_desc->min_nodes); job_desc->num_procs = job_desc->min_nodes * bg_conf->proc_ratio; job_desc->min_nodes = 1; #endif } - - if(job_desc->max_nodes == (uint32_t) NO_VAL) - return SLURM_SUCCESS; - + if(job_desc->max_nodes > bg_conf->bp_node_cnt) { tmp = job_desc->max_nodes % bg_conf->bp_node_cnt; if(tmp > 0) @@ -1216,8 +1213,13 @@ extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data) (bg_conf->bp_node_cnt-tmp); } tmp = job_desc->max_nodes / bg_conf->bp_node_cnt; + if(tmp > 0) { job_desc->max_nodes = tmp; + tmp *= bg_conf->procs_per_bp; + set_select_jobinfo(job_desc->select_jobinfo, + SELECT_JOBDATA_MAX_PROCS, + &tmp); tmp = NO_VAL; } else { #ifdef HAVE_BGL @@ -1237,8 +1239,8 @@ extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data) tmp = bg_conf->procs_per_bp/tmp; set_select_jobinfo(job_desc->select_jobinfo, - SELECT_JOBDATA_MAX_PROCS, - &tmp); + SELECT_JOBDATA_MAX_PROCS, + &tmp); job_desc->max_nodes = 1; #else i = bg_conf->smallest_block; @@ -1249,7 +1251,6 @@ extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data) } i *= 2; } - tmp = job_desc->max_nodes * bg_conf->proc_ratio; set_select_jobinfo(job_desc->select_jobinfo, SELECT_JOBDATA_MAX_PROCS, diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index a4ccdb4b1d8..55f815392e9 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -2426,7 +2426,7 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, job_desc->num_procs); select_g_alter_node_cnt(SELECT_SET_NODE_CNT, job_desc); select_g_select_jobinfo_get(job_desc->select_jobinfo, - SELECT_JOBDATA_MAX_PROCS, &max_procs); + SELECT_JOBDATA_MAX_PROCS, &max_procs); debug3("after alteration asking for nodes %u-%u procs %u-%u", job_desc->min_nodes, job_desc->max_nodes, job_desc->num_procs, max_procs); -- GitLab