From 4a8ca0d841bb078877990ba827509c11e42d74cb Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Tue, 23 Dec 2008 20:14:00 +0000 Subject: [PATCH] change to 3-d array for representing node coordinates add support for network wrapping around the torus (X=0 and X=7 may be logically adjacent). --- src/plugins/select/3d_torus/select_3d_torus.c | 92 +++++++++++-------- src/plugins/select/3d_torus/select_3d_torus.h | 4 +- 2 files changed, 55 insertions(+), 41 deletions(-) diff --git a/src/plugins/select/3d_torus/select_3d_torus.c b/src/plugins/select/3d_torus/select_3d_torus.c index e9d81851e61..835c1ea7733 100644 --- a/src/plugins/select/3d_torus/select_3d_torus.c +++ b/src/plugins/select/3d_torus/select_3d_torus.c @@ -145,6 +145,7 @@ static bool cr_priority_test = false; static bool cr_priority_selection = false; static struct node_cr_record *node_cr_ptr = NULL; +static uint16_t max_coord[3]; static pthread_mutex_t cr_mutex = PTHREAD_MUTEX_INITIALIZER; static List step_cr_list = NULL; @@ -825,6 +826,24 @@ static int _find_job_mate(struct job_record *job_ptr, bitstr_t *bitmap, return EINVAL; } +static int _node_distance(uint16_t *focus, uint16_t *node_loc) +{ + int delta, distance = 0, i; + + for (i=0; i<3; i++) { + if (focus[i] > node_loc[i]) + delta = focus[i] - node_loc[i]; + else + delta = node_loc[i] - focus[i]; + if (delta > ((max_coord[i] + 1) / 2)) { + /* communication wraps arounds torus */ + delta = (max_coord[i] + 1) - delta; + } + distance += delta; + } + return distance; +} + static int _node_sort_by_distance(void *x, void *y) { struct node_select_struct *x_ptr = (struct node_select_struct *) x; @@ -851,8 +870,7 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, int avail_cpus, alloc_cpus = 0; int rem_cpus, rem_nodes; /* remaining resources desired */ int error_code = EINVAL; - int delta_x, delta_y, delta_z; - uint16_t focus_x, focus_y, focus_z; + uint16_t focus[3]; struct node_select_struct *node_select_ptr = NULL; List node_list = NULL; ListIterator iter; @@ -868,26 +886,25 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, rem_nodes = min_nodes; /* This is a very simple algorithm for now that picks one node as - * a focus point, then picks additional nodes from those available - * in order of minimum distance from that focus point. The focus + * a focal point, then picks additional nodes from those available + * in order of minimum distance from that focal point. The focal * point will be the first required node (if any) or the first - * available node. This logic does not take into consideration + * available node. This logic does take into consideration * network connections that wrap from one side of the machine * to the other (e.g. X=0 and X=7 might be logically adjacent). */ if (job_ptr->details->req_node_bitmap) i_first = bit_ffs(job_ptr->details->req_node_bitmap); else i_first = bit_ffs(bitmap); - focus_x = node_cr_ptr[i_first].x_coord; - focus_y = node_cr_ptr[i_first].y_coord; - focus_z = node_cr_ptr[i_first].z_coord; + for (i=0; i<3; i++) + focus[i] = node_cr_ptr[i_first].coord[i]; if (job_ptr->details->req_node_bitmap) i_first = bit_ffs(bitmap); i_last = bit_fls(bitmap); - node_list = list_create(_node_rec_delete); - /* Identify any specific required nodes. - * Identify distance from focus for other available nodes */ + /* Identify any specific required nodes and allocated to this job. + * For other available nodes, compute distance from focal point. */ + node_list = list_create(_node_rec_delete); for (i=i_first; i<=i_last; i++) { if (bit_test(bitmap, i)) { avail_cpus = _get_avail_cpus(job_ptr, i); @@ -905,26 +922,9 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, node_select_struct)); node_select_ptr->index = i; node_select_ptr->avail_cpus = avail_cpus; - if (focus_x > node_cr_ptr[i].x_coord) - delta_x = focus_x - - node_cr_ptr[i].x_coord; - else - delta_x = node_cr_ptr[i].x_coord - - focus_x; - if (focus_y > node_cr_ptr[i].y_coord) - delta_y = focus_y - - node_cr_ptr[i].y_coord; - else - delta_y = node_cr_ptr[i].y_coord - - focus_y; - if (focus_z > node_cr_ptr[i].z_coord) - delta_z = focus_z - - node_cr_ptr[i].z_coord; - else - delta_z = node_cr_ptr[i].z_coord - - focus_z; - node_select_ptr->distance = delta_x + delta_y + - delta_z; + node_select_ptr->distance = + _node_distance(focus, + node_cr_ptr[i].coord); if (!list_append(node_list, node_select_ptr)) fatal("malloc failure"); } @@ -934,8 +934,9 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, if ((rem_nodes <= 0) && (rem_cpus <= 0)) { error_code = SLURM_SUCCESS; } else { - /* If nodes needed, sort available node list and - * pick nodes based upon distance */ + /* If more resources needed, sort list of available nodes + * by distance from focal point and accumulate additional + * resources for the job. */ list_sort(node_list, _node_sort_by_distance); iter = list_iterator_create(node_list); while ((max_nodes > 0) && @@ -943,7 +944,7 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, list_next(iter))) { bit_set(bitmap, node_select_ptr->index); avail_cpus = _get_avail_cpus(job_ptr, - node_select_ptr->index); + node_select_ptr->index); rem_cpus -= avail_cpus; alloc_cpus += avail_cpus; rem_nodes--; @@ -1420,6 +1421,7 @@ static void _init_node_cr(void) struct job_record *job_ptr; ListIterator job_iterator; uint32_t job_memory_cpu, job_memory_node; + uint16_t min_coord[3]; int exclusive, i, i_first, i_last, j; char *coord_ptr; @@ -1427,7 +1429,10 @@ static void _init_node_cr(void) return; node_cr_ptr = xmalloc(select_node_cnt * sizeof(struct node_cr_record)); - + for (j=0; j<3; j++) { + max_coord[j] = 0; + min_coord[j] = 1; + } for (i=0; i<select_node_cnt; i++) { if (select_node_ptr[i].name == NULL) continue; @@ -1435,9 +1440,20 @@ static void _init_node_cr(void) continue; j -=3; coord_ptr = &select_node_ptr[i].name[j]; - node_cr_ptr[i].x_coord = _alpha_to_num(coord_ptr[0]); - node_cr_ptr[i].y_coord = _alpha_to_num(coord_ptr[1]); - node_cr_ptr[i].z_coord = _alpha_to_num(coord_ptr[2]); + for (j=0; j<3; j++) { + node_cr_ptr[i].coord[j] = _alpha_to_num(coord_ptr[j]); + min_coord[j] = MIN(min_coord[j], + node_cr_ptr[i].coord[j]); + max_coord[j] = MAX(max_coord[j], + node_cr_ptr[i].coord[j]); + } + } + for (j=0; j<3; j++) { + if (min_coord[j]) { + /* Not fatal, but not pretty */ + error("Node list expression is not zero origin"); + break; + } } /* build partition records */ diff --git a/src/plugins/select/3d_torus/select_3d_torus.h b/src/plugins/select/3d_torus/select_3d_torus.h index 3b42a4835bc..f654e907240 100644 --- a/src/plugins/select/3d_torus/select_3d_torus.h +++ b/src/plugins/select/3d_torus/select_3d_torus.h @@ -67,9 +67,7 @@ struct node_cr_record { uint32_t exclusive_jobid; /* if the node is allocated exclusively * to some job, put its jobid here, * otherwise value is zero */ - uint16_t x_coord; /* X-dimension coordinate */ - uint16_t y_coord; /* Y-dimension coordinate */ - uint16_t z_coord; /* Z-dimension coordinate */ + uint16_t coord[3]; /* X, Y and Z coordinates */ }; #endif /* !_SELECT_3D_TORUS_H */ -- GitLab