diff --git a/src/plugins/select/bluegene/Makefile.am b/src/plugins/select/bluegene/Makefile.am index 1ea9e5d8b370c871a43e9bbb75250bb0d44c9216..a7c84388171feacf5464f4cfc1f1c5017fd30b27 100644 --- a/src/plugins/select/bluegene/Makefile.am +++ b/src/plugins/select/bluegene/Makefile.am @@ -12,6 +12,7 @@ pkglib_LTLIBRARIES = select_bluegene.la # Blue Gene node selection plugin. select_bluegene_la_SOURCES = select_bluegene.c \ + bgl_job.c bgl_job.h \ bluegene.c bluegene.h \ state_test.c state_test.h \ partition_sys.c partition_sys.h diff --git a/src/plugins/select/bluegene/bgl_job.c b/src/plugins/select/bluegene/bgl_job.c new file mode 100644 index 0000000000000000000000000000000000000000..189fb8bee3dfb0e06662d9a57c2952cf73bf038e --- /dev/null +++ b/src/plugins/select/bluegene/bgl_job.c @@ -0,0 +1,286 @@ +/*****************************************************************************\ + * bluegene.c - blue gene node allocation module. + ***************************************************************************** + * Copyright (C) 2004 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Dan Phung <phung4@llnl.gov> and Morris Jette <jette1@llnl.gov> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
/*
 * Advance a 3-D geometry request to the next of its six axis orderings.
 *
 * IN/OUT req_geometry - array of three dimension sizes, permuted in place
 * IN rot_cnt - rotation step. Even steps (0, 2, 4) exchange elements 1 and 2,
 *	odd steps (1, 3, 5) exchange elements 0 and 1. Any other value leaves
 *	the array untouched.
 *
 * Applying steps 0 through 5 in order walks
 *	ABC -> ACB -> CAB -> CBA -> BCA -> BAC -> ABC
 * so a caller that runs all six steps ends with the geometry it started with.
 */
static void _rotate_geo(uint16_t *req_geometry, int rot_cnt)
{
	int lo, hi;
	uint16_t held;

	if ((rot_cnt < 0) || (rot_cnt > 5))
		return;		/* not a rotation step, nothing to do */

	/* even step: swap the last two axes; odd step: swap the first two */
	lo = (rot_cnt % 2) ? 0 : 1;
	hi = lo + 1;

	held             = req_geometry[lo];
	req_geometry[lo] = req_geometry[hi];
	req_geometry[hi] = held;
}
instance, geometry, type, size, etc. + * + * OUT - part_id of matched partition, NULL otherwise + * returns 1 for error (no match) + * + */ +static int _find_best_partition_match(struct job_record* job_ptr, + bitstr_t* slurm_part_bitmap, int min_nodes, int max_nodes, + int spec, bgl_record_t** found_bgl_record) +{ + ListIterator itr; + bgl_record_t* record; + int i; + uint16_t req_geometry[SYSTEM_DIMENSIONS]; + uint16_t conn_type, node_use, rotate, target_size = 1; + + sort_bgl_record_inc_size(bgl_list); + + select_g_get_jobinfo(job_ptr->select_jobinfo, + SELECT_DATA_CONN_TYPE, &conn_type); + select_g_get_jobinfo(job_ptr->select_jobinfo, + SELECT_DATA_GEOMETRY, req_geometry); + select_g_get_jobinfo(job_ptr->select_jobinfo, + SELECT_DATA_NODE_USE, &node_use); + select_g_get_jobinfo(job_ptr->select_jobinfo, + SELECT_DATA_ROTATE, &rotate); + for (i=0; i<SYSTEM_DIMENSIONS; i++) + target_size *= req_geometry[i]; + if (target_size == 0) /* no geometry specified */ + target_size = min_nodes; + + /* this is where we should have the control flow depending on + * the spec arguement */ + itr = list_iterator_create(bgl_list); + *found_bgl_record = NULL; + /* FIXME: NEED TO PUT THIS LOGIC IN: + * if RM_NAV, then the partition with both the TORUS and the + * dims should be favored over the MESH and the dims, but + * foremost is the correct num of dims. 
+ */ + debug("number of partitions to check: %d", list_count(bgl_list)); + while ((record = (bgl_record_t*) list_next(itr))) { + /* + * check that the number of nodes is suitable + */ + if ((record->size < min_nodes) + || (max_nodes != 0 && record->size > max_nodes) + || (record->size < target_size)) { + debug("partition %s node count not suitable", + record->slurm_part_id); + continue; + } + + /* Check that configured */ + if (!record->alloc_part) { + error("warning, bgl_record %s undefined in " + "bluegene.conf", record->nodes); + continue; + } + + /* + * Next we check that this partition's bitmap is within + * the set of nodes which the job can use. + * Nodes not available for the job could be down, + * drained, allocated to some other job, or in some + * SLURM partition not available to this job. + */ + if (!bit_super_set(record->bitmap, slurm_part_bitmap)) { + debug("bgl partition %s has nodes not usable by this " + "job", record->nodes); + continue; + } + + /* + * Insure that any required nodes are in this BGL partition + */ + if (job_ptr->details->req_node_bitmap + && (!bit_super_set(job_ptr->details->req_node_bitmap, + record->bitmap))) { + info("bgl partition %s lacks required nodes", + record->nodes); + continue; + } + + /***********************************************/ + /* check the connection type specified matches */ + /***********************************************/ + if ((conn_type != record->conn_type) + && (conn_type != RM_NAV)) { + debug("bgl partition %s conn-type not usable", + record->nodes); + continue; + } + + /***********************************************/ + /* check the node_use specified matches */ + /***********************************************/ + if (node_use != record->node_use) { + debug("bgl partition %s node-use not usable", + record->nodes); + continue; + } + + /*****************************************/ + /** match up geometry as "best" possible */ + /*****************************************/ + if (req_geometry[0] == 0) + ; 
/* Geometry not specified */ + else { /* match requested geometry */ + bool match = false; + int rot_cnt = 0; /* attempt six rotations */ + + for (rot_cnt=0; rot_cnt<6; rot_cnt++) { + for (i=0; i<SYSTEM_DIMENSIONS; i++) { + if (record->alloc_part->dimensions[i] < + req_geometry[i]) + break; + } + if (i == SYSTEM_DIMENSIONS) { + match = true; + break; + } + if (rotate == 0) + break; /* not usable */ + _rotate_geo(req_geometry, rot_cnt); + } + + if (!match) + continue; /* Not usable */ + } + + if ((*found_bgl_record == NULL) + || (record->size < (*found_bgl_record)->size)) { + *found_bgl_record = record; + if (record->size == target_size) + break; + } + } + + /* set the bitmap and do other allocation activities */ + if (*found_bgl_record) { + debug("_find_best_partition_match %s <%s>", + (*found_bgl_record)->slurm_part_id, + (*found_bgl_record)->nodes); + bit_and(slurm_part_bitmap, (*found_bgl_record)->bitmap); + return SLURM_SUCCESS; + } + + debug("_find_best_partition_match none found"); + return SLURM_ERROR; +} + +/* + * Try to find resources for a given job request + * IN job_ptr - pointer to job record in slurmctld + * IN/OUT bitmap - nodes availble for assignment to job, clear those not to + * be used + * IN min_nodes, max_nodes - minimum and maximum number of nodes to allocate + * to this job (considers slurm partition limits) + * RET - SLURM_SUCCESS if job runnable now, error code otherwise + */ +extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_part_bitmap, + int min_nodes, int max_nodes) +{ + int spec = 1; /* this will be like, keep TYPE a priority, etc, */ + bgl_record_t* record; + char buf[100]; + + debug("bluegene::submit_job"); + + select_g_sprint_jobinfo(job_ptr->select_jobinfo, buf, sizeof(buf), + SELECT_PRINT_MIXED); + debug("bluegene:submit_job: %s nodes=%d-%d", buf, min_nodes, max_nodes); + + if (_find_best_partition_match(job_ptr, slurm_part_bitmap, min_nodes, + max_nodes, spec, &record)) { + return SLURM_ERROR; + } else { + /* now 
we place the part_id into the env of the script to run */ + char bgl_part_id[BITSIZE]; +#ifdef HAVE_BGL_FILES + snprintf(bgl_part_id, BITSIZE, "%s", record->bgl_part_id); +#else + snprintf(bgl_part_id, BITSIZE, "UNDEFINED"); +#endif + select_g_set_jobinfo(job_ptr->select_jobinfo, + SELECT_DATA_PART_ID, bgl_part_id); + } + + /* we should do the BGL stuff here like, init BGL job stuff... */ + return SLURM_SUCCESS; +} diff --git a/src/plugins/select/bluegene/bgl_job.h b/src/plugins/select/bluegene/bgl_job.h new file mode 100644 index 0000000000000000000000000000000000000000..75a3fa9ecb79dac11043182683d7816ef522d904 --- /dev/null +++ b/src/plugins/select/bluegene/bgl_job.h @@ -0,0 +1,41 @@ +/*****************************************************************************\ + * bgl_job.h - header for blue gene job management functions. + ***************************************************************************** + * Copyright (C) 2004 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Dan Phung <phung4@llnl.gov> et. al. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+\*****************************************************************************/ + +#ifndef _BGL_JOB_H_ +#define _BGL_JOB_H_ + +/* + * Try to find resources for a given job request + * IN job_ptr - pointer to job record in slurmctld + * IN/OUT bitmap - nodes availble for assignment to job, clear those not to + * be used + * IN min_nodes, max_nodes - minimum and maximum number of nodes to allocate + * to this job (considers slurm partition limits) + * RET - SLURM_SUCCESS if job runnable now, error code otherwise + */ +extern int submit_job(struct job_record *job_ptr, bitstr_t *bitmap, + int min_nodes, int max_nodes); + +#endif /* _BGL_JOB_H_ */ diff --git a/src/plugins/select/bluegene/bluegene.c b/src/plugins/select/bluegene/bluegene.c index c0b6452c9ba3f8f3ee8d20d9c59e1a8a27c093bc..593892af882871a9a616ad2e68b846e2d64a71a6 100644 --- a/src/plugins/select/bluegene/bluegene.c +++ b/src/plugins/select/bluegene/bluegene.c @@ -43,6 +43,7 @@ #include "src/common/parse_spec.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" +#include "bgl_job.h" #include "bluegene.h" #include "partition_sys.h" #include "state_test.h" @@ -65,13 +66,6 @@ char *bluegene_blrts = NULL, *bluegene_linux = NULL, *bluegene_mloader = NULL; char *bluegene_ramdisk = NULL, *bluegene_serial = NULL; bool agent_fini = false; -#define SWAP(a,b,t) \ -_STMT_START { \ - (t) = (a); \ - (a) = (b); \ - (b) = (t); \ -} _STMT_END - /* some local functions */ static int _bgl_record_cmpf_inc(bgl_record_t* rec_a, bgl_record_t* rec_b); static int _bgl_record_cmpf_dec(bgl_record_t* rec_a, bgl_record_t* rec_b); @@ -80,10 +74,6 @@ static void _destroy_bgl_conf_record(void* object); static void _destroy_bgl_record(void* object); static void _diff_tv_str(struct timeval *tv1,struct timeval *tv2, char *tv_str, int len_tv_str); -static int _find_best_partition_match(struct job_record* job_ptr, - bitstr_t* slurm_part_bitmap, - int min_nodes, int max_nodes, - int spec, bgl_record_t** found_bgl_record); static 
bgl_conf_record_t* _find_config_by_nodes(char* nodes); static int _listfindf_conf_part_record(bgl_conf_record_t* record, char *nodes); @@ -94,9 +84,6 @@ static int _sync_partitions(void); static int _validate_config_nodes(void); static int _wire_bgl_partitions(void); -/* Rotate a geometry array through six permutations */ -static void _rotate_geo(uint16_t *req_geometry, int rot_cnt); - /* * create_static_partitions - create the static partitions that will be used * for scheduling. @@ -851,161 +838,6 @@ extern char* convert_node_use(rm_partition_mode_t pt) return ""; } -/* - * finds the best match for a given job request - * - * IN - int spec right now holds the place for some type of - * specification as to the importance of certain job params, for - * instance, geometry, type, size, etc. - * - * OUT - part_id of matched partition, NULL otherwise - * returns 1 for error (no match) - * - */ -static int _find_best_partition_match(struct job_record* job_ptr, - bitstr_t* slurm_part_bitmap, int min_nodes, int max_nodes, - int spec, bgl_record_t** found_bgl_record) -{ - ListIterator itr; - bgl_record_t* record; - int i; - uint16_t req_geometry[SYSTEM_DIMENSIONS]; - uint16_t conn_type, node_use, rotate, target_size = 1; - - sort_bgl_record_inc_size(bgl_list); - - select_g_get_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_CONN_TYPE, &conn_type); - select_g_get_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_GEOMETRY, req_geometry); - select_g_get_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_NODE_USE, &node_use); - select_g_get_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_ROTATE, &rotate); - for (i=0; i<SYSTEM_DIMENSIONS; i++) - target_size *= req_geometry[i]; - if (target_size == 0) /* no geometry specified */ - target_size = min_nodes; - - /** this is where we should have the control flow depending on - the spec arguement*/ - itr = list_iterator_create(bgl_list); - *found_bgl_record = NULL; - /* NEED TO PUT THIS LOGIC IN: - * if RM_NAV, then the partition with both 
the TORUS and the - * dims should be favored over the MESH and the dims, but - * foremost is the correct num of dims. - */ - debug("number of partitions to check: %d", list_count(bgl_list)); - while ((record = (bgl_record_t*) list_next(itr))) { - /* - * check that the number of nodes is suitable - */ - if ((record->size < min_nodes) - || (max_nodes != 0 && record->size > max_nodes) - || (record->size < target_size)) { - debug("partition %s node count not suitable", - record->slurm_part_id); - continue; - } - - /* Check that configured */ - if (!record->alloc_part) { - error("warning, bgl_record %s undefined in bluegene.conf", - record->nodes); - continue; - } - - /* - * Next we check that this partition's bitmap is within - * the set of nodes which the job can use. - * Nodes not available for the job could be down, - * drained, allocated to some other job, or in some - * SLURM partition not available to this job. - */ - if (!bit_super_set(record->bitmap, slurm_part_bitmap)) { - debug("bgl partition %s has nodes not usable by this " - "job", record->nodes); - continue; - } - - /* - * Insure that any required nodes are in this BGL partition - */ - if (job_ptr->details->req_node_bitmap - && (!bit_super_set(job_ptr->details->req_node_bitmap, - record->bitmap))) { - info("bgl partition %s lacks required nodes", - record->nodes); - continue; - } - - /***********************************************/ - /* check the connection type specified matches */ - /***********************************************/ - if ((conn_type != record->conn_type) - && (conn_type != RM_NAV)) { - debug("bgl partition %s conn-type not usable", - record->nodes); - continue; - } - - /***********************************************/ - /* check the node_use specified matches */ - /***********************************************/ - if (node_use != record->node_use) { - debug("bgl partition %s node-use not usable", record->nodes); - continue; - } - - /*****************************************/ - /** 
match up geometry as "best" possible */ - /*****************************************/ - if (req_geometry[0] == 0) - ; /*Geometry not specified */ - else { /* match requested geometry */ - bool match = false; - int rot_cnt = 0; /* attempt six rotations of dimensions */ - - for (rot_cnt=0; rot_cnt<6; rot_cnt++) { - for (i=0; i<SYSTEM_DIMENSIONS; i++) { - if (record->alloc_part->dimensions[i] < - req_geometry[i]) - break; - } - if (i == SYSTEM_DIMENSIONS) { - match = true; - break; - } - if (rotate == 0) - break; /* not usable */ - _rotate_geo(req_geometry, rot_cnt); - } - - if (!match) - continue; /* Not usable */ - } - - if ((*found_bgl_record == NULL) - || (record->size < (*found_bgl_record)->size)) { - *found_bgl_record = record; - if (record->size == target_size) - break; - } - } - - /** set the bitmap and do other allocation activities */ - if (*found_bgl_record) { - debug("_find_best_partition_match %s <%s>", - (*found_bgl_record)->slurm_part_id, (*found_bgl_record)->nodes); - bit_and(slurm_part_bitmap, (*found_bgl_record)->bitmap); - return SLURM_SUCCESS; - } - - debug("_find_best_partition_match none found"); - return SLURM_ERROR; -} - /** * Comparator used for sorting partitions smallest to largest * @@ -1056,41 +888,6 @@ void sort_bgl_record_dec_size(List records){ list_sort(records, (ListCmpF) _bgl_record_cmpf_dec); } -/** - * - */ -extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_part_bitmap, - int min_nodes, int max_nodes) -{ - int spec = 1; // this will be like, keep TYPE a priority, etc, blah blah. 
- bgl_record_t* record; - char buf[100]; - - debug("bluegene::submit_job"); - - select_g_sprint_jobinfo(job_ptr->select_jobinfo, buf, sizeof(buf), - SELECT_PRINT_MIXED); - debug("bluegene:submit_job: %s nodes=%d-%d", buf, min_nodes, max_nodes); - - if (_find_best_partition_match(job_ptr, slurm_part_bitmap, min_nodes, max_nodes, - spec, &record)) { - return SLURM_ERROR; - } else { - /* now we place the part_id into the env of the script to run */ - char bgl_part_id[BITSIZE]; -#ifdef HAVE_BGL_FILES - snprintf(bgl_part_id, BITSIZE, "%s", record->bgl_part_id); -#else - snprintf(bgl_part_id, BITSIZE, "UNDEFINED"); -#endif - select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_PART_ID, bgl_part_id); - } - - /** we should do the BGL stuff here like, init BGL job stuff... */ - return SLURM_SUCCESS; -} - /* * bluegene_agent - detached thread periodically updates status of * bluegene nodes. @@ -1141,30 +938,3 @@ static void _diff_tv_str(struct timeval *tv1,struct timeval *tv2, delta_t += tv2->tv_usec - tv1->tv_usec; snprintf(tv_str, len_tv_str, "usec=%ld", delta_t); } - -/* Rotate a 3-D geometry array through its six permutations */ -static void _rotate_geo(uint16_t *req_geometry, int rot_cnt) -{ - uint16_t tmp; - - switch (rot_cnt) { - case 0: /* ABC -> ACB */ - SWAP(req_geometry[1], req_geometry[2], tmp); - break; - case 1: /* ACB -> CAB */ - SWAP(req_geometry[0], req_geometry[1], tmp); - break; - case 2: /* CAB -> CBA */ - SWAP(req_geometry[1], req_geometry[2], tmp); - break; - case 3: /* CBA -> BCA */ - SWAP(req_geometry[0], req_geometry[1], tmp); - break; - case 4: /* BCA -> BAC */ - SWAP(req_geometry[1], req_geometry[2], tmp); - break; - case 5: /* BAC -> ABC */ - SWAP(req_geometry[0], req_geometry[1], tmp); - break; - } -} diff --git a/src/plugins/select/bluegene/bluegene.h b/src/plugins/select/bluegene/bluegene.h index a5919d648e002a9fdd704d461f51f872a35de96f..b8a4701d0fa42c39630b97593b8c940f2c971adc 100644 --- a/src/plugins/select/bluegene/bluegene.h +++ 
b/src/plugins/select/bluegene/bluegene.h @@ -59,13 +59,14 @@ #endif /* Global variables */ -extern rm_BGL_t *bgl; +extern rm_BGL_t *bgl; /* DB2 pointer */ extern char *bluegene_blrts; extern char *bluegene_linux; extern char *bluegene_mloader; extern char *bluegene_ramdisk; extern char *bluegene_serial; extern List bgl_init_part_list; /* Initial bgl partition state */ +extern List bgl_list; /* List of configured BGL blocks */ extern bool agent_fini; typedef int lifecycle_type_t; diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c index 843a6caf413339aa2a1cb71fd2d3d753ae43899c..36c23e1e94aa7ac0d9f9298c2cf8ae6258892d18 100644 --- a/src/plugins/select/bluegene/select_bluegene.c +++ b/src/plugins/select/bluegene/select_bluegene.c @@ -49,6 +49,7 @@ #include "src/common/xassert.h" #include "src/common/xmalloc.h" #include "src/slurmctld/slurmctld.h" +#include "bgl_job.h" #include "bluegene.h" /*