From f68d0a9a33cc3515018aa6b0cb07b6210c9dbad1 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@schedmd.com> Date: Fri, 24 Jun 2011 14:13:15 -0700 Subject: [PATCH] cray - added support for the "switch" call in basil to handle gang scheduling --- src/plugins/select/cray/basil_alps.h | 10 ++++ src/plugins/select/cray/basil_interface.c | 22 +++++++ src/plugins/select/cray/basil_interface.h | 7 +++ src/plugins/select/cray/libalps/Makefile.am | 1 + src/plugins/select/cray/libalps/Makefile.in | 13 +++- .../select/cray/libalps/basil_request.c | 16 +++++ src/plugins/select/cray/libalps/do_switch.c | 60 +++++++++++++++++++ .../select/cray/libalps/parser_basil_4.0.c | 43 +++++++++++++ .../select/cray/libalps/parser_common.c | 45 +++++++++++++- .../select/cray/libalps/parser_internal.h | 2 + src/plugins/select/cray/parser_common.h | 1 + src/plugins/select/cray/select_cray.c | 12 ++++ 12 files changed, 227 insertions(+), 5 deletions(-) create mode 100644 src/plugins/select/cray/libalps/do_switch.c diff --git a/src/plugins/select/cray/basil_alps.h b/src/plugins/select/cray/basil_alps.h index 9b11b40dc03..437a2ff74bb 100644 --- a/src/plugins/select/cray/basil_alps.h +++ b/src/plugins/select/cray/basil_alps.h @@ -66,6 +66,7 @@ enum basil_method { BM_release, /* RELEASE method */ BM_engine, /* QUERY of type ENGINE */ BM_inventory, /* QUERY of type INVENTORY */ + BM_switch, /* SWITCH method */ BM_MAX, BM_UNKNOWN }; @@ -118,6 +119,11 @@ enum basil_element { BT_ACCELARRAY, /* Basil 4.0 Inventory/Node */ BT_ACCEL, /* Basil 4.0 Inventory/Node */ BT_ACCELALLOC, /* Basil 4.0 Inventory/Node */ + BT_SWITCH, /* SWITCH */ + BT_SWITCHRES, /* Response for Switch reservation */ + BT_SWITCHAPP, /* Response for Switch application */ + BT_SWITCHRESARRAY, /* Response for Switch reservation array */ + BT_SWITCHAPPARRAY, /* Response for Switch application array */ #define BT_4_0_MAX (BT_ACCELALLOC + 1) /* End of Basil 4.0 */ /* FIXME: the Basil 4.1 interface is not yet fully released */ #define BT_4_1_MAX BT_4_0_MAX /* End of Basil 4.1 */ @@ -449,6 +455,7 @@ struct basil_rsvn_param { * @rsvn_id: assigned by RESERVE method * @pagg_id: used by CONFIRM method (session ID or CSA PAGG ID) * @claims: number of claims outstanding against @rsvn_id (Basil 4.0) + * @suspended: If the reservation is suspended or not (Basil 4.0) * @rsvd_nodes: assigned by Basil 3.1 RESERVE method * @user_name: required by RESERVE method * @account_name: optional Basil 1.0 RESERVE parameter @@ -462,6 +469,8 @@ struct basil_reservation { uint32_t rsvn_id; uint64_t pagg_id; uint32_t claims; + bool suspended; + struct nodespec *rsvd_nodes; /* * Static (IN) parameters @@ -616,5 +625,6 @@ extern int basil_release(uint32_t rsvn_id); extern int basil_signal_apids(int32_t rsvn_id, int signal, struct basil_inventory *inv); extern int basil_safe_release(int32_t rsvn_id, struct basil_inventory *inv); +extern int basil_switch(uint32_t rsvn_id, bool suspend); #endif /* __BASIL_ALPS_H__ */ diff --git a/src/plugins/select/cray/basil_interface.c b/src/plugins/select/cray/basil_interface.c index 913290f4981..fff3e1dfe31 100644 --- a/src/plugins/select/cray/basil_interface.c +++ b/src/plugins/select/cray/basil_interface.c @@ -818,3 +818,25 @@ extern int do_basil_release(struct job_record *job_ptr) */ return SLURM_SUCCESS; } + +/** + * do_basil_switch - suspend/resume BASIL reservation + * IN job_ptr - pointer to job which has just been deallocated resources + * IN suspend - to suspend or not to suspend + * RET see below + */ +extern int do_basil_switch(struct job_record *job_ptr, bool suspend) +{ + uint32_t resv_id; + + if (_get_select_jobinfo(job_ptr->select_jobinfo->data, + SELECT_JOBDATA_RESV_ID, &resv_id) != SLURM_SUCCESS) { + error("can not read resId for JobId=%u", job_ptr->job_id); + } else if (resv_id && basil_switch(resv_id, suspend) == 0) { + /* The resv_id is non-zero only if the job is or was running. */ + debug("%s ALPS resId %u for JobId %u", + suspend ? "Suspended" : "Resumed", + resv_id, job_ptr->job_id); + } + return SLURM_SUCCESS; +} diff --git a/src/plugins/select/cray/basil_interface.h b/src/plugins/select/cray/basil_interface.h index 71f32313c79..f4df5f9d45e 100644 --- a/src/plugins/select/cray/basil_interface.h +++ b/src/plugins/select/cray/basil_interface.h @@ -59,6 +59,7 @@ extern int do_basil_reserve(struct job_record *job_ptr); extern int do_basil_confirm(struct job_record *job_ptr); extern int do_basil_signal(struct job_record *job_ptr, int signal); extern int do_basil_release(struct job_record *job_ptr); +extern int do_basil_switch(struct job_record *job_ptr, bool suspend); #else /* !HAVE_CRAY */ static inline int basil_node_ranking(struct node_record *ig, int nore) { @@ -94,5 +95,11 @@ static inline int do_basil_release(struct job_record *job_ptr) { return SLURM_SUCCESS; } + +static inline int do_basil_switch(struct job_record *job_ptr, bool suspend) +{ + return SLURM_SUCCESS; +} + #endif /* HAVE_CRAY */ #endif /* __CRAY_BASIL_INTERFACE_H */ diff --git a/src/plugins/select/cray/libalps/Makefile.am b/src/plugins/select/cray/libalps/Makefile.am index 715753b1acf..1f73a513bba 100644 --- a/src/plugins/select/cray/libalps/Makefile.am +++ b/src/plugins/select/cray/libalps/Makefile.am @@ -20,6 +20,7 @@ libalps_la_SOURCES = \ do_reserve.c \ do_release.c \ do_confirm.c \ + do_switch.c \ memory_handling.c \ popen2.c \ atoul.c diff --git a/src/plugins/select/cray/libalps/Makefile.in b/src/plugins/select/cray/libalps/Makefile.in index 97d6dfc467b..0db94130a6b 100644 --- a/src/plugins/select/cray/libalps/Makefile.in +++ b/src/plugins/select/cray/libalps/Makefile.in @@ -92,8 +92,8 @@ am_libalps_la_OBJECTS = libalps_la-basil_mysql_routines.lo \ libalps_la-parser_basil_4.0.lo libalps_la-basil_request.lo \ libalps_la-do_query.lo libalps_la-do_reserve.lo \ libalps_la-do_release.lo libalps_la-do_confirm.lo \ - libalps_la-memory_handling.lo libalps_la-popen2.lo \ - libalps_la-atoul.lo + libalps_la-do_switch.lo libalps_la-memory_handling.lo \ + libalps_la-popen2.lo libalps_la-atoul.lo libalps_la_OBJECTS = $(am_libalps_la_OBJECTS) libalps_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libalps_la_CFLAGS) \ @@ -329,6 +329,7 @@ libalps_la_SOURCES = \ do_reserve.c \ do_release.c \ do_confirm.c \ + do_switch.c \ memory_handling.c \ popen2.c \ atoul.c @@ -395,6 +396,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-do_query.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-do_release.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-do_reserve.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-do_switch.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-memory_handling.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-parser_basil_1.0.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libalps_la-parser_basil_1.1.Plo@am__quote@ @@ -501,6 +503,13 @@ libalps_la-do_confirm.lo: do_confirm.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libalps_la_CFLAGS) $(CFLAGS) -c -o libalps_la-do_confirm.lo `test -f 'do_confirm.c' || echo '$(srcdir)/'`do_confirm.c +libalps_la-do_switch.lo: do_switch.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libalps_la_CFLAGS) $(CFLAGS) -MT libalps_la-do_switch.lo -MD -MP -MF $(DEPDIR)/libalps_la-do_switch.Tpo -c -o libalps_la-do_switch.lo `test -f 'do_switch.c' || echo '$(srcdir)/'`do_switch.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libalps_la-do_switch.Tpo $(DEPDIR)/libalps_la-do_switch.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='do_switch.c' object='libalps_la-do_switch.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libalps_la_CFLAGS) $(CFLAGS) -c -o libalps_la-do_switch.lo `test -f 'do_switch.c' || echo '$(srcdir)/'`do_switch.c + libalps_la-memory_handling.lo: memory_handling.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libalps_la_CFLAGS) $(CFLAGS) -MT libalps_la-memory_handling.lo -MD -MP -MF $(DEPDIR)/libalps_la-memory_handling.Tpo -c -o libalps_la-memory_handling.lo `test -f 'memory_handling.c' || echo '$(srcdir)/'`memory_handling.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libalps_la-memory_handling.Tpo $(DEPDIR)/libalps_la-memory_handling.Plo diff --git a/src/plugins/select/cray/libalps/basil_request.c b/src/plugins/select/cray/libalps/basil_request.c index 7a816cd3902..5c83df13a4a 100644 --- a/src/plugins/select/cray/libalps/basil_request.c +++ b/src/plugins/select/cray/libalps/basil_request.c @@ -2,6 +2,7 @@ * Fork apbasil process as co-process, parse output. * * Copyright (c) 2009-2011 Centro Svizzero di Calcolo Scientifico (CSCS) + * Portions Copyright (C) 2011 SchedMD <http://www.schedmd.com>. * Licensed under the GPLv2. */ #include "parser_internal.h" @@ -157,6 +158,21 @@ int basil_request(struct basil_parse_data *bp) case BM_release: fprintf(apbasil, "reservation_id=\"%u\"/>\n", bp->mdata.res->rsvn_id); + break; + case BM_switch: + { + char *suspend = bp->mdata.res->suspended ? "IN" : "OUT"; + fprintf(apbasil, ">\n"); + fprintf(apbasil, " <ApplicationArray>\n"); + fprintf(apbasil, " </ApplicationArray>\n"); + fprintf(apbasil, " <ReservationArray>\n"); + fprintf(apbasil, " <Reservation reservation_id=\"%u\" action=\"%s\"/>\n", + bp->mdata.res->rsvn_id, suspend); + fprintf(apbasil, " </ReservationArray>\n"); + fprintf(apbasil, "</BasilRequest>\n"); + info("sent the switch to %s for %u", suspend, bp->mdata.res->rsvn_id); + } + break; default: /* ignore BM_none, BM_MAX, and BM_UNKNOWN covered above */ break; } diff --git a/src/plugins/select/cray/libalps/do_switch.c b/src/plugins/select/cray/libalps/do_switch.c new file mode 100644 index 00000000000..e41c063bba1 --- /dev/null +++ b/src/plugins/select/cray/libalps/do_switch.c @@ -0,0 +1,60 @@ +/*****************************************************************************\ + * do_switch.c - Handle Switch method for cray systems. + ***************************************************************************** + * Copyright (C) 2011 SchedMD LLC + * Written by Danny Auble <da@schedmd.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <https://computing.llnl.gov/linux/slurm/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#include "../basil_alps.h" + +/** + * basil_switch - suspend/resume an existing reservation + * @rsvn_id: the reservation id + * @suspend: to suspend or not to suspend + * Returns 0 if ok, a negative %basil_error otherwise. + * + */ +int basil_switch(uint32_t rsvn_id, bool suspend) +{ + struct basil_reservation rsvn = {0}; + struct basil_parse_data bp = {0}; + + rsvn.rsvn_id = rsvn_id; + rsvn.suspended = suspend; + + bp.method = BM_switch; + bp.mdata.res = &rsvn; + bp.version = get_basil_version(); + /* NOTE - for simplicity we could use BV_1_0 here */ + + return basil_request(&bp); +} diff --git a/src/plugins/select/cray/libalps/parser_basil_4.0.c b/src/plugins/select/cray/libalps/parser_basil_4.0.c index e2c19989ccf..4ef61fd9422 100644 --- a/src/plugins/select/cray/libalps/parser_basil_4.0.c +++ b/src/plugins/select/cray/libalps/parser_basil_4.0.c @@ -126,6 +126,25 @@ void eh_accel_alloc(struct ud *ud, const XML_Char **attrs) } } +void eh_switch_resv(struct ud *ud, const XML_Char **attrs) +{ + char *attribs[] = { "reservation_id", "status" }; + + extract_attributes(attrs, attribs, ARRAY_SIZE(attribs)); + + debug2("resv id %s switch status is %s", attribs[0], attribs[1]); +} + +void eh_switch_app(struct ud *ud, const XML_Char **attrs) +{ + char *attribs[] = { "application_id", "status" }; + + extract_attributes(attrs, attribs, ARRAY_SIZE(attribs)); + + debug2("app id %s switch status is %s", attribs[0], attribs[1]); +} + + const struct element_handler basil_4_0_elements[] = { [BT_MESSAGE] = { .tag = "Message", @@ -313,6 +332,30 @@ const struct element_handler basil_4_0_elements[] = { .uniq = false, .hnd = eh_command }, + [BT_SWITCHRES] = { + .tag = "Reservation", + .depth = 3, + .uniq = false, + .hnd = eh_switch_resv + }, + [BT_SWITCHAPP] = { + .tag = "Application", + .depth = 3, + .uniq = false, + .hnd = eh_switch_app + }, + [BT_SWITCHRESARRAY] = { + .tag = "ReservationArray", + .depth = 2, + .uniq = true, + .hnd = NULL + }, + [BT_SWITCHAPPARRAY] = { + .tag = "ApplicationArray", + .depth = 2, + .uniq = true, + .hnd = NULL + }, [BT_4_0_MAX] = { NULL, 0, 0, NULL } diff --git a/src/plugins/select/cray/libalps/parser_common.c b/src/plugins/select/cray/libalps/parser_common.c index 6f91647e6a6..b90c1c705ca 100644 --- a/src/plugins/select/cray/libalps/parser_common.c +++ b/src/plugins/select/cray/libalps/parser_common.c @@ -511,6 +511,8 @@ static enum basil_method _tag_to_method(const enum basil_element tag) case BT_SEGMARRAY ... BT_COMMAND: /* INVENTORY, Basil >= 1.1 */ case BT_INVENTORY ... BT_RESVN: /* INVENTORY, Basil >= 1.0 */ return BM_inventory; + case BT_SWITCH ... BT_SWITCHAPPARRAY: + return BM_switch; default: return BM_UNKNOWN; } @@ -524,15 +526,33 @@ static void _start_handler(void *user_data, enum basil_method method; enum basil_element tag; - for (tag = BT_MESSAGE; table[tag].tag; tag++) - if (strcmp(table[tag].tag, el) == 0) + for (tag = BT_MESSAGE; table[tag].tag; tag++) { + if (strcmp(table[tag].tag, el) == 0) { + /* since BM_inventory is returned for Arrays + if the method is switch we need to "switch" + it up here. + */ + if (ud->bp->method == BM_switch) { + if(!strcmp(table[tag].tag, "ReservationArray")) + tag = BT_SWITCHRESARRAY; + else if(!strcmp(table[tag].tag, "Reservation")) + tag = BT_SWITCHRES; + else if(!strcmp(table[tag].tag, + "ApplicationArray")) + tag = BT_SWITCHAPPARRAY; + else if(!strcmp(table[tag].tag, "Application")) + tag = BT_SWITCHAPP; + } break; + } + } if (table[tag].tag == NULL) fatal("Unrecognized XML start tag '%s'", el); method = _tag_to_method(tag); if (method == BM_UNKNOWN) fatal("Unsupported XML start tag '%s'", el); + if (method != BM_none && method != ud->bp->method) fatal("Unexpected '%s' start tag within %u response, " "expected %u", @@ -568,8 +588,27 @@ static void _end_handler(void *user_data, const XML_Char *el) --ud->depth; for (end_tag = BT_MESSAGE; table[end_tag].tag; end_tag++) - if (strcmp(table[end_tag].tag, el) == 0) + if (strcmp(table[end_tag].tag, el) == 0) { + /* since BM_inventory is returned for Arrays + if the method is switch we need to "switch" + it up here. + */ + if (ud->bp->method == BM_switch) { + if(!strcmp(table[end_tag].tag, + "ReservationArray")) + end_tag = BT_SWITCHRESARRAY; + else if(!strcmp(table[end_tag].tag, + "Reservation")) + end_tag = BT_SWITCHRES; + else if(!strcmp(table[end_tag].tag, + "ApplicationArray")) + end_tag = BT_SWITCHAPPARRAY; + else if(!strcmp(table[end_tag].tag, + "Application")) + end_tag = BT_SWITCHAPP; + } break; + } if (table[end_tag].tag == NULL) { fatal("Unknown end tag '%s'", el); } else if (end_tag != ud->stack[ud->depth]) { diff --git a/src/plugins/select/cray/libalps/parser_internal.h b/src/plugins/select/cray/libalps/parser_internal.h index 71e8abaec50..b805e1ff903 100644 --- a/src/plugins/select/cray/libalps/parser_internal.h +++ b/src/plugins/select/cray/libalps/parser_internal.h @@ -105,5 +105,7 @@ extern void eh_resv_3_1(struct ud *ud, const XML_Char **attrs); /* Basil 4.0 and above common handlers */ extern void eh_accel(struct ud *ud, const XML_Char **attrs); extern void eh_accel_alloc(struct ud *ud, const XML_Char **attrs); +extern void eh_switch_res(struct ud *ud, const XML_Char **attrs); +extern void eh_switch_app(struct ud *ud, const XML_Char **attrs); #endif /*__PARSER_INTERNAL_H__ */ diff --git a/src/plugins/select/cray/parser_common.h b/src/plugins/select/cray/parser_common.h index 9b93b8b1abf..3a631cbd2ea 100644 --- a/src/plugins/select/cray/parser_common.h +++ b/src/plugins/select/cray/parser_common.h @@ -40,6 +40,7 @@ const char *bm_names[BM_MAX] = { [BM_reserve] = "RESERVE", [BM_confirm] = "CONFIRM", [BM_release] = "RELEASE", + [BM_switch] = "SWITCH", }; /* Error codes */ diff --git a/src/plugins/select/cray/select_cray.c b/src/plugins/select/cray/select_cray.c index 3c79cebfe6b..a82474c24b5 100644 --- a/src/plugins/select/cray/select_cray.c +++ b/src/plugins/select/cray/select_cray.c @@ -346,11 +346,23 @@ extern int select_p_job_fini(struct job_record *job_ptr) extern int select_p_job_suspend(struct job_record *job_ptr, bool indf_susp) { + if (job_ptr == NULL) + return SLURM_SUCCESS; + + if (do_basil_switch(job_ptr, 1) != SLURM_SUCCESS) + return SLURM_ERROR; + return other_job_suspend(job_ptr, indf_susp); } extern int select_p_job_resume(struct job_record *job_ptr, bool indf_susp) { + if (job_ptr == NULL) + return SLURM_SUCCESS; + + if (do_basil_switch(job_ptr, 0) != SLURM_SUCCESS) + return SLURM_ERROR; + return other_job_resume(job_ptr, indf_susp); } -- GitLab