diff --git a/NEWS b/NEWS index e8ba67103546f0a94384d2ba996dae7dcc3047c9..79c72ae47fed20f31bef45f5f64cf97432e10889 100644 --- a/NEWS +++ b/NEWS @@ -144,6 +144,8 @@ documents those changes that are of interest to users and administrators. -- Fix output from api when printing job step not found. -- Don't allow user specified reservation names to disrupt the normal reservation sequeuece numbering scheme. + -- Fix scontrol to be able to accept TRES as an option when creating + a reservation. * Changes in Slurm 15.08.6 ========================== diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index d0e4bdceba22e052d5635ecf372d33e0f5f852eb..cc7dde03b52b03a0130fd49d3205991071e4e9bc 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -1513,6 +1513,35 @@ If users are denied access to a reservation (user name preceded by a '\-'), then all other users are implicitly allowed to use the reservation and it is not possible to also explicitly specify allowed users. +.TP +\fITRES\fP=<tres_spec> +Comma\-separated list of TRES required for the reservation. Current supported +TRES types with reservations are: CPU, Node, License and BB. CPU and Node +follow the same format as CoreCnt and NodeCnt parameters respectively. +License names can be followed by an equal '=' and a count: + +License/<name1>=<count1>[,License/<name2>=<count2>,...] + +BurstBuffer can be specified in a similar way as BurstBuffer parameter. The +only difference is that colon symbol ':' should be replaced by an equal '=' +in order to follow the TRES format. + +Some examples of TRES valid specifications: + +TRES=cpu=5,bb/cray=4,license/iop1=1,license/iop2=3 + +TRES=node=5k,license/iop1=2 + +As specified in CoreCnt, if a nodelist is specified, cpu can be an array +of core numbers by node: +nodes=compute[1\-3] TRES=cpu=2,2,1,bb/cray=4,license/iop1=2 + +Please note that CPU, Node, License and BB can override CoreCnt, NodeCnt, +Licenses and BurstBuffer parameters respectively. Also CPU represents +CoreCnt, in a reservation and will be adjusted if you have threads per +core on your nodes. + + .TP \fBSPECIFICATIONS FOR UPDATE BLOCK/SUBMP \fR .TP diff --git a/src/scontrol/create_res.c b/src/scontrol/create_res.c index 6fc23db395636576b69518c2938220507eb810a2..cd382a83515f4c134df934931f3a636acf1d38b4 100644 --- a/src/scontrol/create_res.c +++ b/src/scontrol/create_res.c @@ -74,9 +74,264 @@ static char * _process_plus_minus(char plus_or_minus, char *src) return ret; } +static int _parse_resv_node_cnt(resv_desc_msg_t *resv_msg_ptr, char *val, + bool from_tres) +{ + char *endptr = NULL, *node_cnt, *tok, *ptrptr = NULL; + int node_inx = 0; + node_cnt = xstrdup(val); + tok = strtok_r(node_cnt, ",", &ptrptr); + while (tok) { + xrealloc(resv_msg_ptr->node_cnt, + sizeof(uint32_t) * (node_inx + 2)); + resv_msg_ptr->node_cnt[node_inx] = + strtol(tok, &endptr, 10); + if ((endptr != NULL) && + ((endptr[0] == 'k') || + (endptr[0] == 'K'))) { + resv_msg_ptr->node_cnt[node_inx] *= 1024; + } else if ((endptr != NULL) && + ((endptr[0] == 'm') || + (endptr[0] == 'M'))) { + resv_msg_ptr->node_cnt[node_inx] *= 1024 * 1024; + } else if ((endptr == NULL) || + (endptr[0] != '\0') || + (tok[0] == '\0')) { + exit_code = 1; + if (from_tres) + error("Invalid TRES node count %s", val); + else + error("Invalid node count %s", val); + xfree(node_cnt); + return SLURM_ERROR; + } + node_inx++; + tok = strtok_r(NULL, ",", &ptrptr); + } + + xfree(node_cnt); + return SLURM_SUCCESS; +} + +static int _parse_resv_core_cnt(resv_desc_msg_t *resv_msg_ptr, char *val, + bool from_tres) +{ + + char *endptr = NULL, *core_cnt, *tok, *ptrptr = NULL; + char *type; + int node_inx = 0; + + type = slurm_get_select_type(); + if (strcasestr(type, "cray")) { + int param; + param = slurm_get_select_type_param(); + if (! (param & CR_OTHER_CONS_RES)) { + error("CoreCnt or CPUCnt is only " + "suported when " + "SelectTypeParameters " + "includes OTHER_CONS_RES"); + xfree(type); + return SLURM_ERROR; + } + } else if (strcasestr(type, "cons_res") == NULL) { + error("CoreCnt or CPUCnt is only " + "suported when " + "SelectType includes " + "select/cons_res"); + xfree(type); + return SLURM_ERROR; + } + + xfree(type); + core_cnt = xstrdup(val); + tok = strtok_r(core_cnt, ",", &ptrptr); + while (tok) { + xrealloc(resv_msg_ptr->core_cnt, + sizeof(uint32_t) * (node_inx + 2)); + resv_msg_ptr->core_cnt[node_inx] = + strtol(tok, &endptr, 10); + if ((endptr == NULL) || + (endptr[0] != '\0') || + (tok[0] == '\0')) { + exit_code = 1; + if (from_tres) + error("Invalid TRES core count %s", val); + else + error("Invalid core count %s", val); + xfree(core_cnt); + return SLURM_ERROR; + } + node_inx++; + tok = strtok_r(NULL, ",", &ptrptr); + } + + xfree(core_cnt); + return SLURM_SUCCESS; +} + +/* -1 = error, 0 = is configured, 1 = isn't configured */ +static int _is_configured_tres(char *type) +{ + + int i, cc; + assoc_mgr_info_request_msg_t req; + assoc_mgr_info_msg_t *msg = NULL; + + memset(&req, 0, sizeof(assoc_mgr_info_request_msg_t)); + cc = slurm_load_assoc_mgr_info(&req, &msg); + if (cc != SLURM_PROTOCOL_SUCCESS) { + slurm_perror("slurm_load_assoc_mgr_info error"); + slurm_free_assoc_mgr_info_msg(msg); + return SLURM_ERROR; + } + + for (i = 0; i < msg->tres_cnt; ++i) { + if (!strcasecmp(msg->tres_names[i], type)) { + slurm_free_assoc_mgr_info_msg(msg); + return SLURM_SUCCESS; + } + } + + error("'%s' is not a configured TRES", type); + slurm_free_assoc_mgr_info_msg(msg); + return SLURM_ERROR; + +} + +static int _parse_resv_tres(char *val, resv_desc_msg_t *resv_msg_ptr, + int *free_tres_license, int *free_tres_bb, + int *free_tres_corecnt, int *free_tres_nodecnt) +{ + int i, ret, len; + char *tres_bb = NULL, *tres_license = NULL, + *tres_corecnt = NULL, *tres_nodecnt = NULL, + *token, *type = NULL, *saveptr1 = NULL, + *value_str = NULL, *name = NULL, *compound = NULL, + *tmp; + bool discard, first; + + *free_tres_license = 0; + *free_tres_bb = 0; + *free_tres_corecnt = 0; + *free_tres_nodecnt = 0; + + token = strtok_r(val, ",", &saveptr1); + while (token) { + + compound = strtok_r(token, "=", &value_str); + + if (!value_str || !*value_str) { + error("TRES component '%s' has an invalid value '%s'", + type, token); + goto error; + } + + if (strchr(compound, '/')) { + tmp = xstrdup(compound); + type = strtok_r(tmp, "/", &name); + } else + type = compound; + + if (_is_configured_tres(compound) < 0) + goto error; + + if (!strcasecmp(type, "license")) { + if (tres_license && tres_license[0] != '\0') + xstrcatchar(tres_license, ','); + xstrfmtcat(tres_license, "%s:%s", name, value_str); + token = strtok_r(NULL, ",", &saveptr1); + if (tmp) + xfree(tmp); + + } else if (strcasecmp(type, "bb") == 0) { + if (tres_bb && tres_bb[0] != '\0') + xstrcatchar(tres_bb, ','); + xstrfmtcat(tres_bb, "%s:%s", name, value_str); + token = strtok_r(NULL, ",", &saveptr1); + if (tmp) + xfree(tmp); + + } else if (strcasecmp(type, "cpu") == 0) { + first = true; + discard = false; + do { + len = strlen(value_str); + for (i = 0; i < len; i++) { + if (!isdigit(value_str[i])) { + if (first) { + error("TRES value '%s' " + "is invalid", + value_str); + goto error; + } else + discard = true; + break; + } + } + first = false; + if (!discard) { + if (tres_corecnt && tres_corecnt[0] + != '\0') + xstrcatchar(tres_corecnt, ','); + xstrcat(tres_corecnt, value_str); + + token = strtok_r(NULL, ",", &saveptr1); + value_str = token; + } + } while (!discard && token); + + } else if (strcasecmp(type, "node") == 0) { + if (tres_nodecnt && tres_nodecnt[0] != '\0') + xstrcatchar(tres_nodecnt, ','); + xstrcat(tres_nodecnt, value_str); + token = strtok_r(NULL, ",", &saveptr1); + } else { + error("TRES type '%s' not supported with reservations", + compound); + goto error; + } + + } + + if (tres_corecnt && tres_corecnt[0] != '\0') { + ret = _parse_resv_core_cnt(resv_msg_ptr, tres_corecnt, true); + xfree(tres_corecnt); + if (ret != SLURM_SUCCESS) + goto error; + *free_tres_corecnt = 1; + } + + if (tres_nodecnt && tres_nodecnt[0] != '\0') { + ret = _parse_resv_node_cnt(resv_msg_ptr, tres_nodecnt, true); + xfree(tres_nodecnt); + if (ret != SLURM_SUCCESS) + goto error; + *free_tres_nodecnt = 1; + } + + if (tres_license && tres_license[0] != '\0') { + resv_msg_ptr->licenses = tres_license; + *free_tres_license = 1; + } + + if (tres_bb && tres_bb[0] != '\0') { + resv_msg_ptr->burst_buffer = tres_bb; + *free_tres_bb = 1; + } + + return SLURM_SUCCESS; + +error: + xfree(tres_nodecnt); + xfree(tres_corecnt); + exit_code = 1; + return SLURM_ERROR; +} + + /* * scontrol_parse_res_options parse options for creating or updating a - reservation + reservation * IN argc - count of arguments * IN argv - list of arguments * IN msg - a string to append to any error message @@ -89,13 +344,19 @@ static char * _process_plus_minus(char plus_or_minus, char *src) extern int scontrol_parse_res_options(int argc, char *argv[], const char *msg, resv_desc_msg_t *resv_msg_ptr, - int *free_user_str, int *free_acct_str) + int *free_user_str, int *free_acct_str, + int *free_tres_license, int *free_tres_bb, + int *free_tres_corecnt, int *free_tres_nodecnt) { int i; int duration = -3; /* -1 == INFINITE, -2 == error, -3 == not set */ *free_user_str = 0; *free_acct_str = 0; + *free_tres_license = 0; + *free_tres_bb = 0; + *free_tres_corecnt = 0; + *free_tres_nodecnt = 0; for (i=0; i<argc; i++) { char *tag = argv[i]; @@ -110,7 +371,7 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg, } else if (!val || taglen == 0) { exit_code = 1; error("Unknown parameter %s. %s", argv[i], msg); - return -1; + return SLURM_ERROR; } if (val[-1] == '+' || val[-1] == '-') { plus_minus = val[-1]; @@ -138,7 +399,7 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg, exit_code = 1; error("Invalid start time %s. %s", argv[i], msg); - return -1; + return SLURM_ERROR; } resv_msg_ptr->start_time = t; @@ -147,7 +408,7 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg, if (errno == ESLURM_INVALID_TIME_VALUE) { exit_code = 1; error("Invalid end time %s. %s", argv[i],msg); - return -1; + return SLURM_ERROR; } resv_msg_ptr->end_time = t; @@ -157,7 +418,7 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg, if (duration < 0 && duration != INFINITE) { exit_code = 1; error("Invalid duration %s. %s", argv[i],msg); - return -1; + return SLURM_ERROR; } resv_msg_ptr->duration = (uint32_t)duration; @@ -172,97 +433,25 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg, f = parse_resv_flags(val, msg); } if (f == 0xffffffff) { - return -1; + return SLURM_ERROR; } else { resv_msg_ptr->flags = f; } } else if (strncasecmp(tag, "NodeCnt", MAX(taglen,5)) == 0 || strncasecmp(tag, "NodeCount", MAX(taglen,5)) == 0) { - char *endptr = NULL, *node_cnt, *tok, *ptrptr = NULL; - int node_inx = 0; - node_cnt = xstrdup(val); - tok = strtok_r(node_cnt, ",", &ptrptr); - while (tok) { - xrealloc(resv_msg_ptr->node_cnt, - sizeof(uint32_t) * (node_inx + 2)); - resv_msg_ptr->node_cnt[node_inx] = - strtol(tok, &endptr, 10); - if ((endptr != NULL) && - ((endptr[0] == 'k') || - (endptr[0] == 'K'))) { - resv_msg_ptr->node_cnt[node_inx] *= - 1024; - } else if ((endptr != NULL) && - ((endptr[0] == 'm') || - (endptr[0] == 'M'))) { - resv_msg_ptr->node_cnt[node_inx] *= - 1024 * 1024; - } else if ((endptr == NULL) || - (endptr[0] != '\0') || - (tok[0] == '\0')) { - exit_code = 1; - error("Invalid node count %s. %s", - argv[i], msg); - xfree(node_cnt); - return -1; - } - node_inx++; - tok = strtok_r(NULL, ",", &ptrptr); - } - xfree(node_cnt); + + if (_parse_resv_node_cnt(resv_msg_ptr, val, false) + == SLURM_ERROR) + return SLURM_ERROR; } else if (strncasecmp(tag, "CoreCnt", MAX(taglen,5)) == 0 || strncasecmp(tag, "CoreCount", MAX(taglen,5)) == 0 || strncasecmp(tag, "CPUCnt", MAX(taglen,5)) == 0 || strncasecmp(tag, "CPUCount", MAX(taglen,5)) == 0) { - char *endptr = NULL, *core_cnt, *tok, *ptrptr = NULL; - char *type; - int node_inx = 0; - - type = slurm_get_select_type(); - if (strcasestr(type, "cray")) { - int param; - param = slurm_get_select_type_param(); - if (! (param & CR_OTHER_CONS_RES)) { - error("CoreCnt or CPUCnt is only " - "suported when " - "SelectTypeParameters " - "includes OTHER_CONS_RES"); - xfree(type); - return -1; - } - } else { - if (strcasestr(type, "cons_res") == NULL) { - error("CoreCnt or CPUCnt is only " - "suported when " - "SelectType includes " - "select/cons_res"); - xfree(type); - return -1; - } - } - xfree(type); - core_cnt = xstrdup(val); - tok = strtok_r(core_cnt, ",", &ptrptr); - while (tok) { - xrealloc(resv_msg_ptr->core_cnt, - sizeof(uint32_t) * (node_inx + 2)); - resv_msg_ptr->core_cnt[node_inx] = - strtol(tok, &endptr, 10); - if ((endptr == NULL) || - (endptr[0] != '\0') || - (tok[0] == '\0')) { - exit_code = 1; - error("Invalid core count %s. %s", - argv[i], msg); - xfree(core_cnt); - return -1; - } - node_inx++; - tok = strtok_r(NULL, ",", &ptrptr); - } - xfree(core_cnt); + if (_parse_resv_core_cnt(resv_msg_ptr, val, false) + == SLURM_ERROR) + return SLURM_ERROR; } else if (strncasecmp(tag, "Nodes", MAX(taglen, 5)) == 0) { resv_msg_ptr->node_list = val; @@ -277,6 +466,13 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg, == 0) { resv_msg_ptr->partition = val; + } else if (strncasecmp(tag, "TRES", MAX(taglen, 1)) == 0) { + if (_parse_resv_tres(val, resv_msg_ptr, + free_tres_license, free_tres_bb, + free_tres_corecnt, + free_tres_nodecnt) == SLURM_ERROR) + return SLURM_ERROR; + } else if (strncasecmp(tag, "Users", MAX(taglen, 1)) == 0) { if (plus_minus) { resv_msg_ptr->users = @@ -288,19 +484,19 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg, } else if (strncasecmp(tag, "Watts", MAX(taglen, 1)) == 0) { if (parse_uint32(val, &(resv_msg_ptr->resv_watts))) { error("Invalid Watts value: %s", val); - return -1; + return SLURM_ERROR; } } else if (strncasecmp(tag, "res", 3) == 0) { continue; } else { exit_code = 1; error("Unknown parameter %s. %s", argv[i], msg); - return -1; + return SLURM_ERROR; } } - return 0; -} + return SLURM_SUCCESS; +} /* @@ -316,12 +512,15 @@ scontrol_update_res(int argc, char *argv[]) { resv_desc_msg_t resv_msg; int err, ret = 0; - int free_user_str = 0, free_acct_str = 0; + int free_user_str = 0, free_acct_str = 0, free_tres_license = 0, + free_tres_bb = 0, free_tres_corecnt = 0, free_tres_nodecnt = 0; slurm_init_resv_desc_msg (&resv_msg); err = scontrol_parse_res_options(argc, argv, "No reservation update.", &resv_msg, &free_user_str, - &free_acct_str); + &free_acct_str, &free_tres_license, + &free_tres_bb, &free_tres_corecnt, + &free_tres_nodecnt); if (err) goto SCONTROL_UPDATE_RES_CLEANUP; @@ -345,6 +544,14 @@ SCONTROL_UPDATE_RES_CLEANUP: xfree(resv_msg.users); if (free_acct_str) xfree(resv_msg.accounts); + if (free_tres_license) + xfree(resv_msg.licenses); + if (free_tres_bb) + xfree(resv_msg.burst_buffer); + if (free_tres_corecnt) + xfree(resv_msg.core_cnt); + if (free_tres_nodecnt) + xfree(resv_msg.node_cnt); return ret; } @@ -361,13 +568,16 @@ scontrol_create_res(int argc, char *argv[]) { resv_desc_msg_t resv_msg; char *new_res_name = NULL; - int free_user_str = 0, free_acct_str = 0; + int free_user_str = 0, free_acct_str = 0, free_tres_license = 0, + free_tres_bb = 0, free_tres_corecnt = 0, free_tres_nodecnt = 0; int err, ret = 0; slurm_init_resv_desc_msg (&resv_msg); err = scontrol_parse_res_options(argc, argv, "No reservation created.", &resv_msg, &free_user_str, - &free_acct_str); + &free_acct_str, &free_tres_license, + &free_tres_bb, &free_tres_corecnt, + &free_tres_nodecnt); if (err) goto SCONTROL_CREATE_RES_CLEANUP; @@ -483,5 +693,14 @@ SCONTROL_CREATE_RES_CLEANUP: xfree(resv_msg.users); if (free_acct_str) xfree(resv_msg.accounts); + if (free_tres_license) + xfree(resv_msg.licenses); + if (free_tres_bb) + xfree(resv_msg.burst_buffer); + if (free_tres_corecnt) + xfree(resv_msg.core_cnt); + if (free_tres_nodecnt) + xfree(resv_msg.node_cnt); + return ret; }