Skip to content
Snippets Groups Projects
Commit efae123f authored by Danny Auble's avatar Danny Auble
Browse files

Merge remote-tracking branch 'origin/slurm-15.08'

parents 0106a014 5d435700
No related branches found
No related tags found
No related merge requests found
......@@ -144,6 +144,8 @@ documents those changes that are of interest to users and administrators.
-- Fix output from api when printing job step not found.
-- Don't allow user specified reservation names to disrupt the normal
reservation sequeuece numbering scheme.
-- Fix scontrol to be able to accept TRES as an option when creating
a reservation.
* Changes in Slurm 15.08.6
==========================
......
......@@ -1513,6 +1513,35 @@ If users are denied access to a reservation (user name preceded by a '\-'),
then all other users are implicitly allowed to use the reservation and it is
not possible to also explicitly specify allowed users.
.TP
\fITRES\fP=<tres_spec>
Comma\-separated list of TRES required for the reservation. Current supported
TRES types with reservations are: CPU, Node, License and BB. CPU and Node
follow the same format as CoreCnt and NodeCnt parameters respectively.
License names can be followed by an equal '=' and a count:
License/<name1>=<count1>[,License/<name2>=<count2>,...]
BurstBuffer can be specified in a similar way as BurstBuffer parameter. The
only difference is that colon symbol ':' should be replaced by an equal '='
in order to follow the TRES format.
Some examples of TRES valid specifications:
TRES=cpu=5,bb/cray=4,license/iop1=1,license/iop2=3
TRES=node=5k,license/iop1=2
As specified in CoreCnt, if a nodelist is specified, cpu can be an array
of core numbers by node:
nodes=compute[1\-3] TRES=cpu=2,2,1,bb/cray=4,license/iop1=2
Please note that CPU, Node, License and BB can override CoreCnt, NodeCnt,
Licenses and BurstBuffer parameters respectively. Also CPU represents
CoreCnt, in a reservation and will be adjusted if you have threads per
core on your nodes.
.TP
\fBSPECIFICATIONS FOR UPDATE BLOCK/SUBMP \fR
.TP
......
......@@ -74,9 +74,264 @@ static char * _process_plus_minus(char plus_or_minus, char *src)
return ret;
}
static int _parse_resv_node_cnt(resv_desc_msg_t *resv_msg_ptr, char *val,
bool from_tres)
{
char *endptr = NULL, *node_cnt, *tok, *ptrptr = NULL;
int node_inx = 0;
node_cnt = xstrdup(val);
tok = strtok_r(node_cnt, ",", &ptrptr);
while (tok) {
xrealloc(resv_msg_ptr->node_cnt,
sizeof(uint32_t) * (node_inx + 2));
resv_msg_ptr->node_cnt[node_inx] =
strtol(tok, &endptr, 10);
if ((endptr != NULL) &&
((endptr[0] == 'k') ||
(endptr[0] == 'K'))) {
resv_msg_ptr->node_cnt[node_inx] *= 1024;
} else if ((endptr != NULL) &&
((endptr[0] == 'm') ||
(endptr[0] == 'M'))) {
resv_msg_ptr->node_cnt[node_inx] *= 1024 * 1024;
} else if ((endptr == NULL) ||
(endptr[0] != '\0') ||
(tok[0] == '\0')) {
exit_code = 1;
if (from_tres)
error("Invalid TRES node count %s", val);
else
error("Invalid node count %s", val);
xfree(node_cnt);
return SLURM_ERROR;
}
node_inx++;
tok = strtok_r(NULL, ",", &ptrptr);
}
xfree(node_cnt);
return SLURM_SUCCESS;
}
static int _parse_resv_core_cnt(resv_desc_msg_t *resv_msg_ptr, char *val,
bool from_tres)
{
char *endptr = NULL, *core_cnt, *tok, *ptrptr = NULL;
char *type;
int node_inx = 0;
type = slurm_get_select_type();
if (strcasestr(type, "cray")) {
int param;
param = slurm_get_select_type_param();
if (! (param & CR_OTHER_CONS_RES)) {
error("CoreCnt or CPUCnt is only "
"suported when "
"SelectTypeParameters "
"includes OTHER_CONS_RES");
xfree(type);
return SLURM_ERROR;
}
} else if (strcasestr(type, "cons_res") == NULL) {
error("CoreCnt or CPUCnt is only "
"suported when "
"SelectType includes "
"select/cons_res");
xfree(type);
return SLURM_ERROR;
}
xfree(type);
core_cnt = xstrdup(val);
tok = strtok_r(core_cnt, ",", &ptrptr);
while (tok) {
xrealloc(resv_msg_ptr->core_cnt,
sizeof(uint32_t) * (node_inx + 2));
resv_msg_ptr->core_cnt[node_inx] =
strtol(tok, &endptr, 10);
if ((endptr == NULL) ||
(endptr[0] != '\0') ||
(tok[0] == '\0')) {
exit_code = 1;
if (from_tres)
error("Invalid TRES core count %s", val);
else
error("Invalid core count %s", val);
xfree(core_cnt);
return SLURM_ERROR;
}
node_inx++;
tok = strtok_r(NULL, ",", &ptrptr);
}
xfree(core_cnt);
return SLURM_SUCCESS;
}
/* -1 = error, 0 = is configured, 1 = isn't configured */
static int _is_configured_tres(char *type)
{
int i, cc;
assoc_mgr_info_request_msg_t req;
assoc_mgr_info_msg_t *msg = NULL;
memset(&req, 0, sizeof(assoc_mgr_info_request_msg_t));
cc = slurm_load_assoc_mgr_info(&req, &msg);
if (cc != SLURM_PROTOCOL_SUCCESS) {
slurm_perror("slurm_load_assoc_mgr_info error");
slurm_free_assoc_mgr_info_msg(msg);
return SLURM_ERROR;
}
for (i = 0; i < msg->tres_cnt; ++i) {
if (!strcasecmp(msg->tres_names[i], type)) {
slurm_free_assoc_mgr_info_msg(msg);
return SLURM_SUCCESS;
}
}
error("'%s' is not a configured TRES", type);
slurm_free_assoc_mgr_info_msg(msg);
return SLURM_ERROR;
}
static int _parse_resv_tres(char *val, resv_desc_msg_t *resv_msg_ptr,
int *free_tres_license, int *free_tres_bb,
int *free_tres_corecnt, int *free_tres_nodecnt)
{
int i, ret, len;
char *tres_bb = NULL, *tres_license = NULL,
*tres_corecnt = NULL, *tres_nodecnt = NULL,
*token, *type = NULL, *saveptr1 = NULL,
*value_str = NULL, *name = NULL, *compound = NULL,
*tmp;
bool discard, first;
*free_tres_license = 0;
*free_tres_bb = 0;
*free_tres_corecnt = 0;
*free_tres_nodecnt = 0;
token = strtok_r(val, ",", &saveptr1);
while (token) {
compound = strtok_r(token, "=", &value_str);
if (!value_str || !*value_str) {
error("TRES component '%s' has an invalid value '%s'",
type, token);
goto error;
}
if (strchr(compound, '/')) {
tmp = xstrdup(compound);
type = strtok_r(tmp, "/", &name);
} else
type = compound;
if (_is_configured_tres(compound) < 0)
goto error;
if (!strcasecmp(type, "license")) {
if (tres_license && tres_license[0] != '\0')
xstrcatchar(tres_license, ',');
xstrfmtcat(tres_license, "%s:%s", name, value_str);
token = strtok_r(NULL, ",", &saveptr1);
if (tmp)
xfree(tmp);
} else if (strcasecmp(type, "bb") == 0) {
if (tres_bb && tres_bb[0] != '\0')
xstrcatchar(tres_bb, ',');
xstrfmtcat(tres_bb, "%s:%s", name, value_str);
token = strtok_r(NULL, ",", &saveptr1);
if (tmp)
xfree(tmp);
} else if (strcasecmp(type, "cpu") == 0) {
first = true;
discard = false;
do {
len = strlen(value_str);
for (i = 0; i < len; i++) {
if (!isdigit(value_str[i])) {
if (first) {
error("TRES value '%s' "
"is invalid",
value_str);
goto error;
} else
discard = true;
break;
}
}
first = false;
if (!discard) {
if (tres_corecnt && tres_corecnt[0]
!= '\0')
xstrcatchar(tres_corecnt, ',');
xstrcat(tres_corecnt, value_str);
token = strtok_r(NULL, ",", &saveptr1);
value_str = token;
}
} while (!discard && token);
} else if (strcasecmp(type, "node") == 0) {
if (tres_nodecnt && tres_nodecnt[0] != '\0')
xstrcatchar(tres_nodecnt, ',');
xstrcat(tres_nodecnt, value_str);
token = strtok_r(NULL, ",", &saveptr1);
} else {
error("TRES type '%s' not supported with reservations",
compound);
goto error;
}
}
if (tres_corecnt && tres_corecnt[0] != '\0') {
ret = _parse_resv_core_cnt(resv_msg_ptr, tres_corecnt, true);
xfree(tres_corecnt);
if (ret != SLURM_SUCCESS)
goto error;
*free_tres_corecnt = 1;
}
if (tres_nodecnt && tres_nodecnt[0] != '\0') {
ret = _parse_resv_node_cnt(resv_msg_ptr, tres_nodecnt, true);
xfree(tres_nodecnt);
if (ret != SLURM_SUCCESS)
goto error;
*free_tres_nodecnt = 1;
}
if (tres_license && tres_license[0] != '\0') {
resv_msg_ptr->licenses = tres_license;
*free_tres_license = 1;
}
if (tres_bb && tres_bb[0] != '\0') {
resv_msg_ptr->burst_buffer = tres_bb;
*free_tres_bb = 1;
}
return SLURM_SUCCESS;
error:
xfree(tres_nodecnt);
xfree(tres_corecnt);
exit_code = 1;
return SLURM_ERROR;
}
/*
* scontrol_parse_res_options parse options for creating or updating a
reservation
reservation
* IN argc - count of arguments
* IN argv - list of arguments
* IN msg - a string to append to any error message
......@@ -89,13 +344,19 @@ static char * _process_plus_minus(char plus_or_minus, char *src)
extern int
scontrol_parse_res_options(int argc, char *argv[], const char *msg,
resv_desc_msg_t *resv_msg_ptr,
int *free_user_str, int *free_acct_str)
int *free_user_str, int *free_acct_str,
int *free_tres_license, int *free_tres_bb,
int *free_tres_corecnt, int *free_tres_nodecnt)
{
int i;
int duration = -3; /* -1 == INFINITE, -2 == error, -3 == not set */
*free_user_str = 0;
*free_acct_str = 0;
*free_tres_license = 0;
*free_tres_bb = 0;
*free_tres_corecnt = 0;
*free_tres_nodecnt = 0;
for (i=0; i<argc; i++) {
char *tag = argv[i];
......@@ -110,7 +371,7 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg,
} else if (!val || taglen == 0) {
exit_code = 1;
error("Unknown parameter %s. %s", argv[i], msg);
return -1;
return SLURM_ERROR;
}
if (val[-1] == '+' || val[-1] == '-') {
plus_minus = val[-1];
......@@ -138,7 +399,7 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg,
exit_code = 1;
error("Invalid start time %s. %s",
argv[i], msg);
return -1;
return SLURM_ERROR;
}
resv_msg_ptr->start_time = t;
......@@ -147,7 +408,7 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg,
if (errno == ESLURM_INVALID_TIME_VALUE) {
exit_code = 1;
error("Invalid end time %s. %s", argv[i],msg);
return -1;
return SLURM_ERROR;
}
resv_msg_ptr->end_time = t;
......@@ -157,7 +418,7 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg,
if (duration < 0 && duration != INFINITE) {
exit_code = 1;
error("Invalid duration %s. %s", argv[i],msg);
return -1;
return SLURM_ERROR;
}
resv_msg_ptr->duration = (uint32_t)duration;
......@@ -172,97 +433,25 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg,
f = parse_resv_flags(val, msg);
}
if (f == 0xffffffff) {
return -1;
return SLURM_ERROR;
} else {
resv_msg_ptr->flags = f;
}
} else if (strncasecmp(tag, "NodeCnt", MAX(taglen,5)) == 0 ||
strncasecmp(tag, "NodeCount", MAX(taglen,5)) == 0) {
char *endptr = NULL, *node_cnt, *tok, *ptrptr = NULL;
int node_inx = 0;
node_cnt = xstrdup(val);
tok = strtok_r(node_cnt, ",", &ptrptr);
while (tok) {
xrealloc(resv_msg_ptr->node_cnt,
sizeof(uint32_t) * (node_inx + 2));
resv_msg_ptr->node_cnt[node_inx] =
strtol(tok, &endptr, 10);
if ((endptr != NULL) &&
((endptr[0] == 'k') ||
(endptr[0] == 'K'))) {
resv_msg_ptr->node_cnt[node_inx] *=
1024;
} else if ((endptr != NULL) &&
((endptr[0] == 'm') ||
(endptr[0] == 'M'))) {
resv_msg_ptr->node_cnt[node_inx] *=
1024 * 1024;
} else if ((endptr == NULL) ||
(endptr[0] != '\0') ||
(tok[0] == '\0')) {
exit_code = 1;
error("Invalid node count %s. %s",
argv[i], msg);
xfree(node_cnt);
return -1;
}
node_inx++;
tok = strtok_r(NULL, ",", &ptrptr);
}
xfree(node_cnt);
if (_parse_resv_node_cnt(resv_msg_ptr, val, false)
== SLURM_ERROR)
return SLURM_ERROR;
} else if (strncasecmp(tag, "CoreCnt", MAX(taglen,5)) == 0 ||
strncasecmp(tag, "CoreCount", MAX(taglen,5)) == 0 ||
strncasecmp(tag, "CPUCnt", MAX(taglen,5)) == 0 ||
strncasecmp(tag, "CPUCount", MAX(taglen,5)) == 0) {
char *endptr = NULL, *core_cnt, *tok, *ptrptr = NULL;
char *type;
int node_inx = 0;
type = slurm_get_select_type();
if (strcasestr(type, "cray")) {
int param;
param = slurm_get_select_type_param();
if (! (param & CR_OTHER_CONS_RES)) {
error("CoreCnt or CPUCnt is only "
"suported when "
"SelectTypeParameters "
"includes OTHER_CONS_RES");
xfree(type);
return -1;
}
} else {
if (strcasestr(type, "cons_res") == NULL) {
error("CoreCnt or CPUCnt is only "
"suported when "
"SelectType includes "
"select/cons_res");
xfree(type);
return -1;
}
}
xfree(type);
core_cnt = xstrdup(val);
tok = strtok_r(core_cnt, ",", &ptrptr);
while (tok) {
xrealloc(resv_msg_ptr->core_cnt,
sizeof(uint32_t) * (node_inx + 2));
resv_msg_ptr->core_cnt[node_inx] =
strtol(tok, &endptr, 10);
if ((endptr == NULL) ||
(endptr[0] != '\0') ||
(tok[0] == '\0')) {
exit_code = 1;
error("Invalid core count %s. %s",
argv[i], msg);
xfree(core_cnt);
return -1;
}
node_inx++;
tok = strtok_r(NULL, ",", &ptrptr);
}
xfree(core_cnt);
if (_parse_resv_core_cnt(resv_msg_ptr, val, false)
== SLURM_ERROR)
return SLURM_ERROR;
} else if (strncasecmp(tag, "Nodes", MAX(taglen, 5)) == 0) {
resv_msg_ptr->node_list = val;
......@@ -277,6 +466,13 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg,
== 0) {
resv_msg_ptr->partition = val;
} else if (strncasecmp(tag, "TRES", MAX(taglen, 1)) == 0) {
if (_parse_resv_tres(val, resv_msg_ptr,
free_tres_license, free_tres_bb,
free_tres_corecnt,
free_tres_nodecnt) == SLURM_ERROR)
return SLURM_ERROR;
} else if (strncasecmp(tag, "Users", MAX(taglen, 1)) == 0) {
if (plus_minus) {
resv_msg_ptr->users =
......@@ -288,19 +484,19 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg,
} else if (strncasecmp(tag, "Watts", MAX(taglen, 1)) == 0) {
if (parse_uint32(val, &(resv_msg_ptr->resv_watts))) {
error("Invalid Watts value: %s", val);
return -1;
return SLURM_ERROR;
}
} else if (strncasecmp(tag, "res", 3) == 0) {
continue;
} else {
exit_code = 1;
error("Unknown parameter %s. %s", argv[i], msg);
return -1;
return SLURM_ERROR;
}
}
return 0;
}
return SLURM_SUCCESS;
}
/*
......@@ -316,12 +512,15 @@ scontrol_update_res(int argc, char *argv[])
{
resv_desc_msg_t resv_msg;
int err, ret = 0;
int free_user_str = 0, free_acct_str = 0;
int free_user_str = 0, free_acct_str = 0, free_tres_license = 0,
free_tres_bb = 0, free_tres_corecnt = 0, free_tres_nodecnt = 0;
slurm_init_resv_desc_msg (&resv_msg);
err = scontrol_parse_res_options(argc, argv, "No reservation update.",
&resv_msg, &free_user_str,
&free_acct_str);
&free_acct_str, &free_tres_license,
&free_tres_bb, &free_tres_corecnt,
&free_tres_nodecnt);
if (err)
goto SCONTROL_UPDATE_RES_CLEANUP;
......@@ -345,6 +544,14 @@ SCONTROL_UPDATE_RES_CLEANUP:
xfree(resv_msg.users);
if (free_acct_str)
xfree(resv_msg.accounts);
if (free_tres_license)
xfree(resv_msg.licenses);
if (free_tres_bb)
xfree(resv_msg.burst_buffer);
if (free_tres_corecnt)
xfree(resv_msg.core_cnt);
if (free_tres_nodecnt)
xfree(resv_msg.node_cnt);
return ret;
}
......@@ -361,13 +568,16 @@ scontrol_create_res(int argc, char *argv[])
{
resv_desc_msg_t resv_msg;
char *new_res_name = NULL;
int free_user_str = 0, free_acct_str = 0;
int free_user_str = 0, free_acct_str = 0, free_tres_license = 0,
free_tres_bb = 0, free_tres_corecnt = 0, free_tres_nodecnt = 0;
int err, ret = 0;
slurm_init_resv_desc_msg (&resv_msg);
err = scontrol_parse_res_options(argc, argv, "No reservation created.",
&resv_msg, &free_user_str,
&free_acct_str);
&free_acct_str, &free_tres_license,
&free_tres_bb, &free_tres_corecnt,
&free_tres_nodecnt);
if (err)
goto SCONTROL_CREATE_RES_CLEANUP;
......@@ -483,5 +693,14 @@ SCONTROL_CREATE_RES_CLEANUP:
xfree(resv_msg.users);
if (free_acct_str)
xfree(resv_msg.accounts);
if (free_tres_license)
xfree(resv_msg.licenses);
if (free_tres_bb)
xfree(resv_msg.burst_buffer);
if (free_tres_corecnt)
xfree(resv_msg.core_cnt);
if (free_tres_nodecnt)
xfree(resv_msg.node_cnt);
return ret;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment