Newer
Older
free(this_node_name);
}
hostlist_destroy(host_list);
if (job_ptr->node_cnt != node_inx) {
error("Node count mismatch for JobId=%u (%u,%u)",
job_ptr->job_id, job_ptr->node_cnt, node_inx);
job_ptr->node_cnt = node_inx;
job_ptr->num_cpu_groups = cpu_inx + 1;
/*
 * _valid_features - determine if the requested features are satisfied by
 *	those available
 * IN requested - requested features (by a job)
 * IN available - available features (on a node)
 * RET 0 if request is not satisfied, otherwise an integer indicating which
 *	mutually exclusive feature is satisfied. For example
 *	_valid_features("[fs1|fs2|fs3|fs4]", "fs3") returns 3. See the
 *	slurm administrator and user guides for details. Returns 1 if
 *	requirements are satisfied without mutually exclusive feature list.
 *
 * The request is scanned left to right on a private copy; there is no
 * operator precedence. Each feature name is terminated in place where its
 * trailing operator stood, handed to _match_feature(), and folded into the
 * running result using the operator that preceded it ('&' or '|').
 * A "[a|b|...]" list is evaluated on its own and then merged with the
 * result accumulated before the '['. Within the list the 1-based index of
 * the most recently matched alternative is remembered and multiplied into
 * the final result, so a list with no matching alternative yields 0.
 * An '&' inside brackets, or anything other than '&', '|' or end of string
 * after ']', is logged as a parsing failure and yields 0.
 */
static int _valid_features(char *requested, char *available)
{
	char *tmp_requested, *str_ptr1;
	int bracket, found, i, option, position, result;
	int last_op;		/* last operation 0 for or, 1 for and */
	int save_op = 0, save_result = 0;	/* for bracket support */

	if (requested == NULL)
		return 1;	/* no constraints */
	if (available == NULL)
		return 0;	/* no features */

	tmp_requested = xstrdup(requested);	/* scratch copy, edited below */
	bracket = option = position = 0;
	str_ptr1 = tmp_requested;	/* start of feature name */
	result = last_op = 1;	/* assume good for now */
	for (i = 0;; i++) {
		if (tmp_requested[i] == '\0') {
			/* end of request: evaluate the trailing name, if any */
			if (str_ptr1[0] == '\0')
				break;
			found = _match_feature(str_ptr1, available);
			if (last_op == 1)	/* and */
				result &= found;
			else	/* or */
				result |= found;
			break;
		}

		if (tmp_requested[i] == '&') {
			if (bracket != 0) {
				/* only '|' may separate bracketed names */
				debug("_valid_features: parsing failure on %s",
				      requested);
				result = 0;
				break;
			}
			tmp_requested[i] = '\0';	/* terminate current name */
			found = _match_feature(str_ptr1, available);
			if (last_op == 1)	/* and */
				result &= found;
			else	/* or */
				result |= found;
			str_ptr1 = &tmp_requested[i + 1];
			last_op = 1;	/* and */

		} else if (tmp_requested[i] == '|') {
			tmp_requested[i] = '\0';	/* terminate current name */
			found = _match_feature(str_ptr1, available);
			if (bracket != 0) {
				/* remember which bracketed alternative matched */
				if (found)
					option = position;
				position++;
			}
			if (last_op == 1)	/* and */
				result &= found;
			else	/* or */
				result |= found;
			str_ptr1 = &tmp_requested[i + 1];
			last_op = 0;	/* or */

		} else if (tmp_requested[i] == '[') {
			/* set aside the state so far; evaluate the list fresh */
			bracket++;
			position = 1;
			save_op = last_op;
			save_result = result;
			last_op = result = 1;
			str_ptr1 = &tmp_requested[i + 1];

		} else if (tmp_requested[i] == ']') {
			tmp_requested[i] = '\0';	/* terminate last alternative */
			found = _match_feature(str_ptr1, available);
			if (found)
				option = position;
			result |= found;
			/* merge the list outcome with the state saved at '[' */
			if (save_op == 1)	/* and */
				result &= save_result;
			else	/* or */
				result |= save_result;
			/*
			 * NOTE(review): in the '&' and '|' cases below the loop
			 * next visits the operator at i + 1, whose handler calls
			 * _match_feature() on str_ptr1 before the following name
			 * has been terminated (e.g. "c&d" for "[a|b]&c&d").
			 * Confirm _match_feature() tolerates that.
			 */
			if ((tmp_requested[i + 1] == '&')
			    && (bracket == 1)) {
				last_op = 1;
				str_ptr1 = &tmp_requested[i + 2];
			} else if ((tmp_requested[i + 1] == '|')
				   && (bracket == 1)) {
				last_op = 0;
				str_ptr1 = &tmp_requested[i + 2];
			} else if ((tmp_requested[i + 1] == '\0')
				   && (bracket == 1)) {
				break;
			} else {
				debug("_valid_features: parsing failure on %s",
				      requested);
				result = 0;
				break;
			}
			bracket = 0;
		}
	}

	/* a bracket list was seen: report the matching alternative's index */
	if (position)
		result *= option;
	xfree(tmp_requested);
	return result;
}
/*
* re_kill_job - for a given job, deallocate its nodes for a second time,
* basically a cleanup for failed deallocate() calls
* IN job_ptr - pointer to terminating job (already in some COMPLETING state)
* globals: node_record_count - number of nodes in the system
* node_record_table_ptr - pointer to global node table
*/
extern void re_kill_job(struct job_record *job_ptr)
{
int i;
kill_job_msg_t *kill_job;
agent_arg_t *agent_args;
int buf_rec_size = 0;
hostlist_t kill_hostlist = hostlist_create("");
char host_str[64];
xassert(job_ptr);
xassert(job_ptr->details);
agent_args = xmalloc(sizeof(agent_arg_t));
agent_args->msg_type = REQUEST_KILL_JOB;
agent_args->retry = 0;
kill_job = xmalloc(sizeof(kill_job_msg_t));
kill_job->job_id = job_ptr->job_id;
kill_job->job_uid = job_ptr->user_id;
for (i = 0; i < node_record_count; i++) {
struct node_record *node_ptr = &node_record_table_ptr[i];
if ((job_ptr->node_bitmap == NULL) ||
(bit_test(job_ptr->node_bitmap, i) == 0))
continue;
if ((node_ptr->node_state & (~NODE_STATE_NO_RESPOND))
== NODE_STATE_DOWN) {
/* Consider job already completed */
bit_clear(job_ptr->node_bitmap, i);
if (node_ptr->comp_job_cnt)
(node_ptr->comp_job_cnt)--;
if ((--job_ptr->node_cnt) == 0) {
last_node_update = time(NULL);
delete_all_step_records(job_ptr);
job_ptr->job_state &= (~JOB_COMPLETING);
}
continue;
}
if (node_ptr->node_state & NODE_STATE_NO_RESPOND)
continue;
(void) hostlist_push_host(kill_hostlist, node_ptr->name);
#ifdef HAVE_FRONT_END /* Operate only on front-end */
if (agent_args->node_count > 0)
continue;
#endif
if ((agent_args->node_count + 1) > buf_rec_size) {
buf_rec_size += 128;
xrealloc((agent_args->slurm_addr),
(sizeof(struct sockaddr_in) * buf_rec_size));
xrealloc((agent_args->node_names),
(MAX_NAME_LEN * buf_rec_size));
}
agent_args->slurm_addr[agent_args->node_count] =
node_ptr->slurm_addr;
strncpy(&agent_args->
node_names[MAX_NAME_LEN * agent_args->node_count],
node_ptr->name, MAX_NAME_LEN);
agent_args->node_count++;
}
if (agent_args->node_count == 0) {
xfree(kill_job);
xfree(agent_args);
hostlist_destroy(kill_hostlist);
return;
}
hostlist_uniq(kill_hostlist);
hostlist_ranged_string(kill_hostlist,
sizeof(host_str), host_str);
info("Resending KILL_JOB request JobId=%u Nodelist=%s",
job_ptr->job_id, host_str);
hostlist_destroy(kill_hostlist);
agent_args->msg_args = kill_job;
agent_queue_request(agent_args);