From f3be07405eb2488d947a24e63689778cb377b3c0 Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Mon, 21 Jul 2014 14:20:27 -0700
Subject: [PATCH] Add job array modification logic

This is the infrastructure needed to modify a job array with the
new job array data structures. There will need to be additional
work once the RPC is fleshed out.
---
 src/slurmctld/job_mgr.c   | 211 ++++++++++++++++++++++++++++++++++----
 src/slurmctld/slurmctld.h |  13 ++-
 2 files changed, 201 insertions(+), 23 deletions(-)

diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 3583649ccbe..8e2b83a60c8 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -7980,22 +7980,14 @@ static void _merge_job_licenses(struct job_record *shrink_job_ptr,
 	return;
 }
 
-/*
- * update_job - update a job's parameters per the supplied specifications
- * IN job_specs - a job's specification
- * IN uid - uid of user issuing RPC
- * RET returns an error code from slurm_errno.h
- * global: job_list - global list of job entries
- *	last_job_update - time of last job table update
- */
-int update_job(job_desc_msg_t * job_specs, uid_t uid)
+static int _update_job(struct job_record *job_ptr, job_desc_msg_t * job_specs,
+		       uid_t uid)
 {
 	int error_code = SLURM_SUCCESS;
 	enum job_state_reason fail_reason;
 	bool authorized = false, admin = false;
 	uint32_t save_min_nodes = 0, save_max_nodes = 0;
 	uint32_t save_min_cpus = 0, save_max_cpus = 0;
-	struct job_record *job_ptr;
 	struct job_details *detail_ptr;
 	struct part_record *tmp_part_ptr;
 	bitstr_t *exc_bitmap = NULL, *req_bitmap = NULL;
@@ -8021,18 +8013,6 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
 #endif
 	memset(&acct_policy_limit_set, 0, sizeof(acct_policy_limit_set_t));
 
-	/* Make sure anything that may be put in the database will be
-	 * lower case */
-	xstrtolower(job_specs->account);
-	xstrtolower(job_specs->wckey);
-
-	job_ptr = find_job_record(job_specs->job_id);
-	if (job_ptr == NULL) {
-		error("update_job: job_id %u does not exist.",
-		      job_specs->job_id);
-		return ESLURM_INVALID_JOB_ID;
-	}
-
 	error_code = job_submit_plugin_modify(job_specs, job_ptr,
 					      (uint32_t) uid);
 	if (error_code != SLURM_SUCCESS)
@@ -9707,6 +9687,193 @@ fini:
 	return error_code;
 }
 
+/*
+ * update_job - apply the requested changes to one job, found by its job ID
+ * IN job_specs - update request; job_id selects the job record to modify
+ * IN uid - uid of user issuing RPC
+ * RET ESLURM_INVALID_JOB_ID if no such job record exists, otherwise the
+ *	code returned by _update_job()
+ * NOTE: the account and wckey strings in job_specs are forced to lower
+ *	case before the job record is looked up
+ */
+extern int update_job(job_desc_msg_t * job_specs, uid_t uid)
+{
+	struct job_record *target;
+
+	/* Strings that may reach the database are kept in lower case */
+	xstrtolower(job_specs->account);
+	xstrtolower(job_specs->wckey);
+
+	target = find_job_record(job_specs->job_id);
+	if (target)
+		return _update_job(target, job_specs, uid);
+
+	/* No job record carries the requested ID */
+	error("update_job: job_id %u does not exist.", job_specs->job_id);
+	return ESLURM_INVALID_JOB_ID;
+}
+
+/*
+ * update_job_str - update a job, a whole job array or selected array tasks
+ *	named by job_id_str ("<job_id>" or "<job_id>_<task list>")
+ * IN job_specs - a job's specification, applied to each matching job record
+ * IN uid - uid of user issuing RPC
+ * RET returns an error code from slurm_errno.h
+ * global: job_list - global list of job entries
+ *	last_job_update - time of last job table update
+ * NOTE(review): job_id_str is not declared here -- confirm where it is defined
+ */
+extern int update_job_str(job_desc_msg_t * job_specs, uid_t uid)
+{
+	struct job_record *job_ptr, *new_job_ptr;
+	slurm_ctl_conf_t *conf;
+	long int long_id;
+	uint32_t job_id;
+	bitstr_t *array_bitmap, *tmp_bitmap;
+	bool valid = true;
+	int32_t i, i_first, i_last;
+	int len, rc = SLURM_SUCCESS, rc2;
+	char *end_ptr, *tok, *tmp;
+
+	if (max_array_size == NO_VAL) {
+		conf = slurm_conf_lock();
+		max_array_size = conf->max_array_sz;
+		slurm_conf_unlock();
+	}
+
+	/* Make sure anything that may be put in the database will be
+	 * lower case */
+	xstrtolower(job_specs->account);
+	xstrtolower(job_specs->wckey);
+
+	/* Parse the leading job ID; an optional "_<task list>" may follow */
+	long_id = strtol(job_id_str, &end_ptr, 10);
+	if ((long_id <= 0) || (long_id == LONG_MAX) ||
+	    ((end_ptr[0] != '\0') && (end_ptr[0] != '_'))) {
+		info("update_job_str: invalid job id %s", job_id_str);
+		return ESLURM_INVALID_JOB_ID;
+	}
+	job_id = (uint32_t) long_id;
+	if (end_ptr[0] == '\0') {	/* Single job (or full job array) */
+		struct job_record *job_ptr_done = NULL;
+		job_ptr = find_job_record(job_id);
+		if (job_ptr && (job_ptr->array_task_id == NO_VAL) &&
+		    (job_ptr->array_recs == NULL)) {
+			/* This is a regular job, not a job array */
+			return _update_job(job_ptr, job_specs, uid);
+		}
+
+		if (job_ptr && job_ptr->array_recs) {
+			/* This is a job array */
+			rc = _update_job(job_ptr, job_specs, uid);
+			job_ptr_done = job_ptr;
+		}
+
+		/* Update all tasks of this job array */
+		job_ptr = job_array_hash_j[JOB_HASH_INX(job_id)];
+		if (!job_ptr && !job_ptr_done) {
+			info("update_job_str: invalid job id %u", job_id);
+			return ESLURM_INVALID_JOB_ID;
+		}
+		while (job_ptr) {
+			if ((job_ptr->array_job_id == job_id) &&
+			    (job_ptr != job_ptr_done)) {
+				rc2 = _update_job(job_ptr, job_specs, uid);
+				rc = MAX(rc, rc2);
+			}
+			job_ptr = job_ptr->job_array_next_j;
+		}
+		return rc;
+	}
+
+	array_bitmap = bit_alloc(max_array_size);
+	tmp = xstrdup(end_ptr + 1);
+	tok = strtok_r(tmp, ",", &end_ptr);
+	while (tok && valid) {
+		valid = _parse_array_tok(tok, array_bitmap, max_array_size);
+		tok = strtok_r(NULL, ",", &end_ptr);
+	}
+	xfree(tmp);
+	if (valid) {
+		i_last = bit_fls(array_bitmap);
+		if (i_last < 0)
+			valid = false;
+	}
+	if (!valid) {
+		info("update_job_str: invalid job id %s", job_id_str);
+		FREE_NULL_BITMAP(array_bitmap);
+		return ESLURM_INVALID_JOB_ID;
+	}
+
+	job_ptr = find_job_record(job_id);
+	if (job_ptr && IS_JOB_PENDING(job_ptr) &&
+	    job_ptr->array_recs && job_ptr->array_recs->task_id_bitmap) {
+		/* Ensure bitmap sizes match for AND operations */
+		/* bit_realloc() returns the resized bitmap; keep it */
+		len = bit_size(job_ptr->array_recs->task_id_bitmap);
+		i_last++;
+		if (i_last < len) {
+			array_bitmap = bit_realloc(array_bitmap, len);
+		} else {
+			array_bitmap = bit_realloc(array_bitmap, i_last);
+			job_ptr->array_recs->task_id_bitmap = bit_realloc(
+				job_ptr->array_recs->task_id_bitmap, i_last);
+		}
+		if (!bit_overlap(job_ptr->array_recs->task_id_bitmap,
+				 array_bitmap)) {
+			/* Nothing to do with this job record */
+		} else if (bit_super_set(job_ptr->array_recs->task_id_bitmap,
+					 array_bitmap)) {
+			/* Update the record with all pending tasks */
+			rc = _update_job(job_ptr, job_specs, uid);
+			/* Drop this record's task IDs from array_bitmap */
+			bit_not(job_ptr->array_recs->task_id_bitmap);
+			bit_and(array_bitmap,
+				job_ptr->array_recs->task_id_bitmap);
+			bit_not(job_ptr->array_recs->task_id_bitmap);
+		} else {
+			/* Need to split out tasks to separate job records */
+			tmp_bitmap = bit_copy(job_ptr->array_recs->
+					      task_id_bitmap);
+			bit_and(tmp_bitmap, array_bitmap);
+			i_first = bit_ffs(tmp_bitmap);
+			i_last = (i_first >= 0) ? bit_fls(tmp_bitmap) : -2;
+			for (i = i_first; i <= i_last; i++) {
+				if (!bit_test(array_bitmap, i))
+					continue;
+				job_ptr->array_task_id = i;
+				new_job_ptr = _job_rec_copy(job_ptr);
+				if (!new_job_ptr) {
+					error("update_job_str: Unable to copy "
+					      "record for job %u",
+					      job_ptr->job_id);
+				}
+			}
+			FREE_NULL_BITMAP(tmp_bitmap);
+		}
+	}
+
+	/* Update the job record of every task still set in array_bitmap */
+	i_first = bit_ffs(array_bitmap);
+	i_last = (i_first >= 0) ? bit_fls(array_bitmap) : -2;
+	for (i = i_first; i <= i_last; i++) {
+		if (!bit_test(array_bitmap, i))
+			continue;
+		job_ptr = find_job_array_rec(job_id, i);
+		if (job_ptr == NULL) {
+			info("update_job_str: invalid job id %u_%d", job_id, i);
+			rc = ESLURM_INVALID_JOB_ID;
+			continue;
+		}
+
+		rc2 = _update_job(job_ptr, job_specs, uid);
+		rc = MAX(rc, rc2);
+	}
+
+	FREE_NULL_BITMAP(array_bitmap);
+	return rc;
+}
+
 static void _send_job_kill(struct job_record *job_ptr)
 {
 	kill_job_msg_t *kill_job = NULL;
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 6bfc12cd589..f9488ca1d26 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -1950,6 +1950,7 @@ extern int sync_job_files(void);
 /* After recovering job state, if using priority/basic then we increment the
  * priorities of all jobs to avoid decrementing the base down to zero */
 extern void sync_job_priorities(void);
+
 /*
  * update_job - update a job's parameters per the supplied specifications
  * IN job_specs - a job's specification
@@ -1958,7 +1959,17 @@ extern void sync_job_priorities(void);
  * global: job_list - global list of job entries
  *	last_job_update - time of last job table update
  */
-extern int update_job (job_desc_msg_t * job_specs, uid_t uid);
+extern int update_job(job_desc_msg_t * job_specs, uid_t uid);
+
+/*
+ * update_job_str - update a job or job array per the supplied specifications
+ * IN job_specs - a job's specification
+ * IN uid - uid of user issuing RPC
+ * RET returns an error code from slurm_errno.h
+ * global: job_list - global list of job entries
+ *	last_job_update - time of last job table update
+ */
+extern int update_job_str(job_desc_msg_t * job_specs, uid_t uid);
 
 /*
  * Modify the account associated with a pending job
-- 
GitLab