From 31113a5d45104ca6519d836bde66d5e1b43d2e7e Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Thu, 3 Mar 2011 21:38:38 +0000
Subject: [PATCH] -- Add the ability for scontrol to modify slurmctld
 DebugFlags values.

---
 NEWS                             |  1 +
 RELEASE_NOTES                    |  4 +--
 doc/man/man1/scontrol.1          | 10 +++++-
 slurm/slurm.h.in                 |  9 +++++
 src/api/config_info.c            |  2 +-
 src/api/reconfigure.c            | 42 ++++++++++++++++++++++-
 src/common/slurm_protocol_api.c  | 14 ++++++++
 src/common/slurm_protocol_api.h  |  4 +++
 src/common/slurm_protocol_defs.c |  8 +++++
 src/common/slurm_protocol_defs.h |  7 ++++
 src/common/slurm_protocol_pack.c | 43 +++++++++++++++++++++++
 src/common/slurm_protocol_util.c |  1 +
 src/scontrol/scontrol.c          | 59 ++++++++++++++++++++++++++++++++
 src/slurmctld/proc_req.c         | 44 +++++++++++++++++++++---
 14 files changed, 237 insertions(+), 11 deletions(-)

diff --git a/NEWS b/NEWS
index 9fb04dfda96..d58266fb635 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,7 @@ documents those changes that are of interest to users and admins.
 =============================
  -- Add GraceTime to Partition and QOS data structures. Preempted jobs will be
     given this time interval before termination. Work by Bill Brophy, Bull.
+ -- Add the ability for scontrol to modify slurmctld DebugFlags values.
 
 * Changes in SLURM 2.3.0.pre3
 =============================
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index f59ab0f2965..d098ac8029e 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -37,9 +37,7 @@ CONFIGURATION FILE CHANGES (see "man slurm.conf" for details)
 * In order to support more than one front end node, new parameters have been
   added to support a new data structure: FrontendName, FrontendAddr, Port,
   State and Reason.
-
 * DebugFlags of Frontend added
-
 * Added new configuration parameter MaxJobId. Use with FirstJobId to limit
   range of job ID values.
 
@@ -47,7 +45,7 @@ CONFIGURATION FILE CHANGES (see "man slurm.conf" for details)
 COMMAND CHANGES (see man pages for details)
 ===========================================
 * scontrol has the ability to get and set front end node state.
-
+* scontrol has the ability to set slurmctld's DebugFlags.
 * Add new scontrol option of "show aliases" to report every NodeName that is
   associated with a given NodeHostName when running multiple slurmd daemons
   per compute node (typically used for testing purposes).
diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1
index 1c0fe6d0965..ea039f61003 100644
--- a/doc/man/man1/scontrol.1
+++ b/doc/man/man1/scontrol.1
@@ -1,4 +1,4 @@
-.TH SCONTROL "1" "December 2010" "scontrol 2.3" "Slurm components"
+.TH SCONTROL "1" "March 2011" "scontrol 2.3" "Slurm components"
 
 .SH "NAME"
 scontrol \- Used view and modify Slurm configuration and state.
@@ -276,6 +276,14 @@ This value is temporary and will be overwritten whenever the slurmctld
 daemon reads the slurm.conf configuration file (e.g. when the daemon
 is restarted or \fBscontrol reconfigure\fR is executed).
 
+.TP
+\fBsetdebugflags\fP [+|\-]\fIFLAG\fP
+Add or remove DebugFlags of the slurmctld daemon.
+See "man slurm.conf" for a list of supported DebugFlags.
+NOTE: Changing the value of some DebugFlags will have no effect without
+restarting the slurmctld daemon, which would set DebugFlags based upon the
+contents of the slurm.conf configuration file.
+
 .TP
 \fBshow\fP \fIENTITY\fP \fIID\fP
 Display the state of the specified entity with the specified identification.
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index 3f7904aece4..47cac5a8936 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -3147,6 +3147,15 @@ extern int slurm_shutdown PARAMS((uint16_t options));
  */
 extern int slurm_takeover PARAMS((void));
 
+/*
+ * slurm_set_debugflags - issue RPC to set slurm controller debug flags
+ * IN debug_flags_plus  - debug flags to be added
+ * IN debug_flags_minus - debug flags to be removed
+ * RET 0 on success, otherwise return -1 and set errno to indicate the error
+ */
+extern int slurm_set_debugflags PARAMS((uint32_t debug_flags_plus,
+					uint32_t debug_flags_minus));
+
 /*
  * slurm_set_debug_level - issue RPC to set slurm controller debug level
  * IN debug_level - requested debug level
diff --git a/src/api/config_info.c b/src/api/config_info.c
index 351592c466d..0038972dc0f 100644
--- a/src/api/config_info.c
+++ b/src/api/config_info.c
@@ -255,7 +255,7 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr)
 
 	key_pair = xmalloc(sizeof(config_key_pair_t));
 	key_pair->name = xstrdup("DebugFlags");
-	key_pair->value = debug_flags2str(slurm_ctl_conf_ptr->debug_flags);;
+	key_pair->value = debug_flags2str(slurm_ctl_conf_ptr->debug_flags);
 	list_append(ret_list, key_pair);
 
 	key_pair = xmalloc(sizeof(config_key_pair_t));
diff --git a/src/api/reconfigure.c b/src/api/reconfigure.c
index cb9ff603f4b..a8867490f5e 100644
--- a/src/api/reconfigure.c
+++ b/src/api/reconfigure.c
@@ -189,12 +189,52 @@ _send_message_controller (enum controller_id dest, slurm_msg_t *req)
         return rc;
 }
 
+/*
+ * slurm_set_debugflags - issue RPC to set slurm controller debug flags
+ * IN debug_flags_plus  - debug flags to be added
+ * IN debug_flags_minus - debug flags to be removed
+ * NOTE: slurmctld clears the minus flags before setting the plus flags
+ * RET 0 on success, otherwise return -1 and set errno to indicate the error
+ */
+extern int
+slurm_set_debugflags (uint32_t debug_flags_plus, uint32_t debug_flags_minus)
+{
+	int rc;
+	slurm_msg_t req_msg;
+	slurm_msg_t resp_msg;
+	set_debug_flags_msg_t req;
+
+	slurm_msg_t_init(&req_msg);
+	slurm_msg_t_init(&resp_msg);
+
+	req.debug_flags_minus = debug_flags_minus;
+	req.debug_flags_plus  = debug_flags_plus;
+	req_msg.msg_type = REQUEST_SET_DEBUG_FLAGS;
+	req_msg.data     = &req;	/* points at the local req struct */
+
+	if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
+		return SLURM_ERROR;
+
+	switch (resp_msg.msg_type) {
+	case RESPONSE_SLURM_RC:
+		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
+		slurm_free_return_code_msg(resp_msg.data);
+		if (rc)		/* non-zero return code in the reply */
+			slurm_seterrno_ret(rc);
+		break;
+	default:		/* any other reply type is an error */
+		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
+		break;
+	}
+        return SLURM_PROTOCOL_SUCCESS;
+}
+
 /*
  * slurm_set_debug_level - issue RPC to set slurm controller debug level
  * IN debug_level - requested debug level
  * RET 0 on success, otherwise return -1 and set errno to indicate the error
  */
-int
+extern int
 slurm_set_debug_level (uint32_t debug_level)
 {
 	int rc;
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 9a205877a0f..2f9478c12ff 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -331,6 +331,20 @@ uint32_t slurm_get_debug_flags(void)
 	return debug_flags;
 }
 
+/* slurm_set_debug_flags - store debug_flags in the locked slurm config
+ * IN debug_flags - new flags value; ignored when slurmdbd_conf is set */
+void slurm_set_debug_flags(uint32_t debug_flags)
+{
+	slurm_ctl_conf_t *conf;
+
+	if (slurmdbd_conf) {	/* nothing is stored in this case */
+	} else {
+		conf = slurm_conf_lock();
+		conf->debug_flags = debug_flags;
+		slurm_conf_unlock();
+	}
+}
+
 /* slurm_get_max_mem_per_cpu
  * RET MaxMemPerCPU/Node value from slurm.conf
  */
diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h
index 146590fd678..46e44eb01fc 100644
--- a/src/common/slurm_protocol_api.h
+++ b/src/common/slurm_protocol_api.h
@@ -128,6 +128,10 @@ uint16_t slurm_get_complete_wait(void);
  */
 uint32_t slurm_get_debug_flags(void);
 
+/* slurm_set_debug_flags
+ */
+void slurm_set_debug_flags(uint32_t debug_flags);
+
 /* slurm_get_def_mem_per_cpu
  * RET DefMemPerCPU/Node value from slurm.conf
  */
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 9c4374fac3b..e415ec75c81 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -2097,6 +2097,11 @@ inline void slurm_free_trigger_msg(trigger_info_msg_t *msg)
 	xfree(msg);
 }
 
+void slurm_free_set_debug_flags_msg(set_debug_flags_msg_t *msg)
+{	/* the message has only scalar fields, so only msg itself is freed */
+	xfree(msg);
+}
+
 void slurm_free_set_debug_level_msg(set_debug_level_msg_t *msg)
 {
 	xfree(msg);
@@ -2339,6 +2344,9 @@ extern int slurm_free_msg_data(slurm_msg_type_t type, void *data)
 	case RESPONSE_SLURM_RC:
 		slurm_free_return_code_msg(data);
 		break;
+	case REQUEST_SET_DEBUG_FLAGS:
+		slurm_free_set_debug_flags_msg(data);
+		break;
 	case REQUEST_SET_DEBUG_LEVEL:
 	case REQUEST_SET_SCHEDLOG_LEVEL:
 		slurm_free_set_debug_level_msg(data);
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index 5e213e7c4c6..385672a5970 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -179,6 +179,7 @@ typedef enum {
 	REQUEST_HEALTH_CHECK,
 	REQUEST_TAKEOVER,
 	REQUEST_SET_SCHEDLOG_LEVEL,
+	REQUEST_SET_DEBUG_FLAGS,
 
 	REQUEST_BUILD_INFO = 2001,
 	RESPONSE_BUILD_INFO,
@@ -564,6 +565,11 @@ typedef struct last_update_msg {
 	time_t last_update;
 } last_update_msg_t;
 
+typedef struct set_debug_flags_msg {
+	uint32_t debug_flags_minus;	/* debug flags to be removed */
+	uint32_t debug_flags_plus;	/* debug flags to be added */
+} set_debug_flags_msg_t;
+
 typedef struct set_debug_level_msg {
 	uint32_t debug_level;
 } set_debug_level_msg_t;
@@ -959,6 +965,7 @@ inline void slurm_free_front_end_info_request_msg(
 inline void slurm_free_node_info_request_msg(node_info_request_msg_t *msg);
 inline void slurm_free_part_info_request_msg(part_info_request_msg_t *msg);
 inline void slurm_free_resv_info_request_msg(resv_info_request_msg_t *msg);
+inline void slurm_free_set_debug_flags_msg(set_debug_flags_msg_t *msg);
 inline void slurm_free_set_debug_level_msg(set_debug_level_msg_t *msg);
 inline void slurm_destroy_association_shares_object(void *object);
 inline void slurm_free_shares_request_msg(shares_request_msg_t *msg);
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index 9924cc57de4..8678a5c4fef 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -538,6 +538,12 @@ static void _pack_job_notify(job_notify_msg_t *msg, Buf buffer,
 static int  _unpack_job_notify(job_notify_msg_t **msg_ptr, Buf buffer,
 			       uint16_t protocol_version);
 
+static void _pack_set_debug_flags_msg(set_debug_flags_msg_t * msg, Buf buffer,
+				      uint16_t protocol_version);
+static int _unpack_set_debug_flags_msg(set_debug_flags_msg_t ** msg_ptr,
+				       Buf buffer,
+				       uint16_t protocol_version);
+
 static void _pack_set_debug_level_msg(set_debug_level_msg_t * msg, Buf buffer,
 				      uint16_t protocol_version);
 static int _unpack_set_debug_level_msg(set_debug_level_msg_t ** msg_ptr,
@@ -1098,6 +1104,11 @@ pack_msg(slurm_msg_t const *msg, Buf buffer)
 		_pack_job_notify((job_notify_msg_t *) msg->data, buffer,
 				 msg->protocol_version);
 		break;
+	case REQUEST_SET_DEBUG_FLAGS:
+		_pack_set_debug_flags_msg(
+			(set_debug_flags_msg_t *)msg->data, buffer,
+			msg->protocol_version);
+		break;
 	case REQUEST_SET_DEBUG_LEVEL:
 	case REQUEST_SET_SCHEDLOG_LEVEL:
 		_pack_set_debug_level_msg(
@@ -1624,6 +1635,11 @@ unpack_msg(slurm_msg_t * msg, Buf buffer)
 					 &msg->data, buffer,
 					 msg->protocol_version);
 		break;
+	case REQUEST_SET_DEBUG_FLAGS:
+		rc = _unpack_set_debug_flags_msg(
+			(set_debug_flags_msg_t **)&(msg->data), buffer,
+			msg->protocol_version);
+		break;
 	case REQUEST_SET_DEBUG_LEVEL:
 	case REQUEST_SET_SCHEDLOG_LEVEL:
 		rc = _unpack_set_debug_level_msg(
@@ -9302,6 +9318,33 @@ unpack_error:
 	return SLURM_ERROR;
 }
 
+static void
+_pack_set_debug_flags_msg(set_debug_flags_msg_t * msg, Buf buffer,
+			  uint16_t protocol_version)
+{	/* protocol_version is not consulted; order is minus, then plus */
+	pack32(msg->debug_flags_minus, buffer);
+	pack32(msg->debug_flags_plus,  buffer);
+}
+
+static int
+_unpack_set_debug_flags_msg(set_debug_flags_msg_t ** msg_ptr, Buf buffer,
+			    uint16_t protocol_version)
+{	/* protocol_version is not consulted; order is minus, then plus */
+	set_debug_flags_msg_t *msg;
+
+	msg = xmalloc(sizeof(set_debug_flags_msg_t));
+	*msg_ptr = msg;		/* on success the message is handed back */
+
+	safe_unpack32(&msg->debug_flags_minus, buffer);
+	safe_unpack32(&msg->debug_flags_plus,  buffer);
+	return SLURM_SUCCESS;
+
+unpack_error:	/* NOTE(review): presumably the safe_unpack32 failure target */
+	slurm_free_set_debug_flags_msg(msg);
+	*msg_ptr = NULL;	/* do not hand back the freed message */
+	return SLURM_ERROR;
+}
+
 static void
 _pack_set_debug_level_msg(set_debug_level_msg_t * msg, Buf buffer,
 			  uint16_t protocol_version)
diff --git a/src/common/slurm_protocol_util.c b/src/common/slurm_protocol_util.c
index 5f58245225f..8d783015c6d 100644
--- a/src/common/slurm_protocol_util.c
+++ b/src/common/slurm_protocol_util.c
@@ -87,6 +87,7 @@ int check_header_version(header_t * header)
 		case REQUEST_PRIORITY_FACTORS:
 		case REQUEST_RECONFIGURE:
 		case REQUEST_RESERVATION_INFO:
+		case REQUEST_SET_DEBUG_FLAGS:
 		case REQUEST_SET_DEBUG_LEVEL:
 		case REQUEST_SHARE_INFO:
 		case REQUEST_SHUTDOWN:
diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c
index 5a81d2c6e46..f10e561192e 100644
--- a/src/scontrol/scontrol.c
+++ b/src/scontrol/scontrol.c
@@ -846,6 +846,64 @@ _process_command (int argc, char *argv[])
 				exit_code = 1;
 		}
 	}
+	else if (strncasecmp (tag, "setdebugflags", MAX(tag_len, 9)) == 0) {
+		if (argc > 2) {		/* exactly one [+|-]FLAG is accepted */
+			exit_code = 1;
+			if (quiet_flag != 1)
+				fprintf(stderr,
+					"too many arguments for keyword:%s\n",
+					tag);
+		} else if (argc < 2) {
+			exit_code = 1;
+			if (quiet_flag != 1)
+				fprintf(stderr,
+					"too few arguments for keyword:%s\n",
+					tag);
+		} else {
+			int i, mode = 0;  /* 1: add, -1: remove, 0: no prefix */
+			uint32_t debug_flags_plus  = 0;
+			uint32_t debug_flags_minus = 0, flags;
+
+			for (i = 1; i < argc; i++) {
+				if (argv[i][0] == '+')
+					mode = 1;
+				else if (argv[i][0] == '-')
+					mode = -1;
+				else {
+					mode = 0;	/* missing +/- prefix */
+					break;
+				}
+				flags = debug_str2flags(&argv[i][1]);
+				if (flags == NO_VAL)	/* unknown flag name */
+					break;
+				if (mode == 1)
+					debug_flags_plus  |= flags;
+				else
+					debug_flags_minus |= flags;
+			}
+			if (i < argc) {	/* loop stopped early at argv[i] */
+				exit_code = 1;
+				if (quiet_flag != 1) {
+					fprintf(stderr, "invalid debug "
+						"flag: %s\n", argv[i]);
+				}
+				if ((quiet_flag != 1) && (mode == 0)) {
+					fprintf(stderr, "Usage: setdebugflags"
+						" [+|-]NAME\n");
+				}
+			} else {
+				error_code = slurm_set_debugflags(
+					debug_flags_plus, debug_flags_minus);
+				if (error_code) {
+					exit_code = 1;
+					if (quiet_flag != 1)
+						slurm_perror(
+							"slurm_set_debugflags"
+							" error");
+				}
+			}
+		}
+	}
 	else if (strncasecmp (tag, "setdebug", MAX(tag_len, 2)) == 0) {
 		if (argc > 2) {
 			exit_code = 1;
@@ -1623,6 +1681,7 @@ scontrol [<OPTION>] [<COMMAND>]                                            \n\
      requeue <job_id>         re-queue a batch job                         \n\
      resume <job_id>          resume previously suspended job (see suspend)\n\
      setdebug <level>         set slurmctld debug level                    \n\
+     setdebugflags [+|-]<flag>  add or remove slurmctld DebugFlags         \n\
      schedloglevel <slevel>   set scheduler log level                      \n\
      show <ENTITY> [<ID>]     display state of identified entity, default  \n\
 			      is all records.                              \n\
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 15f990e281b..19c6a80e963 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -134,6 +134,7 @@ inline static void  _slurm_rpc_resv_delete(slurm_msg_t * msg);
 inline static void  _slurm_rpc_resv_show(slurm_msg_t * msg);
 inline static void  _slurm_rpc_requeue(slurm_msg_t * msg);
 inline static void  _slurm_rpc_takeover(slurm_msg_t * msg);
+inline static void  _slurm_rpc_set_debug_flags(slurm_msg_t *msg);
 inline static void  _slurm_rpc_set_debug_level(slurm_msg_t *msg);
 inline static void  _slurm_rpc_set_schedlog_level(slurm_msg_t *msg);
 inline static void  _slurm_rpc_shutdown_controller(slurm_msg_t * msg);
@@ -390,6 +391,10 @@ void slurmctld_req (slurm_msg_t * msg)
 		_slurm_rpc_job_notify(msg);
 		slurm_free_job_notify_msg(msg->data);
 		break;
+	case REQUEST_SET_DEBUG_FLAGS:
+		_slurm_rpc_set_debug_flags(msg);
+		slurm_free_set_debug_flags_msg(msg->data);
+		break;
 	case REQUEST_SET_DEBUG_LEVEL:
 		_slurm_rpc_set_debug_level(msg);
 		slurm_free_set_debug_level_msg(msg->data);
@@ -3833,13 +3838,42 @@ inline static void  _slurm_rpc_job_notify(slurm_msg_t * msg)
 	slurm_send_rc_msg(msg, error_code);
 }
 
-/* defined in controller.c */
+inline static void  _slurm_rpc_set_debug_flags(slurm_msg_t *msg)
+{	/* REQUEST_SET_DEBUG_FLAGS handler; super users only */
+	uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
+	slurmctld_lock_t config_write_lock =
+		{ WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
+	set_debug_flags_msg_t *request_msg =
+		(set_debug_flags_msg_t *) msg->data;
+	uint32_t debug_flags;
+	char *flag_string;
+
+	debug2("Processing RPC: REQUEST_SET_DEBUG_FLAGS from uid=%d", uid);
+	if (!validate_super_user(uid)) {
+		error("set debug flags request from non-super user uid=%d",
+		      uid);
+		slurm_send_rc_msg(msg, EACCES);	/* refuse; flags unchanged */
+		return;
+	}
+
+	lock_slurmctld (config_write_lock);
+	debug_flags  = slurm_get_debug_flags();
+	debug_flags &= (~request_msg->debug_flags_minus); /* clear first, */
+	debug_flags |= request_msg->debug_flags_plus;	  /* then set */
+	slurm_set_debug_flags(debug_flags);
+	unlock_slurmctld (config_write_lock);
+	flag_string = debug_flags2str(debug_flags);
+	info("Set DebugFlags to %s", flag_string);
+	xfree(flag_string);	/* release the string from debug_flags2str */
+	slurm_send_rc_msg(msg, SLURM_SUCCESS);
+}
+
 inline static void  _slurm_rpc_set_debug_level(slurm_msg_t *msg)
 {
 	int debug_level, old_debug_level;
 	uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
-	slurmctld_lock_t config_read_lock =
-		{ READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
+	slurmctld_lock_t config_write_lock =
+		{ WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
 	set_debug_level_msg_t *request_msg =
 		(set_debug_level_msg_t *) msg->data;
 	log_options_t log_opts = LOG_OPTS_INITIALIZER;
@@ -3858,7 +3892,7 @@ inline static void  _slurm_rpc_set_debug_level(slurm_msg_t *msg)
 	debug_level = MIN (request_msg->debug_level, (LOG_LEVEL_END - 1));
 	debug_level = MAX (debug_level, LOG_LEVEL_QUIET);
 
-	lock_slurmctld (config_read_lock);
+	lock_slurmctld (config_write_lock);
 	if (slurmctld_config.daemonize) {
 		log_opts.stderr_level = LOG_LEVEL_QUIET;
 		if (slurmctld_conf.slurmctld_logfile) {
@@ -3877,7 +3911,7 @@ inline static void  _slurm_rpc_set_debug_level(slurm_msg_t *msg)
 			log_opts.logfile_level = LOG_LEVEL_QUIET;
 	}
 	log_alter(log_opts, LOG_DAEMON, slurmctld_conf.slurmctld_logfile);
-	unlock_slurmctld (config_read_lock);
+	unlock_slurmctld (config_write_lock);
 
 	conf = slurm_conf_lock();
 	old_debug_level = conf->slurmctld_debug;
-- 
GitLab