From ce4c395ddb8e48dd8c4be465047e09a8cd441de0 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 23 Jul 2004 21:50:03 +0000
Subject: [PATCH] Added infrastructure for system checkpoint: plugin, RPCs,
 APIs, and scontrol options. For now only the NULL plugin is available, but
 this is required for ASC Purple.

---
 configure.ac                                  |   2 +
 doc/man/Makefile.am                           |   2 +
 doc/man/man1/scontrol.1                       |  13 +-
 doc/man/man3/slurm_checkpoint.3               | 176 ++++++++++++
 doc/man/man3/slurm_checkpoint_error.3         |   1 +
 slurm/slurm.h.in                              |  43 +++
 slurm/slurm_errno.h                           |   2 +
 src/api/Makefile.am                           |   1 +
 src/api/checkpoint.c                          | 142 ++++++++++
 src/common/Makefile.am                        |   1 +
 src/common/checkpoint.c                       | 250 ++++++++++++++++++
 src/common/checkpoint.h                       |  44 +++
 src/common/slurm_errno.c                      |   7 +-
 src/common/slurm_protocol_defs.c              |  15 ++
 src/common/slurm_protocol_defs.h              |  15 ++
 src/common/slurm_protocol_pack.c              | 183 ++++++++++---
 src/plugins/Makefile.am                       |   2 +-
 src/plugins/checkpoint/Makefile.am            |   4 +
 src/plugins/checkpoint/none/Makefile.am       |  19 ++
 src/plugins/checkpoint/none/checkpoint_none.c | 115 ++++++++
 src/scontrol/scontrol.c                       |  98 ++++++-
 src/slurmctld/controller.c                    |  10 +-
 src/slurmctld/proc_req.c                      |  55 ++++
 src/slurmctld/slurmctld.h                     |  13 +
 src/slurmctld/step_mgr.c                      |  91 +++++++
 25 files changed, 1254 insertions(+), 50 deletions(-)
 create mode 100644 doc/man/man3/slurm_checkpoint.3
 create mode 100644 doc/man/man3/slurm_checkpoint_error.3
 create mode 100644 src/api/checkpoint.c
 create mode 100644 src/common/checkpoint.c
 create mode 100644 src/common/checkpoint.h
 create mode 100644 src/plugins/checkpoint/Makefile.am
 create mode 100644 src/plugins/checkpoint/none/Makefile.am
 create mode 100644 src/plugins/checkpoint/none/checkpoint_none.c

diff --git a/configure.ac b/configure.ac
index 32353d9154c..17876893f73 100644
--- a/configure.ac
+++ b/configure.ac
@@ -192,6 +192,8 @@ AC_CONFIG_FILES([Makefile
 		 src/plugins/auth/authd/Makefile
 		 src/plugins/auth/munge/Makefile
 		 src/plugins/auth/none/Makefile
+		 src/plugins/checkpoint/Makefile
+		 src/plugins/checkpoint/none/Makefile
 		 src/plugins/jobcomp/Makefile
 		 src/plugins/jobcomp/filetxt/Makefile
 		 src/plugins/jobcomp/none/Makefile
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index e51371d7551..ac4621a7d1a 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -15,6 +15,8 @@ man3_MANS = man3/slurm_hostlist_create.3 \
 	man3/slurm_allocate_resources.3 \
 	man3/slurm_allocate_resources_and_run.3 \
 	man3/slurm_api_version.3 \
+	man3/slurm_checkpoint.3 \
+	man3/slurm_checkpoint_error.3 \
 	man3/slurm_complete_job.3 \
 	man3/slurm_complete_job_step.3 \
 	man3/slurm_confirm_allocation.3 \
diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1
index a4c6a71bf2a..ab63f8f4ba6 100644
--- a/doc/man/man1/scontrol.1
+++ b/doc/man/man1/scontrol.1
@@ -1,4 +1,4 @@
-.TH SCONTROL "1" "April 2004" "scontrol 0.3" "Slurm components"
+.TH SCONTROL "1" "July 2004" "scontrol 0.3" "Slurm components"
 
 .SH "NAME"
 scontrol \- Used view and modify Slurm configuration and state.
@@ -58,6 +58,16 @@ are unavailable to user's group.
 \fIabort\fP
 Instruct the Slurm controller to terminate immediately and generate a core file.
 .TP
+\fIcheckpoint\fP \fICKPT_OP\fP \fIID\fP
+Perform a checkpoint activity on the job step(s) with the specified identification.
+\fICKPT_OP\fP may be \fIdisable\fP, \fIenable\fP, 
+\fIcreate\fP (create a checkpoint and let the job step continue), 
+\fIvacate\fP (create a checkpoint and terminate the job step), 
+\fIerror\fP (report the reason for the last checkpoint failure), 
+or \fIresume\fP (resume execution of the previously checkpointed job steps).
+\fIID\fP can be used to identify a specific job (e.g. "<job_id>", implying all of its steps) 
+or a specific job step (e.g. "<job_id>.<step_id>").
+.TP
 \fIcompleting\fP
 Display all jobs in a COMPLETING state along with associated nodes in either a 
 COMPLETING or DOWN state.
@@ -333,6 +343,7 @@ details.
 /etc/slurm.conf
 .SH "SEE ALSO"
 \fBscancel\fR(1), \fBsinfo\fR(1), \fBsqueue\fR(1), 
+\fBslurm_checkpoint\fR(3),
 \fBslurm_delete_partition\fR(3),
 \fBslurm_load_ctl_conf\fR(3), 
 \fBslurm_load_jobs\fR(3), \fBslurm_load_node\fR(3), 
diff --git a/doc/man/man3/slurm_checkpoint.3 b/doc/man/man3/slurm_checkpoint.3
new file mode 100644
index 00000000000..a5cef316929
--- /dev/null
+++ b/doc/man/man3/slurm_checkpoint.3
@@ -0,0 +1,176 @@
+.TH "Slurm API" "3" "July 2004" "Morris Jette" "Slurm checkpoint functions"
+
+.SH "NAME"
+slurm_checkpoint, slurm_checkpoint_error \- Slurm checkpoint functions
+
+.SH "SYNTAX"
+.LP 
+#include <slurm/slurm.h>
+.LP 
+.LP
+int \fBslurm_checkpoint\fR (
+.br
+	enum check_opts \fIop\fP,
+.br
+	uint32_t \fIjob_id\fP,
+.br
+	uint32_t \fIstep_id\fP
+.br
+);
+.LP
+int \fBslurm_checkpoint_error\fR (
+
+.br
+	uint32_t \fIjob_id\fP,
+.br
+	uint32_t \fIstep_id\fP,
+.br
+	uint32_t *\fIckpt_errno\fP,
+.br
+	char ** \fIckpt_strerror\fP
+.br
+);
+
+.SH "ARGUMENTS"
+.LP 
+.TP
+\fIop\fP
+Specifies operation to perform.
+See the \fBCHECKPOINT OPERATIONS\fR section for details.
+.TP
+\fIjob_id\fP
+SLURM job ID to perform the operation upon.
+.TP
+\fIstep_id\fP
+SLURM job step ID to perform the operation upon. 
+May be NO_VAL if the operation is to be performed on all steps of the specified job
+(only for the \fBslurm_checkpoint\fR function).
+.TP
+\fIckpt_errno\fP
+Set to the last checkpoint error number associated with this slurm job step.
+.TP
+\fIckpt_strerror\fP
+Set to a string descriptive of the last checkpoint error associated with this slurm job step. 
+This storage must be freed by the caller.
+
+.SH "DESCRIPTION"
+.LP
+\fBslurm_checkpoint\fR Perform some checkpoint operation upon a job step. 
+See the \fBCHECKPOINT OPERATIONS\fR section for details.
+.SH "CHECKPOINT OPERATIONS"
+.TP
+\fBCHECK_DISABLE\fR
+Make the identified job step non-checkpointable. 
+This can be issued as needed to prevent checkpointing while 
+a job step is in a critical section or for other reasons.
+.TP
+\fBCHECK_ENABLE\fR
+Make the identified job step checkpointable.
+.TP
+\fBCHECK_CREATE\fR
+Request a checkpoint for the identified job step. 
+Continue its execution upon completion of the checkpoint.
+.TP
+\fBCHECK_VACATE\fR
+Request a checkpoint for the identified job step.
+Terminate its execution upon completion of the checkpoint.
+.TP
+\fBCHECK_RESUME\fR
+Request that a previously checkpointed job resume execution.
+It may continue execution on different nodes than were 
+originally used.
+Execution may be delayed if resources are not immediately 
+available.
+.TP
+\fBCHECK_COMPLETE\fR
+Note that a requested checkpoint has been completed.
+.TP
+\fBCHECK_FAILED\fR
+Note that a requested checkpoint has failed.
+.SH "RETURN VALUE"
+.LP
+Zero is returned upon success. 
+On error, -1 is returned, and the Slurm error code is set appropriately.
+.SH "ERRORS"
+.LP
+\fBESLURM_INVALID_JOB_ID\fR the requested job or job step id does not exist. 
+.LP
+\fBESLURM_ACCESS_DENIED\fR the requesting user lacks authorization for the requested 
+action (e.g. trying to delete or modify another user's job). 
+.LP
+\fBESLURM_DISABLED\fR the requested operation has been disabled for this job step.
+This will occur when a request for checkpoint is issued when they have been disabled.
+.LP
+\fBESLURM_NOT_SUPPORTED\fR the requested operation is not supported on this system.
+
+.SH "EXAMPLE"
+.LP 
+#include <stdio.h>
+.br
+#include <stdlib.h>
+.br
+#include <slurm/slurm.h>
+.br
+#include <slurm/slurm_errno.h>
+.LP 
+int main (int argc, char *argv[])
+.br 
+{
+.br 
+	uint32_t job_id, step_id, ckpt_errno;
+.br
+	char *ckpt_strerror;
+.LP
+	if (argc < 3) {
+.br
+		printf("Usage: %s job_id step_id\\n", argv[0]);
+.br
+		exit(1);
+.br
+	}
+.LP
+	job_id = atoi(argv[1]);
+.br
+	step_id = atoi(argv[2]);
+.br
+	if (slurm_checkpoint_error(job_id, step_id, 
+.br
+			&ckpt_errno, &ckpt_strerror)) {
+.br
+		slurm_perror ("slurm_checkpoint_error:");
+.br 
+		exit (1);
+.br
+	}
+.LP
+	printf ("step:%u.%u ckpt_errno=%u ckpt_strerror:%s\\n", 
+.br 
+	        job_id, step_id, ckpt_errno, ckpt_strerror); 
+.br 
+	free(ckpt_strerror);
+.br 
+	exit (0);
+.br 
+}
+
+.SH "COPYING"
+Copyright (C) 2004 The Regents of the University of California.
+Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+UCRL-CODE-2002-040.
+.LP
+This file is part of SLURM, a resource management program.
+For details, see <http://www.llnl.gov/linux/slurm/>.
+.LP
+SLURM is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+.LP
+SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+.SH "SEE ALSO"
+.LP 
+\fBsrun\fR(1), \fBsqueue\fR(1), \fBfree\fR(3) 
diff --git a/doc/man/man3/slurm_checkpoint_error.3 b/doc/man/man3/slurm_checkpoint_error.3
new file mode 100644
index 00000000000..41c6cb312d4
--- /dev/null
+++ b/doc/man/man3/slurm_checkpoint_error.3
@@ -0,0 +1 @@
+.so man3/slurm_checkpoint.3
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index 3a469040b79..7d3ecc85d43 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -173,6 +173,19 @@ enum ctx_keys {
 	SLURM_STEP_CTX_TID	/* get array of task IDs for specified node */
 };
 
+/* Define checkpoint options */
+enum check_opts {
+	CHECK_DISABLE,		/* disable checkpointing */
+	CHECK_ENABLE,		/* enable checkpointing */
+	CHECK_CREATE,		/* create a checkpoint for this job, 
+				 * job continues execution afterwards */
+	CHECK_VACATE,		/* create a checkpoint for this job,
+				 * job terminates afterwards */
+	CHECK_RESUME,		/* resume a previously checkpointed job */
+	CHECK_COMPLETE,		/* a checkpoint of this job has completed */
+	CHECK_FAILED		/* a checkpoint of this job has failed */
+};
+
 /*****************************************************************************\
  *	PROTOCOL DATA STRUCTURE DEFINITIONS
 \*****************************************************************************/
@@ -1018,6 +1031,36 @@ extern int slurm_reconfigure PARAMS(( void ));
  */
 extern int slurm_shutdown PARAMS(( uint16_t core ));
 
+/*****************************************************************************\
+ *      SLURM JOB CHECKPOINT FUNCTIONS
+\*****************************************************************************/
+
+/*
+ * slurm_checkpoint - perform some checkpoint operation for some job step
+ * IN op      - operation to perform
+ * IN job_id  - job on which to perform operation
+ * IN step_id - job step on which to perform operation
+ * RET 0 or a slurm error code
+ */
+extern int slurm_checkpoint PARAMS(( enum check_opts op, uint32_t job_id,
+		uint32_t step_id ));
+
+
+/*
+ * slurm_checkpoint_error - gather error information for the last checkpoint operation 
+ * for some job step
+ * IN job_id  - job on which to perform operation
+ * IN step_id - job step on which to perform operation
+ * OUT ckpt_errno - error number associated with the last checkpoint operation,
+ *	this value is dependent upon the checkpoint plugin used and may be
+ *	completely unrelated to slurm error codes
+ * OUT ckpt_strerror - string describing the message associated with the last 
+ *	checkpoint operation
+ * RET 0 or a slurm error code
+ */
+extern int slurm_checkpoint_error PARAMS(( uint32_t job_id, uint32_t step_id, 
+		uint32_t *ckpt_errno, char **ckpt_strerror ));
+
 END_C_DECLS
 
 #endif
diff --git a/slurm/slurm_errno.h b/slurm/slurm_errno.h
index 141f27d66d2..196f4744c89 100644
--- a/slurm/slurm_errno.h
+++ b/slurm/slurm_errno.h
@@ -128,6 +128,8 @@ enum {
 	ESLURM_INVALID_SCHEDTYPE_CHANGE,
 	ESLURM_INVALID_SWITCHTYPE_CHANGE,
 	ESLURM_FRAGMENTATION,
+	ESLURM_NOT_SUPPORTED,
+	ESLURM_DISABLED,
 
 	/* switch specific error codes, specific values defined in plugin module */
 	ESLURM_SWITCH_MIN = 3000,
diff --git a/src/api/Makefile.am b/src/api/Makefile.am
index 1539ffbe252..67ae78f5c29 100644
--- a/src/api/Makefile.am
+++ b/src/api/Makefile.am
@@ -47,6 +47,7 @@ libslurm_la_SOURCES =    \
 	allocate.c       \
 	bnr.c            \
 	cancel.c         \
+	checkpoint.c     \
 	complete.c       \
 	config_info.c    \
 	init_msg.c       \
diff --git a/src/api/checkpoint.c b/src/api/checkpoint.c
new file mode 100644
index 00000000000..61f33725b88
--- /dev/null
+++ b/src/api/checkpoint.c
@@ -0,0 +1,142 @@
+/*****************************************************************************\
+ *  checkpoint.c - Process checkpoint related functions.
+ *****************************************************************************
+ *  Copyright (C) 2004 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Morris Jette <jette1@llnl.gov> et. al.
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <string.h>
+#include <slurm/slurm.h>
+
+#include "src/common/checkpoint.h"
+#include "src/common/slurm_protocol_api.h"
+
+static int _handle_rc_msg(slurm_msg_t *msg);
+
+/*
+ * slurm_checkpoint - perform some checkpoint operation for some job step
+ * IN op      - operation to perform
+ * IN job_id  - job on which to perform operation
+ * IN step_id - job step on which to perform operation
+ * RET 0 or a slurm error code
+ */
+extern int slurm_checkpoint ( enum check_opts op, uint32_t job_id,
+                uint32_t step_id)
+{
+	int ctl_rc;
+	checkpoint_msg_t ckpt_req;
+	slurm_msg_t req_msg;
+
+	/* Describe the operation and the job step it targets */
+	ckpt_req.step_id = step_id;
+	ckpt_req.job_id  = job_id;
+	ckpt_req.op      = op;
+
+	/* Wrap the request for delivery to the controller */
+	req_msg.data     = &ckpt_req;
+	req_msg.msg_type = REQUEST_CHECKPOINT;
+
+	/* Give up if the exchange with the controller itself failed */
+	if (slurm_send_recv_controller_rc_msg(&req_msg, &ctl_rc) < 0)
+		return SLURM_ERROR;
+
+	if (ctl_rc != 0)
+		slurm_seterrno_ret(ctl_rc);
+	return SLURM_SUCCESS;
+}
+
+/*
+ * slurm_checkpoint_error - gather error information for the last checkpoint operation
+ * for some job step
+ * IN job_id  - job on which to perform operation
+ * IN step_id - job step on which to perform operation
+ * OUT ckpt_errno - error number associated with the last checkpoint operation,
+ *	this value is dependent upon the checkpoint plugin used and may be
+ *	completely unrelated to slurm error codes
+ * OUT ckpt_strerror - string describing the message associated with the last
+ *	checkpoint operation, caller must free(), NULL if none was reported
+ * RET 0 or a slurm error code
+ */
+extern int slurm_checkpoint_error ( uint32_t job_id, uint32_t step_id, 
+		uint32_t *ckpt_errno, char **ckpt_strerror)
+{
+	int rc;
+	slurm_msg_t msg, resp_msg;
+	checkpoint_msg_t req;
+	checkpoint_resp_msg_t *ckpt_resp;
+
+	if ((ckpt_errno == NULL) || (ckpt_strerror == NULL))
+		return EINVAL;
+
+	/* Give the outputs defined values on every path that returns success */
+	*ckpt_errno    = 0;
+	*ckpt_strerror = NULL;
+
+	/* Request message: */
+	req.op       = CHECK_ERROR;
+	req.job_id   = job_id;
+	req.step_id  = step_id;
+	msg.msg_type = REQUEST_CHECKPOINT;
+	msg.data     = &req;
+
+	rc = slurm_send_recv_controller_msg(&msg, &resp_msg);
+	if (rc == SLURM_SOCKET_ERROR) 
+		return SLURM_SOCKET_ERROR;
+
+	switch (resp_msg.msg_type) {
+	case RESPONSE_SLURM_RC:
+		if (_handle_rc_msg(&resp_msg) < 0)
+			return SLURM_PROTOCOL_ERROR;
+		break;
+	case RESPONSE_CHECKPOINT:
+		ckpt_resp = (checkpoint_resp_msg_t *) resp_msg.data;
+		*ckpt_errno = ckpt_resp->ckpt_errno;
+		if (ckpt_resp->ckpt_strerror)	/* strdup(NULL) is undefined */
+			*ckpt_strerror = strdup(ckpt_resp->ckpt_strerror);
+		slurm_free_checkpoint_resp_msg(ckpt_resp);
+		break;
+	default:
+		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
+	}
+	return SLURM_SUCCESS;
+}
+
+/*
+ *  Process a return code message: release its payload, then report
+ *    failure through slurm_seterrno_ret() when the code is nonzero.
+ *    Otherwise, returns 0 (SLURM_SUCCESS)
+ */
+static int
+_handle_rc_msg(slurm_msg_t *msg)
+{
+	return_code_msg_t *rc_msg = (return_code_msg_t *) msg->data;
+	int rc = rc_msg->return_code;
+
+	slurm_free_return_code_msg(rc_msg);
+	if (rc != 0)
+		slurm_seterrno_ret(rc);
+	return SLURM_SUCCESS;
+}
diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index dcd08647adc..30ef5b07d85 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -62,6 +62,7 @@ libcommon_la_SOURCES = 			\
 	getopt.h getopt.c getopt1.c     \
 	$(build_unsetenv_src)		\
 	hostlist.c hostlist.h		\
+	checkpoint.c checkpoint.h	\
 	slurm_xlator.h
 
 EXTRA_libcommon_la_SOURCES = 	\
diff --git a/src/common/checkpoint.c b/src/common/checkpoint.c
new file mode 100644
index 00000000000..e771e88e90e
--- /dev/null
+++ b/src/common/checkpoint.c
@@ -0,0 +1,250 @@
+/*****************************************************************************\
+ *  checkpoint.c - implementation-independent checkpoint functions
+ *****************************************************************************
+ *  Copyright (C) 2004 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Morris Jette <jette1@llnl.com>
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "src/common/macros.h"
+#include "src/common/plugin.h"
+#include "src/common/plugrack.h"
+#include "src/common/checkpoint.h"
+#include "src/common/slurm_protocol_api.h"
+#include "src/common/xmalloc.h"
+#include "src/common/xassert.h"
+#include "src/common/xstring.h"
+#include "src/slurmctld/slurmctld.h"
+
+/*
+ * WARNING:  Do not change the order of these fields or add additional
+ * fields at the beginning of the structure.  If you do, checkpoint
+ * plugins will stop working.  If you need to add fields, add them 
+ * at the end of the structure.
+ */
+typedef struct slurm_checkpoint_ops {
+	int     (*ckpt_op) (enum check_opts op, struct step_record * step_ptr);
+	int	(*ckpt_error) (struct step_record * step_ptr, uint32_t *ckpt_errno, 
+			char **ckpt_strerror);
+} slurm_checkpoint_ops_t;
+
+/*
+ * A global checkpoint context.  "Global" in the sense that there's
+ * only one, with static bindings.  We don't export it.
+ */
+
+struct slurm_checkpoint_context {
+	char *			checkpoint_type;
+	plugrack_t		plugin_list;
+	plugin_handle_t		cur_plugin;
+	int			checkpoint_errno;
+	slurm_checkpoint_ops_t	ops;
+};
+
+static slurm_checkpoint_context_t g_context = NULL;
+static pthread_mutex_t      context_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static slurm_checkpoint_context_t
+_slurm_checkpoint_context_create( const char *checkpoint_type)
+{
+	slurm_checkpoint_context_t ctx;
+
+	/* A context is meaningless without a plugin type name. */
+	if ( !checkpoint_type ) {
+		debug3( "_slurm_checkpoint_context_create: no checkpoint type");
+		return NULL;
+	}
+
+	ctx = xmalloc( sizeof( *ctx ) );
+	ctx->checkpoint_errno = SLURM_SUCCESS;
+
+	/* Keep a private copy of the checkpoint type name. */
+	ctx->checkpoint_type = xstrdup( checkpoint_type );
+	if ( !ctx->checkpoint_type ) {
+		debug3( "can't make local copy of checkpoint type" );
+		xfree( ctx );
+		return NULL; 
+	}
+
+	/* No plugin is bound yet; the rack is built on first lookup. */
+	ctx->cur_plugin = PLUGIN_INVALID_HANDLE; 
+	ctx->plugin_list = NULL; 
+
+	return ctx;
+}
+
+static int
+_slurm_checkpoint_context_destroy( slurm_checkpoint_context_t c )
+{
+	/*
+	 * The plugin rack may refuse to go away (plugins might still be
+	 * loaded and active); in that case the context is left intact
+	 * and the failure is reported to the caller.
+	 */
+	if ( c->plugin_list &&
+	     ( plugrack_destroy( c->plugin_list ) != SLURM_SUCCESS ) )
+		return SLURM_ERROR;
+
+	/* Release the type string before the context that owns it. */
+	xfree( c->checkpoint_type );
+	xfree( c );
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * Resolve the operations from the plugin.
+ */
+static slurm_checkpoint_ops_t *
+_slurm_checkpoint_get_ops( slurm_checkpoint_context_t c )
+{
+	/*
+	 * Symbol names are resolved positionally into c->ops, so this
+	 * table must list them in slurm_checkpoint_ops_t field order.
+	 */
+	static const char *syms[] = {
+		"slurm_ckpt_op",
+		"slurm_ckpt_error"
+	};
+	int n_syms = sizeof( syms ) / sizeof( syms[0] );
+
+	/* Build and populate the plugin rack the first time through. */
+	if ( !c->plugin_list ) {
+		char *dir_name;
+
+		c->plugin_list = plugrack_create();
+		if ( !c->plugin_list ) {
+			error( "Unable to create a plugin manager" );
+			return NULL;
+		}
+
+		/* Restrict the rack to plugins of major type "checkpoint". */
+		plugrack_set_major_type( c->plugin_list, "checkpoint" );
+		plugrack_set_paranoia( c->plugin_list,
+				       PLUGRACK_PARANOIA_NONE, 0 );
+
+		dir_name = slurm_get_plugin_dir();
+		plugrack_read_dir( c->plugin_list, dir_name );
+		xfree( dir_name );
+	}
+
+	/* Select the plugin matching the configured checkpoint type. */
+	c->cur_plugin =
+		plugrack_use_by_type( c->plugin_list, c->checkpoint_type );
+	if ( c->cur_plugin == PLUGIN_INVALID_HANDLE ) {
+		error( "can't find a plugin for type %s", c->checkpoint_type );
+		return NULL;
+	}
+
+	/* Every listed symbol must resolve, else the plugin is unusable. */
+	if ( plugin_get_syms( c->cur_plugin, n_syms, syms,
+			      (void **) &c->ops ) < n_syms ) {
+		error( "incomplete checkpoint plugin detected" );
+		return NULL;
+	}
+
+	return &c->ops;
+}
+
+/*
+ * g_slurm_checkpoint_init - (re)create the global checkpoint context and
+ *	resolve the operations exported by the checkpoint plugin
+ * RET SLURM_SUCCESS, or SLURM_ERROR if the context or plugin is unusable
+ */
+extern int
+g_slurm_checkpoint_init(void)
+{
+	int retval = SLURM_SUCCESS;
+	char *checkpoint_type;
+
+	slurm_mutex_lock( &context_lock );
+
+	if ( g_context )	/* re-initialization: drop the old context */
+		_slurm_checkpoint_context_destroy(g_context);
+#if 0
+	checkpoint_type = slurm_get_checkpoint_type();
+#else
+	checkpoint_type = xstrdup("checkpoint/none");
+#endif
+	g_context = _slurm_checkpoint_context_create( checkpoint_type );
+	if ( g_context == NULL ) {
+		error( "cannot create a context for %s", checkpoint_type );
+		retval = SLURM_ERROR;
+	} else if ( _slurm_checkpoint_get_ops( g_context ) == NULL ) {
+		error( "cannot resolve checkpoint plugin operations" );
+		_slurm_checkpoint_context_destroy( g_context );
+		g_context = NULL;
+		retval = SLURM_ERROR;
+	} else		/* announce the plugin only once it is usable */
+		verbose("Checkpoint plugin loaded: %s", checkpoint_type);
+	xfree(checkpoint_type);
+
+	slurm_mutex_unlock( &context_lock );
+	return retval;
+}
+
+extern void g_slurm_checkpoint_fini(void)	/* release the global context */
+{
+	slurm_mutex_lock( &context_lock );
+	if ( g_context &&
+	     (_slurm_checkpoint_context_destroy( g_context ) == SLURM_SUCCESS) )
+		g_context = NULL;	/* freed: do not leave a dangling pointer */
+	slurm_mutex_unlock( &context_lock );
+}
+
+extern int
+g_slurm_checkpoint_op(enum check_opts op, struct step_record * step_ptr)
+{
+	int rc;
+
+	/* The plugin is invoked with context_lock held. */
+	slurm_mutex_lock( &context_lock );
+	if ( g_context == NULL ) {
+		error ("slurm_checkpoint plugin context not initialized");
+		rc = ENOENT;
+	} else
+		rc = (g_context->ops.ckpt_op)(op, step_ptr);
+	slurm_mutex_unlock( &context_lock );
+	return rc;
+}
+
+extern int
+g_slurm_checkpoint_error(struct step_record * step_ptr, 
+		uint32_t *ckpt_errno, char **ckpt_strerror)
+{
+	int rc;
+
+	/* The plugin is invoked with context_lock held. */
+	slurm_mutex_lock( &context_lock );
+	if ( g_context == NULL ) {
+		error ("slurm_checkpoint plugin context not initialized");
+		rc = ENOENT;
+	} else
+		rc = (g_context->ops.ckpt_error)(step_ptr, ckpt_errno,
+						 ckpt_strerror);
+	slurm_mutex_unlock( &context_lock );
+	return rc;
+}
+
diff --git a/src/common/checkpoint.h b/src/common/checkpoint.h
new file mode 100644
index 00000000000..35c4f117eb4
--- /dev/null
+++ b/src/common/checkpoint.h
@@ -0,0 +1,44 @@
+/*****************************************************************************\
+ *  checkpoint.h - implementation-independent checkpoint API definitions. 
+ *****************************************************************************
+ *  Copyright (C) 2004 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Morris Jette <jette1@llnl.com>
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+
+#ifndef __CHECKPOINT_H__
+#define __CHECKPOINT_H__
+
+#include "slurm/slurm.h"
+#include "src/slurmctld/slurmctld.h"
+
+#define CHECK_ERROR 100		/* Used like enum check_opts, but not exported to user */
+
+typedef struct slurm_checkpoint_context * slurm_checkpoint_context_t;
+
+extern int g_slurm_checkpoint_init(void);
+extern void g_slurm_checkpoint_fini(void);
+extern int g_slurm_checkpoint_op(enum check_opts op, struct step_record * step_ptr);
+extern int g_slurm_checkpoint_error(struct step_record * step_ptr, 
+		uint32_t *ckpt_errno, char **ckpt_strerror);
+
+#endif /*__CHECKPOINT_H__*/
+
diff --git a/src/common/slurm_errno.c b/src/common/slurm_errno.c
index 6947c6354b7..ce8419f0508 100644
--- a/src/common/slurm_errno.c
+++ b/src/common/slurm_errno.c
@@ -159,7 +159,12 @@ static slurm_errtab_t slurm_errtab[] = {
 	{ ESLURM_INVALID_SWITCHTYPE_CHANGE,
 	  "SwitchType change requires restart of all SLURM daemons and jobs"},
 	{ ESLURM_FRAGMENTATION,
-	  "Resources too fragmented for allocation"		},
+	  "Immediate execution impossible, "
+	  "resources too fragmented for allocation"		},
+	{ ESLURM_NOT_SUPPORTED,
+	  "Requested operation not supported on this system"	},
+	{ ESLURM_DISABLED,
+	  "Requested operation is presently disabled"		},
 
 	/* slurmd error codes */
 
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index c4920b7a9b9..5b119c5aa4e 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -392,6 +392,21 @@ void inline slurm_free_srun_timeout_msg(srun_timeout_msg_t * msg)
 	}
 }
 
+void inline slurm_free_checkpoint_msg(checkpoint_msg_t *msg)
+{
+	if (msg == NULL)	/* nothing to release */
+		return;
+	xfree(msg);		/* the request has no allocated members */
+}
+
+void inline slurm_free_checkpoint_resp_msg(checkpoint_resp_msg_t *msg)
+{
+	if (msg == NULL)	/* nothing to release */
+		return;
+	xfree(msg->ckpt_strerror);	/* error string is freed with the message */
+	xfree(msg);
+}
+
 char *job_state_string(enum job_states inx)
 {
 	static char *job_state_string[] = {
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index fbbe5d1961b..7921bef728d 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -124,6 +124,8 @@ typedef enum {
 	RESPONSE_CANCEL_JOB_STEP,
 	REQUEST_COMPLETE_JOB_STEP,
 	RESPONSE_COMPLETE_JOB_STEP,
+	REQUEST_CHECKPOINT,
+	RESPONSE_CHECKPOINT,
 
 	REQUEST_LAUNCH_TASKS = 6001,
 	RESPONSE_LAUNCH_TASKS,
@@ -391,6 +393,17 @@ typedef struct srun_timeout_msg {
 	time_t   timeout;	/* when job scheduled to be killed */
 } srun_timeout_msg_t;
 
+typedef struct checkpoint_msg {
+	uint16_t op;		/* checkpoint operation, see enum check_opts */
+	uint32_t job_id;	/* slurm job_id */
+	uint32_t step_id;	/* slurm step_id */
+} checkpoint_msg_t;
+
+typedef struct checkpoint_resp_msg {
+	uint32_t ckpt_errno;	/* errno from last checkpoint operation */
+	char * ckpt_strerror;	/* string descriptive of ckpt_errno */
+} checkpoint_resp_msg_t;
+
 /*****************************************************************************\
  * Slurm API Message Types
 \*****************************************************************************/
@@ -469,6 +482,8 @@ void inline slurm_free_epilog_complete_msg(epilog_complete_msg_t * msg);
 void inline slurm_free_srun_ping_msg(srun_ping_msg_t * msg);
 void inline slurm_free_srun_node_fail_msg(srun_node_fail_msg_t * msg);
 void inline slurm_free_srun_timeout_msg(srun_timeout_msg_t * msg);
+void inline slurm_free_checkpoint_msg(checkpoint_msg_t *msg);
+void inline slurm_free_checkpoint_resp_msg(checkpoint_resp_msg_t *msg);
 
 void slurm_free_resource_allocation_response_msg (
 		resource_allocation_response_msg_t * msg);
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index 16a20ac176b..0cf6117cdfa 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -43,6 +43,7 @@
 #include "src/common/slurm_protocol_pack.h"
 #include "src/common/switch.h"
 #include "src/common/xmalloc.h"
+#include "src/common/xassert.h"
 
 #define _pack_job_info_msg(msg,buf)		_pack_buffer_msg(msg,buf)
 #define _pack_job_step_info_msg(msg,buf)	_pack_buffer_msg(msg,buf)
@@ -229,6 +230,12 @@ static int  _unpack_srun_node_fail_msg(srun_node_fail_msg_t ** msg_ptr,
 static void _pack_srun_timeout_msg(srun_timeout_msg_t * msg, Buf buffer);
 static int  _unpack_srun_timeout_msg(srun_timeout_msg_t ** msg_ptr, Buf buffer);
 
+static void _pack_checkpoint_msg(checkpoint_msg_t *msg, Buf buffer);
+static int  _unpack_checkpoint_msg(checkpoint_msg_t **msg_ptr, Buf buffer);
+
+static void _pack_checkpoint_resp_msg(checkpoint_resp_msg_t *msg, Buf buffer);
+static int  _unpack_checkpoint_resp_msg(checkpoint_resp_msg_t **msg_ptr, Buf buffer);
+
 static void _pack_buffer_msg(slurm_msg_t * msg, Buf buffer);
 
 /* pack_header
@@ -483,6 +490,12 @@ pack_msg(slurm_msg_t const *msg, Buf buffer)
 	 case SRUN_TIMEOUT:
 		_pack_srun_timeout_msg((srun_timeout_msg_t *)msg->data, buffer);
 		break;
+	 case REQUEST_CHECKPOINT:
+		_pack_checkpoint_msg((checkpoint_msg_t *)msg->data, buffer);
+		break;
+	 case RESPONSE_CHECKPOINT:
+		_pack_checkpoint_resp_msg((checkpoint_resp_msg_t *)msg->data, buffer);
+		break;
 	 default:
 		 debug("No pack method for msg type %i", msg->msg_type);
 		 return EINVAL;
@@ -727,6 +740,14 @@ unpack_msg(slurm_msg_t * msg, Buf buffer)
 		rc = _unpack_srun_timeout_msg((srun_timeout_msg_t **)
 				& msg->data, buffer);
 		break;
+	 case REQUEST_CHECKPOINT:
+		rc = _unpack_checkpoint_msg((checkpoint_msg_t **)
+				& msg->data, buffer);
+		break;
+	 case RESPONSE_CHECKPOINT:
+		rc = _unpack_checkpoint_resp_msg((checkpoint_resp_msg_t **)
+				& msg->data, buffer);
+		break;
 	 default:
 		 debug("No unpack method for msg type %i", msg->msg_type);
 		 return EINVAL;
@@ -741,7 +762,7 @@ unpack_msg(slurm_msg_t * msg, Buf buffer)
 static void
 _pack_update_node_msg(update_node_msg_t * msg, Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	packstr(msg->node_names, buffer);
 	pack16(msg->node_state, buffer);
@@ -755,7 +776,7 @@ _unpack_update_node_msg(update_node_msg_t ** msg, Buf buffer)
 	update_node_msg_t *tmp_ptr;
 
 	/* alloc memory for structure */
-	assert(msg != NULL);
+	xassert(msg != NULL);
 	tmp_ptr = xmalloc(sizeof(update_node_msg_t));
 	*msg = tmp_ptr;
 
@@ -777,7 +798,7 @@ _pack_node_registration_status_msg(slurm_node_registration_status_msg_t *
 				   msg, Buf buffer)
 {
 	int i;
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	pack_time(msg->timestamp, buffer);
 	pack32(msg->status, buffer);
@@ -803,7 +824,7 @@ _unpack_node_registration_status_msg(slurm_node_registration_status_msg_t
 	slurm_node_registration_status_msg_t *node_reg_ptr;
 
 	/* alloc memory for structure */
-	assert(msg != NULL);
+	xassert(msg != NULL);
 	node_reg_ptr = xmalloc(sizeof(slurm_node_registration_status_msg_t));
 	*msg = node_reg_ptr;
 
@@ -841,7 +862,7 @@ static void
 _pack_resource_allocation_response_msg(resource_allocation_response_msg_t *
 				       msg, Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	pack32(msg->error_code, buffer);
 	pack32(msg->job_id, buffer);
@@ -864,7 +885,7 @@ _unpack_resource_allocation_response_msg(resource_allocation_response_msg_t
 	resource_allocation_response_msg_t *tmp_ptr;
 
 	/* alloc memory for structure */
-	assert(msg != NULL);
+	xassert(msg != NULL);
 	tmp_ptr = xmalloc(sizeof(resource_allocation_response_msg_t));
 	*msg = tmp_ptr;
 
@@ -912,7 +933,7 @@ _unpack_resource_allocation_response_msg(resource_allocation_response_msg_t
 static void
  _pack_resource_allocation_and_run_response_msg
     (resource_allocation_and_run_response_msg_t * msg, Buf buffer) {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	pack32(msg->job_id, buffer);
 	packstr(msg->node_list, buffer);
@@ -936,7 +957,7 @@ static int
 	resource_allocation_and_run_response_msg_t *tmp_ptr;
 
 	/* alloc memory for structure */
-	assert(msg != NULL);
+	xassert(msg != NULL);
 	tmp_ptr = xmalloc(sizeof(resource_allocation_and_run_response_msg_t));
 	*msg = tmp_ptr;
 
@@ -994,7 +1015,7 @@ static int
 static void
 _pack_submit_response_msg(submit_response_msg_t * msg, Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	pack32(msg->job_id, buffer);
 	pack32(msg->error_code, buffer);
@@ -1006,7 +1027,7 @@ _unpack_submit_response_msg(submit_response_msg_t ** msg, Buf buffer)
 	submit_response_msg_t *tmp_ptr;
 
 	/* alloc memory for structure */
-	assert(msg != NULL);
+	xassert(msg != NULL);
 	tmp_ptr = xmalloc(sizeof(submit_response_msg_t));
 	*msg = tmp_ptr;
 
@@ -1032,7 +1053,7 @@ _unpack_node_info_msg(node_info_msg_t ** msg, Buf buffer)
 	int i;
 	node_info_t *node = NULL;
 
-	assert(msg != NULL);
+	xassert(msg != NULL);
 	*msg = xmalloc(sizeof(node_info_msg_t));
 
 	/* load buffer's header (data structure version and time) */
@@ -1062,7 +1083,7 @@ _unpack_node_info_members(node_info_t * node, Buf buffer)
 {
 	uint16_t uint16_tmp;
 
-	assert(node != NULL);
+	xassert(node != NULL);
 
 	safe_unpackstr_xmalloc(&node->name, &uint16_tmp, buffer);
 	safe_unpack16(&node->node_state, buffer);
@@ -1088,7 +1109,7 @@ _unpack_node_info_members(node_info_t * node, Buf buffer)
 static void
 _pack_update_partition_msg(update_part_msg_t * msg, Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	packstr(msg->allow_groups, buffer);
 	pack16(msg-> default_part, buffer);
@@ -1110,7 +1131,7 @@ _unpack_update_partition_msg(update_part_msg_t ** msg, Buf buffer)
 	uint16_t uint16_tmp;
 	update_part_msg_t *tmp_ptr;
 
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	/* alloc memory for structure */
 	tmp_ptr = xmalloc(sizeof(update_part_msg_t));
@@ -1142,7 +1163,7 @@ _unpack_update_partition_msg(update_part_msg_t ** msg, Buf buffer)
 static void
 _pack_delete_partition_msg(delete_part_msg_t * msg, Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	packstr(msg->name,         buffer);
 }
@@ -1153,7 +1174,7 @@ _unpack_delete_partition_msg(delete_part_msg_t ** msg, Buf buffer)
 	uint16_t uint16_tmp;
 	delete_part_msg_t *tmp_ptr;
 
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	/* alloc memory for structure */
 	tmp_ptr = xmalloc(sizeof(delete_part_msg_t));
@@ -1172,7 +1193,7 @@ static void
 _pack_job_step_create_request_msg(job_step_create_request_msg_t
 				  * msg, Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	pack32(msg->job_id, buffer);
 	pack32(msg->user_id, buffer);
@@ -1195,7 +1216,7 @@ _unpack_job_step_create_request_msg(job_step_create_request_msg_t ** msg,
 	job_step_create_request_msg_t *tmp_ptr;
 
 	/* alloc memory for structure */
-	assert(msg != NULL);
+	xassert(msg != NULL);
 	tmp_ptr = xmalloc(sizeof(job_step_create_request_msg_t));
 	*msg = tmp_ptr;
 
@@ -1224,7 +1245,7 @@ _unpack_job_step_create_request_msg(job_step_create_request_msg_t ** msg,
 static void
 _pack_kill_job_msg(kill_job_msg_t * msg, Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	pack32(msg->job_id,  buffer);
 	pack32(msg->job_uid, buffer);
@@ -1236,7 +1257,7 @@ _unpack_kill_job_msg(kill_job_msg_t ** msg, Buf buffer)
 	kill_job_msg_t *tmp_ptr;
 
 	/* alloc memory for structure */
-	assert(msg);
+	xassert(msg);
 	tmp_ptr = xmalloc(sizeof(kill_job_msg_t));
 	*msg = tmp_ptr;
 
@@ -1254,7 +1275,7 @@ _unpack_kill_job_msg(kill_job_msg_t ** msg, Buf buffer)
 static void 
 _pack_epilog_comp_msg(epilog_complete_msg_t * msg, Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	pack32(msg->job_id, buffer);
 	pack32(msg->return_code, buffer);
@@ -1268,7 +1289,7 @@ _unpack_epilog_comp_msg(epilog_complete_msg_t ** msg, Buf buffer)
 	uint16_t uint16_tmp;
 
 	/* alloc memory for structure */
-	assert(msg);
+	xassert(msg);
 	tmp_ptr = xmalloc(sizeof(epilog_complete_msg_t));
 	*msg = tmp_ptr;
 
@@ -1286,7 +1307,7 @@ _unpack_epilog_comp_msg(epilog_complete_msg_t ** msg, Buf buffer)
 static void
 _pack_update_job_time_msg(job_time_msg_t * msg, Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	pack32(msg->job_id, buffer);
 	pack_time((uint32_t) msg->expiration_time, buffer);
@@ -1298,7 +1319,7 @@ _unpack_update_job_time_msg(job_time_msg_t ** msg, Buf buffer)
 	job_time_msg_t *tmp_ptr;
 
 	/* alloc memory for structure */
-	assert(msg);
+	xassert(msg);
 	tmp_ptr = xmalloc(sizeof(job_time_msg_t));
 	*msg = tmp_ptr;
 
@@ -1316,7 +1337,7 @@ static void
 _pack_job_step_create_response_msg(job_step_create_response_msg_t * msg,
 				   Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	pack32(msg->job_step_id, buffer);
 	packstr(msg->node_list, buffer);
@@ -1333,7 +1354,7 @@ _unpack_job_step_create_response_msg(job_step_create_response_msg_t ** msg,
 	job_step_create_response_msg_t *tmp_ptr;
 
 	/* alloc memory for structure */
-	assert(msg != NULL);
+	xassert(msg != NULL);
 	tmp_ptr = xmalloc(sizeof(job_step_create_response_msg_t));
 	*msg = tmp_ptr;
 
@@ -1361,6 +1382,8 @@ _unpack_job_step_create_response_msg(job_step_create_response_msg_t ** msg,
 static void
 _pack_partition_info_msg(slurm_msg_t * msg, Buf buffer)
 {
+	xassert(msg != NULL);
+
 	packmem_array(msg->data, msg->data_size, buffer);
 }
 
@@ -1370,6 +1393,7 @@ _unpack_partition_info_msg(partition_info_msg_t ** msg, Buf buffer)
 	int i;
 	partition_info_t *partition = NULL;
 
+	xassert(msg != NULL);
 	*msg = xmalloc(sizeof(partition_info_msg_t));
 
 	/* load buffer's header (data structure version and time) */
@@ -1510,6 +1534,7 @@ _unpack_job_step_info_response_msg(job_step_info_response_msg_t
 	int i = 0;
 	job_step_info_t *step;
 
+	xassert(msg != NULL);
 	*msg = xmalloc(sizeof(job_step_info_response_msg_t));
 
 	safe_unpack_time(&(*msg)->last_update, buffer);
@@ -1534,6 +1559,7 @@ _unpack_job_step_info_response_msg(job_step_info_response_msg_t
 static void
 _pack_buffer_msg(slurm_msg_t * msg, Buf buffer)
 {
+	xassert(msg != NULL);
 	packmem_array(msg->data, msg->data_size, buffer);
 }
 
@@ -1543,6 +1569,7 @@ _unpack_job_info_msg(job_info_msg_t ** msg, Buf buffer)
 	int i;
 	job_info_t *job = NULL;
 
+	xassert(msg != NULL);
 	*msg = xmalloc(sizeof(job_info_msg_t));
 
 	/* load buffer's header (data structure version and time) */
@@ -1949,6 +1976,7 @@ _unpack_old_job_desc_msg(old_job_alloc_msg_t **
 static void
 _pack_last_update_msg(last_update_msg_t * msg, Buf buffer)
 {
+	xassert(msg != NULL);
 	pack_time(msg->last_update, buffer);
 }
 
@@ -1957,6 +1985,7 @@ _unpack_last_update_msg(last_update_msg_t ** msg, Buf buffer)
 {
 	last_update_msg_t *last_update_msg;
 
+	xassert(msg != NULL);
 	last_update_msg = xmalloc(sizeof(last_update_msg_t));
 	*msg = last_update_msg;
 
@@ -1972,6 +2001,7 @@ _unpack_last_update_msg(last_update_msg_t ** msg, Buf buffer)
 static void
 _pack_return_code_msg(return_code_msg_t * msg, Buf buffer)
 {
+	xassert(msg != NULL);
 	pack32(msg->return_code, buffer);
 }
 
@@ -1980,6 +2010,7 @@ _unpack_return_code_msg(return_code_msg_t ** msg, Buf buffer)
 {
 	return_code_msg_t *return_code_msg;
 
+	xassert(msg != NULL);
 	return_code_msg = xmalloc(sizeof(return_code_msg_t));
 	*msg = return_code_msg;
 
@@ -1996,6 +2027,7 @@ static void
 _pack_reattach_tasks_request_msg(reattach_tasks_request_msg_t * msg,
 				 Buf buffer)
 {
+	xassert(msg != NULL);
 	pack32(msg->job_id, buffer);
 	pack32(msg->job_step_id, buffer);
 	pack32(msg->srun_node_id, buffer);
@@ -2014,6 +2046,7 @@ _unpack_reattach_tasks_request_msg(reattach_tasks_request_msg_t ** msg_ptr,
 	uint16_t uint16_tmp;
 	reattach_tasks_request_msg_t *msg;
 
+	xassert(msg_ptr != NULL);
 	msg = xmalloc(sizeof(*msg));
 	*msg_ptr = msg;
 
@@ -2041,6 +2074,7 @@ static void
 _pack_reattach_tasks_response_msg(reattach_tasks_response_msg_t * msg,
 				  Buf buffer)
 {
+	xassert(msg != NULL);
 	packstr(msg->node_name,   buffer);
 	packstr(msg->executable_name, buffer);
 	pack32(msg->return_code,  buffer);
@@ -2057,6 +2091,8 @@ _unpack_reattach_tasks_response_msg(reattach_tasks_response_msg_t ** msg_ptr,
 	uint32_t ntasks;
 	uint16_t uint16_tmp;
 	reattach_tasks_response_msg_t *msg = xmalloc(sizeof(*msg));
+
+	xassert(msg_ptr != NULL);
 	*msg_ptr = msg;
 
 	safe_unpackstr_xmalloc(&msg->node_name, &uint16_tmp, buffer);
@@ -2080,6 +2116,7 @@ _unpack_reattach_tasks_response_msg(reattach_tasks_response_msg_t ** msg_ptr,
 static void
 _pack_task_exit_msg(task_exit_msg_t * msg, Buf buffer)
 {
+	xassert(msg != NULL);
 	pack32(msg->return_code, buffer);
 	pack32(msg->num_tasks, buffer);
 	pack32_array(msg->task_id_list,
@@ -2092,6 +2129,7 @@ _unpack_task_exit_msg(task_exit_msg_t ** msg_ptr, Buf buffer)
 	task_exit_msg_t *msg;
 	uint32_t uint32_tmp;
 
+	xassert(msg_ptr != NULL);
 	msg = xmalloc(sizeof(task_exit_msg_t));
 	*msg_ptr = msg;
 
@@ -2112,6 +2150,7 @@ _unpack_task_exit_msg(task_exit_msg_t ** msg_ptr, Buf buffer)
 static void
 _pack_launch_tasks_response_msg(launch_tasks_response_msg_t * msg, Buf buffer)
 {
+	xassert(msg != NULL);
 	pack32(msg->return_code, buffer);
 	packstr(msg->node_name, buffer);
 	pack32(msg->srun_node_id, buffer);
@@ -2128,6 +2167,7 @@ _unpack_launch_tasks_response_msg(launch_tasks_response_msg_t **
 	uint32_t uint32_tmp;
 	launch_tasks_response_msg_t *msg;
 
+	xassert(msg_ptr != NULL);
 	msg = xmalloc(sizeof(launch_tasks_response_msg_t));
 	*msg_ptr = msg;
 
@@ -2150,6 +2190,7 @@ _unpack_launch_tasks_response_msg(launch_tasks_response_msg_t **
 static void
 _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer)
 {
+	xassert(msg != NULL);
 	pack32(msg->job_id, buffer);
 	pack32(msg->job_step_id, buffer);
 	pack32(msg->nnodes, buffer);
@@ -2182,6 +2223,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t **
 	uint32_t uint32_tmp;
 	launch_tasks_request_msg_t *msg;
 
+	xassert(msg_ptr != NULL);
 	msg = xmalloc(sizeof(launch_tasks_request_msg_t));
 	*msg_ptr = msg;
 
@@ -2227,6 +2269,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t **
 static void
 _pack_spawn_task_request_msg(spawn_task_request_msg_t * msg, Buf buffer)
 {
+	xassert(msg != NULL);
 	pack32(msg->job_id, buffer);
 	pack32(msg->job_step_id, buffer);
 	pack32(msg->nnodes, buffer);
@@ -2252,6 +2295,7 @@ _unpack_spawn_task_request_msg(spawn_task_request_msg_t **
 	uint16_t uint16_tmp;
 	spawn_task_request_msg_t *msg;
 
+	xassert(msg_ptr != NULL);
 	msg = xmalloc(sizeof(launch_tasks_request_msg_t));
 	*msg_ptr = msg;
 
@@ -2559,7 +2603,7 @@ _unpack_slurm_addr_array(slurm_addr ** slurm_address,
 static void
 _pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	pack32(msg->job_id, buffer);
 	pack32(msg->uid, buffer);
@@ -2590,7 +2634,7 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer)
 	uint32_t uint32_tmp;
 	batch_job_launch_msg_t *launch_msg_ptr;
 
-	assert(msg != NULL);
+	xassert(msg != NULL);
 	launch_msg_ptr = xmalloc(sizeof(batch_job_launch_msg_t));
 	*msg = launch_msg_ptr;
 
@@ -2650,7 +2694,7 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer)
 static void
 _pack_job_id_request_msg(job_id_request_msg_t * msg, Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	pack32(msg->job_pid, buffer);
 }
@@ -2661,7 +2705,7 @@ _unpack_job_id_request_msg(job_id_request_msg_t ** msg, Buf buffer)
 	job_id_request_msg_t *tmp_ptr;
 
 	/* alloc memory for structure */
-	assert(msg != NULL);
+	xassert(msg != NULL);
 	tmp_ptr = xmalloc(sizeof(job_id_request_msg_t));
 	*msg = tmp_ptr;
 
@@ -2678,7 +2722,7 @@ _unpack_job_id_request_msg(job_id_request_msg_t ** msg, Buf buffer)
 static void
 _pack_job_id_response_msg(job_id_response_msg_t * msg, Buf buffer)
 {
-	assert(msg != NULL);
+	xassert(msg != NULL);
 
 	pack32(msg->job_id, buffer);
 }
@@ -2689,7 +2733,7 @@ _unpack_job_id_response_msg(job_id_response_msg_t ** msg, Buf buffer)
 	job_id_response_msg_t *tmp_ptr;
 
 	/* alloc memory for structure */
-	assert(msg != NULL);
+	xassert(msg != NULL);
 	tmp_ptr = xmalloc(sizeof(job_id_response_msg_t));
 	*msg = tmp_ptr;
 
@@ -2706,7 +2750,7 @@ _unpack_job_id_response_msg(job_id_response_msg_t ** msg, Buf buffer)
 static void
 _pack_srun_ping_msg(srun_ping_msg_t * msg, Buf buffer)
 {
-	assert ( msg != NULL );
+	xassert ( msg != NULL );
 
 	pack32 ( msg ->job_id  , buffer ) ;
 	pack32 ( msg ->step_id , buffer ) ;
@@ -2716,7 +2760,7 @@ static int
 _unpack_srun_ping_msg(srun_ping_msg_t ** msg_ptr, Buf buffer)
 {
 	srun_ping_msg_t * msg;
-	assert ( msg_ptr != NULL );
+	xassert ( msg_ptr != NULL );
 
 	msg = xmalloc ( sizeof (srun_ping_msg_t) ) ;
 	*msg_ptr = msg;
@@ -2734,7 +2778,7 @@ _unpack_srun_ping_msg(srun_ping_msg_t ** msg_ptr, Buf buffer)
 static void 
 _pack_srun_node_fail_msg(srun_node_fail_msg_t * msg, Buf buffer)
 {
-	assert ( msg != NULL );
+	xassert ( msg != NULL );
 
 	pack32 ( msg ->job_id  , buffer ) ;
 	pack32 ( msg ->step_id , buffer ) ;
@@ -2746,7 +2790,7 @@ _unpack_srun_node_fail_msg(srun_node_fail_msg_t ** msg_ptr, Buf buffer)
 {
 	uint16_t uint16_tmp;
 	srun_node_fail_msg_t * msg;
-	assert ( msg_ptr != NULL );
+	xassert ( msg_ptr != NULL );
 
 	msg = xmalloc ( sizeof (srun_node_fail_msg_t) ) ;
 	*msg_ptr = msg;
@@ -2767,7 +2811,7 @@ _unpack_srun_node_fail_msg(srun_node_fail_msg_t ** msg_ptr, Buf buffer)
 static void
 _pack_srun_timeout_msg(srun_timeout_msg_t * msg, Buf buffer)
 {
-	assert ( msg != NULL );
+	xassert ( msg != NULL );
 
 	pack32 ( msg -> job_id  , buffer ) ;
 	pack32 ( msg -> step_id , buffer ) ;
@@ -2778,7 +2822,7 @@ static int
 _unpack_srun_timeout_msg(srun_timeout_msg_t ** msg_ptr, Buf buffer)
 {
 	srun_timeout_msg_t * msg;
-	assert ( msg_ptr != NULL );
+	xassert ( msg_ptr != NULL );
 
 	msg = xmalloc ( sizeof (srun_timeout_msg_t) ) ;
 	*msg_ptr = msg ;
@@ -2794,10 +2838,69 @@ _unpack_srun_timeout_msg(srun_timeout_msg_t ** msg_ptr, Buf buffer)
 	return SLURM_ERROR;
 }
 
+static void
+_pack_checkpoint_msg(checkpoint_msg_t *msg, Buf buffer)
+{
+	xassert(msg != NULL);
+	/* wire order: op (16 bits), then job_id and step_id (32 bits each) */
+	pack16(msg->op, buffer);
+	pack32(msg->job_id, buffer);
+	pack32(msg->step_id, buffer);
+}
+
+static int
+_unpack_checkpoint_msg(checkpoint_msg_t **msg_ptr, Buf buffer)
+{
+	checkpoint_msg_t *ckpt_msg;
+
+	xassert(msg_ptr != NULL);
+	ckpt_msg = xmalloc(sizeof(*ckpt_msg));
+	*msg_ptr = ckpt_msg;
+	/* fields are read in the order _pack_checkpoint_msg() writes them */
+	safe_unpack16(&ckpt_msg->op, buffer);
+	safe_unpack32(&ckpt_msg->job_id, buffer);
+	safe_unpack32(&ckpt_msg->step_id, buffer);
+	return SLURM_SUCCESS;
+
+    unpack_error:	/* presumably the safe_unpack*() failure target */
+	xfree(ckpt_msg);
+	*msg_ptr = NULL;
+	return SLURM_ERROR;
+}
+
+static void
+_pack_checkpoint_resp_msg(checkpoint_resp_msg_t *msg, Buf buffer)
+{
+	xassert(msg != NULL);
+	/* wire order: ckpt_errno (32 bits), then the ckpt_strerror string */
+	pack32(msg->ckpt_errno, buffer);
+	packstr(msg->ckpt_strerror, buffer);
+}
+
+static int
+_unpack_checkpoint_resp_msg(checkpoint_resp_msg_t **msg_ptr, Buf buffer)
+{
+	checkpoint_resp_msg_t *resp;
+	uint16_t str_len;	/* scratch output of the string unpack; unused */
+
+	xassert(msg_ptr != NULL);
+	resp = xmalloc(sizeof(*resp));
+	*msg_ptr = resp;
+
+	safe_unpack32(&resp->ckpt_errno, buffer);
+	safe_unpackstr_xmalloc(&resp->ckpt_strerror, &str_len, buffer);
+	return SLURM_SUCCESS;
+
+    unpack_error:	/* presumably the safe_unpack*() failure target */
+	xfree(resp);
+	*msg_ptr = NULL;
+	return SLURM_ERROR;
+}
+
 /* template 
 void pack_ ( * msg , Buf buffer )
 {
-	assert ( msg != NULL );
+	xassert ( msg != NULL );
 
 	pack16 ( msg -> , buffer ) ;
 	pack32 ( msg -> , buffer ) ;
@@ -2809,7 +2912,7 @@ int unpack_ ( ** msg_ptr , Buf buffer )
 	uint16_t uint16_tmp;
 	* msg ;
 
-	assert ( msg_ptr != NULL );
+	xassert ( msg_ptr != NULL );
 
 	msg = xmalloc ( sizeof ( ) ) ;
 	*msg_ptr = msg;
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
index b1f24f9f6b0..4fc138f9c2d 100644
--- a/src/plugins/Makefile.am
+++ b/src/plugins/Makefile.am
@@ -1,3 +1,3 @@
 # $Id$
 
-SUBDIRS = auth jobcomp sched switch
+SUBDIRS = auth checkpoint jobcomp sched switch
diff --git a/src/plugins/checkpoint/Makefile.am b/src/plugins/checkpoint/Makefile.am
new file mode 100644
index 00000000000..e3389d7a37e
--- /dev/null
+++ b/src/plugins/checkpoint/Makefile.am
@@ -0,0 +1,4 @@
+# $Id$
+# Makefile for checkpoint plugins
+
+SUBDIRS = none
diff --git a/src/plugins/checkpoint/none/Makefile.am b/src/plugins/checkpoint/none/Makefile.am
new file mode 100644
index 00000000000..b737d923058
--- /dev/null
+++ b/src/plugins/checkpoint/none/Makefile.am
@@ -0,0 +1,19 @@
+# $Id$
+# Makefile for checkpoint/none plugin
+
+AUTOMAKE_OPTIONS = foreign
+
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic 
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
+
+pkglib_LTLIBRARIES = checkpoint_none.la
+
+# Null checkpoint plugin.
+checkpoint_none_la_SOURCES = checkpoint_none.c
+checkpoint_none_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
+
+if HAVE_AIX
+checkpoint_none_la_LIBADD  = $(top_builddir)/src/common/libcommon.la   \
+			$(top_builddir)/src/api/libslurm.la
+endif
diff --git a/src/plugins/checkpoint/none/checkpoint_none.c b/src/plugins/checkpoint/none/checkpoint_none.c
new file mode 100644
index 00000000000..87b5284ffb1
--- /dev/null
+++ b/src/plugins/checkpoint/none/checkpoint_none.c
@@ -0,0 +1,115 @@
+/*****************************************************************************\
+ *  checkpoint_none.c - NO-OP slurm checkpoint plugin.
+ *****************************************************************************
+ *  Copyright (C) 2004 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Morris Jette <jette1@llnl.gov>
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#if HAVE_STDINT_H
+#  include <stdint.h>
+#endif
+#if HAVE_INTTYPES_H
+#  include <inttypes.h>
+#endif
+
+#include <stdio.h>
+#include <slurm/slurm.h>
+#include <slurm/slurm_errno.h>
+
+#include "src/slurmctld/slurmctld.h"
+
+/*
+ * These variables are required by the generic plugin interface.  If they
+ * are not found in the plugin, the plugin loader will ignore it.
+ *
+ * plugin_name - a string giving a human-readable description of the
+ * plugin.  There is no maximum length, but the symbol must refer to
+ * a valid string.
+ *
+ * plugin_type - a string suggesting the type of the plugin or its
+ * applicability to a particular form of data or method of data handling.
+ * If the low-level plugin API is used, the contents of this string are
+ * unimportant and may be anything.  SLURM uses the higher-level plugin
+ * interface which requires this string to be of the form
+ *
+ *	<application>/<method>
+ *
+ * where <application> is a description of the intended application of
+ * the plugin (e.g., "checkpoint" for SLURM checkpoint) and <method>
+ * is a description of how this plugin satisfies that application.  SLURM will
+ * only load checkpoint plugins if the plugin_type string has a 
+ * prefix of "checkpoint/".
+ *
+ * plugin_version - an unsigned 32-bit integer giving the version number
+ * of the plugin.  If major and minor revisions are desired, the major
+ * version number may be multiplied by a suitable magnitude constant such
+ * as 100 or 1000.  Various SLURM versions will likely require a certain
+ * minimum version for their plugins as the checkpoint API matures.
+ */
+const char plugin_name[]       	= "Checkpoint NONE plugin";
+const char plugin_type[]       	= "checkpoint/none";
+const uint32_t plugin_version	= 90;
+
+/*
+ * init() is called when the plugin is loaded, before any other functions
+ * are called.  Put global initialization here (this plugin needs none).
+ */
+int init ( void )
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * The remainder of this file implements the standard SLURM checkpoint API.
+ */
+
+extern int slurm_ckpt_op ( enum check_opts op, 
+		struct step_record * step_ptr )
+{
+	return ESLURM_NOT_SUPPORTED;	/* both arguments are ignored */
+}
+
+/*
+ * slurm_ckpt_error - report that checkpointing is not supported
+ * OUT ckpt_errno, ckpt_strerror - set only when both are non-NULL; the
+ *	string is a literal, so the caller must not modify or free it
+ * RET SLURM_SUCCESS, or EINVAL if either output pointer is NULL
+ */
+extern int slurm_ckpt_error ( struct step_record * step_ptr, 
+		uint32_t *ckpt_errno, char **ckpt_strerror)
+{
+	if ((ckpt_errno == NULL) || (ckpt_strerror == NULL))
+		return EINVAL;	/* reject before writing to either output */
+	*ckpt_errno    = ESLURM_NOT_SUPPORTED;
+	*ckpt_strerror = "This system does not support checkpointing";
+	return SLURM_SUCCESS;
+}
+
+int fini ( void )	/* presumably the plugin unload hook - confirm */
+{
+	return SLURM_SUCCESS;
+}
+
diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c
index 57f430de4f7..645080caffd 100644
--- a/src/scontrol/scontrol.c
+++ b/src/scontrol/scontrol.c
@@ -79,6 +79,7 @@ static int input_words;	/* number of words of input permitted */
 static int one_liner;	/* one record per line if =1 */
 static int quiet_flag;	/* quiet=1, verbose=-1, normal=0 */
 
+static int	_checkpoint(char *op, char *job_step_id_str);
 static void	_delete_it (int argc, char *argv[]);
 static int	_get_command (int *argc, char *argv[]);
 static bool	_in_node_bit_list(int inx, int *node_list_array);
@@ -1127,7 +1128,7 @@ _process_command (int argc, char *argv[])
 	}
 	else if (strncasecmp (argv[0], "all", 3) == 0)
 		all_flag = 1;
-	else if (strncasecmp (argv[0], "completing", 1) == 0) {
+	else if (strncasecmp (argv[0], "completing", 3) == 0) {
 		if (argc > 1) {
 			exit_code = 1;
 			fprintf (stderr, 
@@ -1219,6 +1220,30 @@ _process_command (int argc, char *argv[])
 				slurm_perror ("slurm_reconfigure error");
 		}
 	}
+	else if (strncasecmp (argv[0], "checkpoint", 5) == 0) {
+		if (argc > 3) {
+			exit_code = 1;
+			if (quiet_flag != 1)
+				fprintf(stderr, 
+				        "too many arguments for keyword:%s\n", 
+				        argv[0]);
+		}
+		else if (argc < 3) {
+			exit_code = 1;
+			if (quiet_flag != 1)
+				fprintf(stderr, 
+				        "too few arguments for keyword:%s\n", 
+				        argv[0]);
+		}
+		else {
+			error_code =_checkpoint(argv[1], argv[2]);
+			if (error_code) {
+				exit_code = 1;
+				if (quiet_flag != 1)
+					slurm_perror ("slurm_checkpoint error");
+			}
+		}
+	}
 	else if (strncasecmp (argv[0], "show", 3) == 0) {
 		if (argc > 3) {
 			exit_code = 1;
@@ -1307,7 +1332,7 @@ _process_command (int argc, char *argv[])
 		}		
 		_update_it ((argc - 1), &argv[1]);
 	}
-	else if (strncasecmp (argv[0], "delete", 1) == 0) {
+	else if (strncasecmp (argv[0], "delete", 3) == 0) {
 		if (argc < 2) {
 			exit_code = 1;
 			fprintf (stderr, "too few arguments for %s keyword\n",
@@ -1362,7 +1387,7 @@ _delete_it (int argc, char *argv[])
 		}
 	} else {
 		exit_code = 1;
-		fprintf(stderr, "Invalid deletion entity: %s", argv[1]);
+		fprintf(stderr, "Invalid deletion entity: %s\n", argv[1]);
 	}
 }
 
@@ -1771,6 +1796,8 @@ scontrol [<OPTION>] [<COMMAND>]                                            \n\
                               generating a core file.                      \n\
      all                      display information about all partitions, including\n\
                               hidden partitions.                            \n\
+     checkpoint <CH_OP><step> perform a checkpoint operation on identified  \n\
+                              job step \n\
      completing               display jobs in completing state along with  \n\
                               their completing or down nodes               \n\
      delete <SPECIFICATIONS>  delete the specified partition, kill its jobs\n\
@@ -1805,8 +1832,73 @@ scontrol [<OPTION>] [<COMMAND>]                                            \n\
   file. You may wish to use the \"show\" keyword then use its output as    \n\
   input for the update keyword, editing as needed.                         \n\
                                                                            \n\
+  <CH_OP> identify checkpoint operations and may be \"disable\", \"enable\",\n\
+  \"create\", \"vacate\", \"restart\", or \"error\". \n\
+                                                                           \n\
   All commands and options are case-insensitive, although node names and   \n\
   partition names tests are case-sensitive (node names \"LX\" and \"lx\"   \n\
   are distinct).                                                       \n\n");
 
 }
+
+/* 
+ * _checkpoint - perform a checkpoint operation on a job or job step
+ * IN op - operation name: "disable", "enable", "create", "vacate", "resume"
+ *	or "error"; compared case-insensitively on its leading characters
+ * IN job_step_id_str - either a job id (for all steps of the given job) or 
+ *			a step id: "<jid>.<step_id>"
+ * RET 0 if no slurm error, errno otherwise. parsing error prints 
+ *			error message and returns 0
+ */
+static int _checkpoint(char *op, char *job_step_id_str)
+{
+	int rc = SLURM_SUCCESS;
+	uint32_t job_id = 0, step_id = NO_VAL;	/* NO_VAL: no step specified */
+	char *next_str = NULL;
+	uint32_t ckpt_errno;
+	char *ckpt_strerror = NULL;
+
+	/* strtol() leaving its end pointer at the start means "no digits" */
+	if (job_step_id_str) {
+		job_id = (uint32_t) strtol (job_step_id_str, &next_str, 10);
+		if ((next_str != job_step_id_str) && (next_str[0] == '.')) {
+			char *step_str = &next_str[1];
+			step_id = (uint32_t) strtol (step_str, &next_str, 10);
+			if (next_str == step_str)	/* "<jid>." lacks a step */
+				next_str = job_step_id_str;
+		}
+	}
+	if ((job_step_id_str == NULL) || (next_str == job_step_id_str) ||
+	    (next_str[0] != '\0')) {
+		fprintf(stderr, "Invalid job step name\n");
+		return 0;
+	}
+
+	if (strncasecmp(op, "disable", 3) == 0)
+		rc = slurm_checkpoint (CHECK_DISABLE, job_id, step_id);
+	else if (strncasecmp(op, "enable", 2) == 0)
+		rc = slurm_checkpoint (CHECK_ENABLE, job_id, step_id);
+
+	else if (strncasecmp(op, "create", 2) == 0)
+		rc = slurm_checkpoint (CHECK_CREATE, job_id, step_id);
+	else if (strncasecmp(op, "vacate", 2) == 0)
+		rc = slurm_checkpoint (CHECK_VACATE, job_id, step_id);
+	else if (strncasecmp(op, "resume", 2) == 0)
+		rc = slurm_checkpoint (CHECK_RESUME, job_id, step_id);
+
+	else if (strncasecmp(op, "error", 2) == 0) {
+		rc = slurm_checkpoint_error (job_id, step_id, 
+			&ckpt_errno, &ckpt_strerror);
+		if (rc == SLURM_SUCCESS) {
+			printf("error(%u): %s\n", ckpt_errno, ckpt_strerror);
+			free(ckpt_strerror);
+		}
+	}
+
+	else {
+		fprintf (stderr, "Invalid checkpoint operation: %s\n", op);
+		return 0;
+	}
+
+	return rc;
+}
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 3a83dd97e0c..3fee4be5cfd 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -46,6 +46,7 @@
 
 #include <slurm/slurm_errno.h>
 
+#include "src/common/checkpoint.h"
 #include "src/common/daemonize.h"
 #include "src/common/fd.h"
 #include "src/common/hostlist.h"
@@ -209,11 +210,12 @@ int main(int argc, char *argv[])
 		error("Unable to block signals");
 
 	/*
-	 * Initialize scheduling.
+	 * Initialize plugins.
 	 */
-	if ( slurm_sched_init() != SLURM_SUCCESS ) {
-		fatal( "failed to initialize scheduling" );
-	}
+	if ( slurm_sched_init() != SLURM_SUCCESS )
+		fatal( "failed to initialize scheduling plugin" );
+	if ( g_slurm_checkpoint_init() != SLURM_SUCCESS )
+		fatal( "failed to initialize checkpoint plugin" );
 
 	while (1) {
 		/* initialization for each primary<->backup switch */
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index b640879f4d3..ae9456c2a6f 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -40,6 +40,7 @@
 
 #include <slurm/slurm_errno.h>
 
+#include "src/common/checkpoint.h"
 #include "src/common/daemonize.h"
 #include "src/common/fd.h"
 #include "src/common/hostlist.h"
@@ -69,6 +70,7 @@ static int          _make_step_cred(struct step_record *step_rec,
 				    slurm_cred_t *slurm_cred);
 inline static void  _slurm_rpc_allocate_resources(slurm_msg_t * msg);
 inline static void  _slurm_rpc_allocate_and_run(slurm_msg_t * msg);
+inline static void  _slurm_rpc_checkpoint(slurm_msg_t * msg);
 inline static void  _slurm_rpc_dump_conf(slurm_msg_t * msg);
 inline static void  _slurm_rpc_dump_jobs(slurm_msg_t * msg);
 inline static void  _slurm_rpc_dump_nodes(slurm_msg_t * msg);
@@ -220,6 +222,10 @@ void slurmctld_req (slurm_msg_t * msg)
 			"SlurmctldPort == SlurmdPort");
 		slurm_send_rc_msg(msg, EINVAL);
 		break;
+	case REQUEST_CHECKPOINT:
+		_slurm_rpc_checkpoint(msg);
+		slurm_free_checkpoint_msg(msg->data);
+		break;
 	default:
 		error("invalid RPC msg_type=%d", msg->msg_type);
 		slurm_send_rc_msg(msg, EINVAL);
@@ -1669,3 +1675,52 @@ static void _update_cred_key(void)
 	slurm_cred_ctx_key_update(slurmctld_config.cred_ctx, 
 				  slurmctld_conf.job_credential_private_key);
 }
+
+/* _slurm_rpc_checkpoint - process RPC for a job/step checkpoint operation */
+inline static void  _slurm_rpc_checkpoint(slurm_msg_t * msg)
+{
+	int rc;
+	DEF_TIMERS;
+	checkpoint_msg_t *req = (checkpoint_msg_t *) msg->data;
+	int whole_job = (req->step_id == NO_VAL);	/* no step specified */
+	uid_t req_uid;
+	/* Locks: write job */
+	slurmctld_lock_t job_write_lock = {
+		NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
+
+	START_TIMER;
+	debug2("Processing RPC: REQUEST_CHECKPOINT");
+	req_uid = g_slurm_auth_get_uid(msg->cred);
+
+	/* perform the operation while holding the job write lock */
+	lock_slurmctld(job_write_lock);
+	rc = job_step_checkpoint(req->op, req->job_id, req->step_id,
+			req_uid, msg->conn_fd);
+	unlock_slurmctld(job_write_lock);
+	END_TIMER;
+
+	if (rc) {	/* failure: log the reason and return the error code */
+		if (whole_job)
+			info("_slurm_rpc_checkpoint for %u: %s",
+				req->job_id, slurm_strerror(rc));
+		else
+			info("_slurm_rpc_checkpoint for %u.%u: %s",
+				req->job_id, req->step_id,
+				slurm_strerror(rc));
+		slurm_send_rc_msg(msg, rc);
+		return;
+	}
+
+	/* for CHECK_ERROR the response was already sent on msg->conn_fd */
+	if (req->op == CHECK_ERROR)
+		return;
+
+	if (whole_job)
+		info("_slurm_rpc_checkpoint complete for %u %s",
+			req->job_id, TIME_STR);
+	else
+		info("_slurm_rpc_checkpoint complete for %u.%u %s",
+			req->job_id, req->step_id, TIME_STR);
+	slurm_send_rc_msg(msg, SLURM_SUCCESS);
+	schedule_job_save();	/* NOTE: This function provides its own locks */
+}
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index f94d6fe280c..a82eb636216 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -669,6 +669,19 @@ extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag,
  */
 extern int job_step_cancel (uint32_t job_id, uint32_t job_step_id, uid_t uid );
 
+/*
+ * job_step_checkpoint - perform some checkpoint operation on a job's step(s)
+ * IN op - the operation to be performed
+ * IN job_id - id of the job
+ * IN step_id - id of the job step, NO_VAL indicates all steps of the indicated job
+ * IN uid - user id of the user issuing the RPC (must be the job's owner or 0)
+ * IN conn_fd - file descriptor on which the CHECK_ERROR reply is sent
+ * RET 0 on success, otherwise ESLURM error code
+ */
+extern int job_step_checkpoint(enum check_opts op,
+		uint32_t job_id, uint32_t step_id, uid_t uid, 
+		slurm_fd conn_fd);
+
 /* 
  * job_complete - note the normal termination the specified job
  * IN job_id - id of the job which completed
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index be1613f79d8..ee47612e54c 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -40,6 +40,8 @@
 #include <slurm/slurm_errno.h>
 
 #include "src/common/bitstring.h"
+#include "src/common/checkpoint.h"
+#include "src/common/slurm_protocol_interface.h"
 #include "src/common/switch.h"
 #include "src/common/xstring.h"
 #include "src/slurmctld/agent.h"
@@ -48,6 +50,7 @@
 
 #define MAX_RETRIES 10
 
+static int _job_step_ckpt_error(struct step_record *step_ptr, slurm_fd conn_fd);
 static void _pack_ctld_job_step_info(struct step_record *step, Buf buffer);
 static bitstr_t * _pick_step_nodes (struct job_record  *job_ptr, 
 				    step_specs *step_spec );
@@ -685,3 +688,91 @@ bool step_on_node(struct job_record  *job_ptr, struct node_record *node_ptr)
 	list_iterator_destroy (step_iterator);
 	return found;
 }
+
+/*
+ * job_step_checkpoint - perform some checkpoint operation
+ * IN op - the operation to be performed
+ * IN job_id - id of the job
+ * IN step_id - id of the job step, NO_VAL indicates all steps of the indicated job
+ * IN uid - user id of the user issuing the RPC (must be the job's owner or 0)
+ * IN conn_fd - file descriptor on which to send reply (used for CHECK_ERROR)
+ * RET 0 on success, otherwise ESLURM error code or non-zero plugin code
+ */
+extern int job_step_checkpoint(enum check_opts op,
+		uint32_t job_id, uint32_t step_id, uid_t uid,
+		slurm_fd conn_fd)
+{
+	int rc = SLURM_SUCCESS, step_rc;
+	bool step_found = false;
+	struct job_record *job_ptr;
+	struct step_record *step_ptr;
+	ListIterator step_iterator;
+
+	/* find the job; only its owner or uid 0 may operate on it */
+	job_ptr = find_job_record (job_id);
+	if (job_ptr == NULL)
+		return ESLURM_INVALID_JOB_ID;
+	if ((uid != job_ptr->user_id) && (uid != 0))
+		return ESLURM_ACCESS_DENIED ;
+
+	/* operate on one individual job step */
+	if (step_id != NO_VAL) {
+		step_ptr = find_step_record(job_ptr, step_id);
+		if (step_ptr == NULL)
+			return ESLURM_INVALID_JOB_ID;
+		if (op == CHECK_ERROR)
+			return _job_step_ckpt_error(step_ptr, conn_fd);
+		rc = g_slurm_checkpoint_op(op, step_ptr);
+		last_job_update = time(NULL);
+		return rc;
+	}
+
+	/* operate on all of a job's steps */
+	step_iterator = list_iterator_create (job_ptr->step_list);
+	while ((step_ptr = (struct step_record *)
+				list_next (step_iterator))) {
+		step_found = true;
+		if (op == CHECK_ERROR) {
+			/* the reply is sent for the first step only */
+			rc = _job_step_ckpt_error(step_ptr, conn_fd);
+			break;
+		}
+		step_rc = g_slurm_checkpoint_op(op, step_ptr);
+		/* Keep the largest failure code, as before, but never let
+		 * a comparison against the initial zero hide a negative
+		 * failure code returned for a step */
+		if ((step_rc != SLURM_SUCCESS) &&
+		    ((rc == SLURM_SUCCESS) || (step_rc > rc)))
+			rc = step_rc;
+	}
+	list_iterator_destroy (step_iterator);
+
+	if ((op == CHECK_ERROR) && (!step_found))	/* no steps found */
+		rc = ESLURM_INVALID_JOB_ID;
+	if ((op != CHECK_ERROR) && step_found)	/* some work done */
+		last_job_update = time(NULL);
+
+	return rc;
+}
+
+/* Get a step's checkpoint error info and send it on conn_fd; RET 0 or error */
+static int _job_step_ckpt_error(struct step_record *step_ptr, slurm_fd conn_fd)
+{
+	int rc;
+	uint32_t ckpt_errno;
+	char *ckpt_strerror;
+	slurm_msg_t resp_msg = {0};	/* no indeterminate fields reach the send */
+	checkpoint_resp_msg_t resp_data;
+
+	rc = g_slurm_checkpoint_error(step_ptr, &ckpt_errno, &ckpt_strerror);
+	if (rc)
+		return rc;
+	/* NOTE(review): ownership of ckpt_strerror is not visible here - confirm */
+	resp_data.ckpt_errno = ckpt_errno;
+	resp_data.ckpt_strerror = ckpt_strerror;
+	resp_msg.msg_type = RESPONSE_CHECKPOINT;
+	resp_msg.data = &resp_data;
+	if (slurm_send_node_msg(conn_fd, &resp_msg) < 0)
+		return SLURM_SOCKET_ERROR;
+
+	return rc;
+}
-- 
GitLab