From 88163e9b912dfdfb5780b31908231ef4ddf36b9a Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 14 Dec 2007 22:55:39 +0000
Subject: [PATCH] Added checkpoint/xlch plugin for use with XLCH (Hongjia Cao,
 NUDT). Added srun option --checkpoint-path for use with XLCH (Hongjia Cao,
 NUDT).

---
 NEWS                                          |   2 +
 RELEASE_NOTES                                 |   2 +
 configure                                     |   3 +-
 configure.ac                                  |   1 +
 doc/man/man1/srun.1                           |   5 +
 doc/man/man5/slurm.conf.5                     |   4 +-
 slurm.spec                                    |   1 +
 slurm/slurm.h.in                              |  27 +
 src/api/checkpoint.c                          | 116 +++
 src/api/job_step_info.c                       |  12 +-
 src/api/step_ctx.c                            |   2 +
 src/api/step_launch.c                         |   1 +
 src/common/checkpoint.c                       |  22 +
 src/common/checkpoint.h                       |   3 +
 src/common/env.c                              |   6 +-
 src/common/env.h                              |   1 +
 src/common/slurm_protocol_defs.c              |  23 +
 src/common/slurm_protocol_defs.h              |  22 +
 src/common/slurm_protocol_pack.c              |  95 +++
 src/common/slurm_xlator.h                     |   2 +-
 src/common/stepd_api.c                        |  20 +
 src/common/stepd_api.h                        |   5 +
 src/plugins/checkpoint/Makefile.am            |   2 +-
 src/plugins/checkpoint/Makefile.in            |   2 +-
 src/plugins/checkpoint/aix/checkpoint_aix.c   |   5 +
 src/plugins/checkpoint/none/checkpoint_none.c |   7 +
 src/plugins/checkpoint/ompi/checkpoint_ompi.c |   7 +
 src/plugins/checkpoint/xlch/Makefile.am       |  24 +
 src/plugins/checkpoint/xlch/Makefile.in       | 564 ++++++++++++++
 src/plugins/checkpoint/xlch/checkpoint_xlch.c | 696 ++++++++++++++++++
 src/slurmctld/proc_req.c                      |  36 +
 src/slurmctld/slurmctld.h                     |  10 +
 src/slurmctld/step_mgr.c                      |  73 +-
 src/slurmd/slurmd/req.c                       |  50 ++
 src/slurmd/slurmstepd/req.c                   |  75 ++
 src/slurmd/slurmstepd/slurmstepd_job.c        |   3 +
 src/slurmd/slurmstepd/slurmstepd_job.h        |   1 +
 src/slurmd/slurmstepd/task.c                  |   1 +
 src/srun/allocate.c                           |   1 +
 src/srun/opt.c                                |  18 +-
 src/srun/opt.h                                |   1 +
 src/srun/srun.c                               |   2 +
 42 files changed, 1939 insertions(+), 14 deletions(-)
 create mode 100644 src/plugins/checkpoint/xlch/Makefile.am
 create mode 100644 src/plugins/checkpoint/xlch/Makefile.in
 create mode 100644 src/plugins/checkpoint/xlch/checkpoint_xlch.c

diff --git a/NEWS b/NEWS
index 3948e6be2cf..2fb080fde99 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,8 @@ documents those changes that are of interest to users and admins.
  -- Fix bug that prevented time value of "INFINITE" from being processed.
  -- Add new srun/sbatch option "--open-mode" to control how output/error 
     files are opened ("t" for truncate, "a" for append).
+ -- Added checkpoint/xlch plugin for use with XLCH (Hongjia Cao, NUDT).
+ -- Added srun option --checkpoint-path for use with XLCH (Hongjia Cao, NUDT).
 
 * Changes in SLURM 1.3.0-pre7
 =============================
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 57e4daac9ca..5725db06c63 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -42,6 +42,7 @@ COMMAND CHANGES
   specified job id. 
 * Support has been added for a much richer job dependency specification 
   including testing of exit codes and multiple dependencies.
+* The srun option --checkpoint-path has been added.
 
 CONFIGURATION FILE CHANGES
 
@@ -75,6 +76,7 @@ CONFIGURATION FILE CHANGES
 * The partition MaxTime format now accepts minutes, minutes:seconds, 
   hours:minutes:seconds, days-hours, days-hours:minutes, 
   days-hours:minutes:seconds or "UNLIMITED".
+* Checkpoint plugin added for XLCH.
 * See "man slurm.conf" for more information.
 
 OTHER CHANGES
diff --git a/configure b/configure
index a154749f4eb..02edad6789c 100755
--- a/configure
+++ b/configure
@@ -26904,7 +26904,7 @@ _ACEOF
 
 
 
-ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm-perl/Makefile.PL contribs/torque/Makefile src/Makefile src/api/Makefile src/common/Makefile src/sacct/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/srun/Makefile src/slurmd/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobacct_storage/Makefile src/plugins/jobacct_storage/filetxt/Makefile src/plugins/jobacct_storage/mysql/Makefile src/plugins/jobacct_storage/pgsql/Makefile src/plugins/jobacct_storage/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/rms/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/gang/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile 
src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/block_allocator/Makefile src/plugins/select/bluegene/plugin/Makefile src/plugins/select/linear/Makefile src/plugins/select/cons_res/Makefile src/plugins/switch/Makefile src/plugins/switch/elan/Makefile src/plugins/switch/none/Makefile src/plugins/switch/federation/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/none/Makefile doc/Makefile doc/man/Makefile doc/html/Makefile doc/html/configurator.html testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/common/Makefile testsuite/slurm_unit/slurmctld/Makefile testsuite/slurm_unit/slurmd/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile"
+ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm-perl/Makefile.PL contribs/torque/Makefile src/Makefile src/api/Makefile src/common/Makefile src/sacct/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/srun/Makefile src/slurmd/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/xlch/Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobacct_storage/Makefile src/plugins/jobacct_storage/filetxt/Makefile src/plugins/jobacct_storage/mysql/Makefile src/plugins/jobacct_storage/pgsql/Makefile src/plugins/jobacct_storage/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/rms/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/gang/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile 
src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/block_allocator/Makefile src/plugins/select/bluegene/plugin/Makefile src/plugins/select/linear/Makefile src/plugins/select/cons_res/Makefile src/plugins/switch/Makefile src/plugins/switch/elan/Makefile src/plugins/switch/none/Makefile src/plugins/switch/federation/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/none/Makefile doc/Makefile doc/man/Makefile doc/html/Makefile doc/html/configurator.html testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/common/Makefile testsuite/slurm_unit/slurmctld/Makefile testsuite/slurm_unit/slurmd/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile"
 
 
 cat >confcache <<\_ACEOF
@@ -27656,6 +27656,7 @@ do
     "src/plugins/checkpoint/aix/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/checkpoint/aix/Makefile" ;;
     "src/plugins/checkpoint/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/checkpoint/none/Makefile" ;;
     "src/plugins/checkpoint/ompi/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/checkpoint/ompi/Makefile" ;;
+    "src/plugins/checkpoint/xlch/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/checkpoint/xlch/Makefile" ;;
     "src/plugins/crypto/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/crypto/Makefile" ;;
     "src/plugins/crypto/munge/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/crypto/munge/Makefile" ;;
     "src/plugins/crypto/openssl/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/crypto/openssl/Makefile" ;;
diff --git a/configure.ac b/configure.ac
index 8b9622a755e..2407a4cf141 100644
--- a/configure.ac
+++ b/configure.ac
@@ -301,6 +301,7 @@ AC_CONFIG_FILES([Makefile
 		 src/plugins/checkpoint/aix/Makefile
 		 src/plugins/checkpoint/none/Makefile
 		 src/plugins/checkpoint/ompi/Makefile
+		 src/plugins/checkpoint/xlch/Makefile
 		 src/plugins/crypto/Makefile
 		 src/plugins/crypto/munge/Makefile
 		 src/plugins/crypto/openssl/Makefile
diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index 8c443a56b3c..6484ddbdf1b 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -78,6 +78,11 @@ Acceptable time formats include "minutes", "minutes:seconds",
 "hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and 
 "days\-hours:minutes:seconds".
 
+.TP
+\fB\-\-checkpoint\-path\fR=\fIdirectory\fR
+Specifies the directory into which the job step's checkpoint should 
+be written (used by the checkpoint/xlch plugin only).
+
 .TP
 \fB\-C\fR, \fB\-\-constraint\fR[=]<\fIlist\fR>
 Specify a list of constraints. 
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index 9b7984574f0..43892ddaebd 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -82,9 +82,9 @@ The slurmctld daemon must be restarted for a change in \fBCheckpointType\fR
 to take effect. 
 Acceptable values at present include
 "checkpoint/aix" (only on AIX systems),
-"checkpoint/ompi" (requires OpenMPI version 1.3 or higher), and
+"checkpoint/ompi" (requires OpenMPI version 1.3 or higher),
+"checkpoint/xlch" (for XLCH, requires that SlurmUser be root), and
 "checkpoint/none".
-(only on AIX systems). 
 The default value is "checkpoint/none".
 
 .TP
diff --git a/slurm.spec b/slurm.spec
index 3efb37ee507..79658b3e346 100644
--- a/slurm.spec
+++ b/slurm.spec
@@ -372,6 +372,7 @@ rm -rf $RPM_BUILD_ROOT
 %dir %{_libdir}/slurm
 %{_libdir}/slurm/checkpoint_none.so
 %{_libdir}/slurm/checkpoint_ompi.so
+%{_libdir}/slurm/checkpoint_xlch.so
 %{_libdir}/slurm/jobacct_gather_aix.so
 %{_libdir}/slurm/jobacct_gather_linux.so
 %{_libdir}/slurm/jobacct_gather_none.so
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index cfdf6c02a3d..1f037fdf643 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -742,6 +742,7 @@ typedef struct {
 				   "false" to accept at most one task per
 				   processor. "false" by default. */
 	uint16_t ckpt_interval;	/* checkpoint interval in minutes */
+	char *ckpt_path;	/* path to store checkpoint image files */
 	uint16_t verbose_level; /* for extra logging decisions in step
 				   launch api */
 } slurm_step_ctx_params_t;
@@ -788,6 +789,7 @@ typedef struct {
 	char *mpi_plugin_name;
 	uint8_t open_mode;
 	bool pty;
+	char *ckpt_path;
 } slurm_step_launch_params_t;
 
 typedef struct {
@@ -818,6 +820,7 @@ typedef struct {
 				 * start_range_1, end_range_1,
 				 * start_range_2, .., -1  */
 	uint16_t ckpt_interval;	/* checkpoint interval in minutes */
+	char *ckpt_path;	/* path to store checkpoint image files */
 } job_step_info_t;
 
 typedef struct job_step_info_response_msg {
@@ -1958,6 +1961,21 @@ extern int slurm_checkpoint_complete PARAMS(( uint32_t job_id,
 		uint32_t step_id, time_t begin_time, 
 		uint32_t error_code, char *error_msg ));
 
+/*
+ * slurm_checkpoint_task_complete - note the completion of a task's checkpoint
+ *	operation.
+ * IN job_id  - job on which to perform operation
+ * IN step_id - job step on which to perform operation
+ * IN task_id - task which completed the operation
+ * IN begin_time - time at which checkpoint began
+ * IN error_code - error code, highest value for all complete calls is preserved
+ * IN error_msg - error message, preserved for highest error_code
+ * RET 0 or a slurm error code
+ */
+extern int slurm_checkpoint_task_complete (uint32_t job_id, uint32_t step_id,
+					   uint32_t task_id, time_t begin_time, 
+					   uint32_t error_code, char *error_msg);
+
 /*
  * slurm_checkpoint_error - gather error information for the last checkpoint
  *	operation for some job step
@@ -1975,6 +1993,15 @@ extern int slurm_checkpoint_error PARAMS(( uint32_t job_id,
 		uint32_t step_id, uint32_t *error_code, 
 		char **error_msg ));
 
+/*
+ * slurm_get_checkpoint_file_path - return the checkpoint file
+ *      path of this process, creating the directory if needed.
+ * IN len: length of the file path buffer
+ * OUT buf: buffer to store the checkpoint file path
+ * RET: 0 on success, -1 on failure with errno set
+ */
+extern int slurm_get_checkpoint_file_path(size_t len, char *buf);
+
 /*****************************************************************************\
  *      SLURM HOSTLIST FUNCTIONS
 \*****************************************************************************/
diff --git a/src/api/checkpoint.c b/src/api/checkpoint.c
index b1fe76ccfdd..fabf3c14778 100644
--- a/src/api/checkpoint.c
+++ b/src/api/checkpoint.c
@@ -41,6 +41,10 @@
 #endif
 
 #include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdlib.h>
 #include <slurm/slurm.h>
 
 #include "src/common/checkpoint.h"
@@ -294,3 +298,115 @@ _handle_rc_msg(slurm_msg_t *msg)
 	slurm_seterrno(rc);
 	return rc;
 }
+
+/*
+ * slurm_checkpoint_task_complete - note the completion of a task's checkpoint
+ *	operation by sending REQUEST_CHECKPOINT_TASK_COMP to the controller.
+ * IN job_id  - job on which to perform operation
+ * IN step_id - job step on which to perform operation
+ * IN task_id - task which completed the operation
+ * IN begin_time - time at which checkpoint began
+ * IN error_code - error code, highest value for all complete calls is preserved
+ * IN error_msg - error message, preserved for highest error_code
+ * RET SLURM_SUCCESS, or SLURM_ERROR if the exchange with the controller fails
+ */
+extern int slurm_checkpoint_task_complete (uint32_t job_id, uint32_t step_id,
+		uint32_t task_id, time_t begin_time, uint32_t error_code, char *error_msg)
+{
+	int rc;
+	slurm_msg_t msg;
+	checkpoint_task_comp_msg_t req;
+
+	slurm_msg_t_init(&msg);
+	req.job_id       = job_id;
+	req.step_id      = step_id;
+	req.task_id      = task_id;
+	req.begin_time   = begin_time;
+	req.error_code   = error_code;
+	req.error_msg    = error_msg;	/* caller's pointer is stored, not copied */
+	msg.msg_type     = REQUEST_CHECKPOINT_TASK_COMP;
+	msg.data         = &req;	/* req lives on this stack frame only */
+
+	if (slurm_send_recv_controller_rc_msg(&msg, &rc) < 0)
+		return SLURM_ERROR;
+	if (rc)		/* rc was filled in by the call above */
+		slurm_seterrno_ret(rc);
+	return SLURM_SUCCESS;
+}
+
+extern char * __progname;
+/*
+ * slurm_get_checkpoint_file_path - return the checkpoint file path of this
+ *      process (<dir>/<jobid>.<stepid>/<progname>.<procid>.ckpt), creating
+ *      the <jobid>.<stepid> directory if needed.
+ * IN len: length of the file path buffer; OUT buf: receives the path
+ * RET: 0 on success, -1 on failure with errno set (EINVAL: unusable buffer)
+ */
+extern int
+slurm_get_checkpoint_file_path(size_t len, char *buf)
+{
+       char *ckpt_path, *job_id, *step_id, *proc_id;
+       struct stat mystat;
+       size_t used;            /* bytes of buf already holding the path */
+       int n;                  /* snprintf() result */
+
+       if (buf == NULL || len == 0) {  /* "len --" below would wrap around */
+               errno = EINVAL;
+               return -1;
+       }
+       len --;                 /* for a terminating 0 */
+
+       ckpt_path = getenv("SLURM_CHECKPOINT_PATH");
+       if (ckpt_path == NULL) { /* should not happen; cwd is a weak fallback since the program may chdir */
+               if (getcwd(buf, len) == NULL)  /* ERANGE: len is too short */
+                       return -1;
+       } else {
+               n = snprintf(buf, len, "%s", ckpt_path);
+               if (n < 0 || (size_t) n >= len) {       /* error or truncated */
+                       errno = ERANGE;
+                       return -1;
+               }
+       }
+       for (used = strlen(buf); used > 1 && buf[used - 1] == '/'; )
+               buf[-- used] = 0;       /* strip trailing '/', keep a lone "/" */
+       if (stat(buf, &mystat) < 0)
+               return -1;
+       if (! S_ISDIR(mystat.st_mode)) {
+               errno = ENOTDIR;
+               return -1;
+       }
+
+       job_id = getenv("SLURM_JOBID");
+       step_id = getenv("SLURM_STEPID");
+       proc_id = getenv("SLURM_PROCID");
+       if (job_id == NULL || step_id == NULL || proc_id == NULL) {
+               errno = ENODATA;
+               return -1;
+       }
+       n = snprintf(buf + used, len - used, "/%s.%s", job_id, step_id);
+       if (n < 0 || (size_t) n >= len - used) {
+               errno = ERANGE;
+               return -1;
+       }
+
+       if (stat(buf, &mystat) < 0) {
+               /* create it on ENOENT; EEXIST means it appeared since the stat */
+               if (errno != ENOENT || (mkdir(buf, 0750) < 0 && errno != EEXIST))
+                       return -1;
+               if (stat(buf, &mystat) < 0)
+                       return -1;
+       }
+       if (! S_ISDIR(mystat.st_mode)) {
+               errno = ENOTDIR;
+               return -1;
+       }
+
+       used = strlen(buf);
+       n = snprintf(buf + used, len - used, "/%s.%s.ckpt", __progname, proc_id);
+       if (n < 0 || (size_t) n >= len - used) {
+               errno = ERANGE;
+               return -1;
+       }
+
+       return 0;
+}
diff --git a/src/api/job_step_info.c b/src/api/job_step_info.c
index af139d15429..a66aefcfb64 100644
--- a/src/api/job_step_info.c
+++ b/src/api/job_step_info.c
@@ -128,11 +128,21 @@ slurm_sprint_job_step_info ( job_step_info_t * job_step_ptr,
 
 	/****** Line 2 ******/
 	snprintf(tmp_line, sizeof(tmp_line),
-		"Partition=%s Nodes=%s Name=%s Network=%s Checkpoint=%u\n\n", 
+		"Partition=%s Nodes=%s Name=%s Network=%s Checkpoint=%u", 
 		job_step_ptr->partition, job_step_ptr->nodes,
 		job_step_ptr->name, job_step_ptr->network,
 		job_step_ptr->ckpt_interval);
 	xstrcat(out, tmp_line);
+	if (one_liner)
+		xstrcat(out, " ");
+	else
+		xstrcat(out, "\n   ");
+
+	/****** Line 3 ******/
+	snprintf(tmp_line, sizeof(tmp_line),
+		"CheckpointPath=%s\n\n", 
+		 job_step_ptr->ckpt_path);
+	xstrcat(out, tmp_line);
 
 	return out;
 }
diff --git a/src/api/step_ctx.c b/src/api/step_ctx.c
index d061ec07c91..59f354f35dc 100644
--- a/src/api/step_ctx.c
+++ b/src/api/step_ctx.c
@@ -81,6 +81,7 @@ static job_step_create_request_msg_t *_create_step_request(
 	step_req->exclusive  = step_params->exclusive;
 	step_req->immediate  = step_params->immediate;
 	step_req->ckpt_interval = step_params->ckpt_interval;
+	step_req->ckpt_path = xstrdup(step_params->ckpt_path);
 	step_req->task_dist = step_params->task_dist;
 	step_req->plane_size = step_params->plane_size;
 	step_req->node_list = xstrdup(step_params->node_list);
@@ -439,6 +440,7 @@ extern void slurm_step_ctx_params_t_init (slurm_step_ctx_params_t *ptr)
 	ptr->task_dist = SLURM_DIST_CYCLIC;
 	ptr->plane_size = (uint16_t)NO_VAL;
 	ptr->ckpt_interval = 0;
+	ptr->ckpt_path = NULL;
 
 	ptr->uid = getuid();
 
diff --git a/src/api/step_launch.c b/src/api/step_launch.c
index db8fb3eed71..661eb48249c 100644
--- a/src/api/step_launch.c
+++ b/src/api/step_launch.c
@@ -236,6 +236,7 @@ int slurm_step_launch (slurm_step_ctx_t *ctx,
 	launch.task_dist	= params->task_dist;
 	launch.plane_size	= params->plane_size;
 	launch.pty              = params->pty;
+	launch.ckpt_path        = params->ckpt_path;
 	launch.open_mode        = params->open_mode;
 	launch.options          = job_options_create();
 	launch.complete_nodelist = 
diff --git a/src/common/checkpoint.c b/src/common/checkpoint.c
index 415ce2540a2..c01a68922a9 100644
--- a/src/common/checkpoint.c
+++ b/src/common/checkpoint.c
@@ -62,6 +62,8 @@ typedef struct slurm_checkpoint_ops {
 			 uint32_t *error_code, char **error_msg);
 	int	(*ckpt_comp) (struct step_record * step_ptr, time_t event_time,
 			 uint32_t error_code, char *error_msg);
+	int	(*ckpt_task_comp) (struct step_record * step_ptr, uint32_t task_id,
+			 time_t event_time, uint32_t error_code, char *error_msg);
 
 	int	(*ckpt_alloc_jobinfo) (check_jobinfo_t *jobinfo);
 	int	(*ckpt_free_jobinfo) (check_jobinfo_t jobinfo);
@@ -146,6 +148,7 @@ _slurm_checkpoint_get_ops( slurm_checkpoint_context_t c )
 	static const char *syms[] = {
 		"slurm_ckpt_op",
 		"slurm_ckpt_comp",
+		"slurm_ckpt_task_comp",
 		"slurm_ckpt_alloc_job",
 		"slurm_ckpt_free_job",
 		"slurm_ckpt_pack_job",
@@ -277,6 +280,25 @@ checkpoint_comp(void * step_ptr, time_t event_time, uint32_t error_code,
 	return retval;
 }
 
+extern int	/* pass a task's checkpoint completion to the checkpoint plugin */
+checkpoint_task_comp(void * step_ptr, uint32_t task_id, time_t event_time,
+		     uint32_t error_code, char *error_msg)
+{
+	int retval = SLURM_SUCCESS;
+
+	slurm_mutex_lock( &context_lock );	/* held across the plugin call */
+	if ( g_context )
+		retval = (*(g_context->ops.ckpt_task_comp))(
+			(struct step_record *) step_ptr, task_id, 
+			event_time, error_code, error_msg);
+	else {
+		error ("slurm_checkpoint plugin context not initialized");
+		retval = ENOENT;	/* no plugin context to dispatch to */
+	}
+	slurm_mutex_unlock( &context_lock );
+	return retval;	/* the plugin's return value, or ENOENT */
+}
+
 /* allocate and initialize a job step's checkpoint context */
 extern int checkpoint_alloc_jobinfo(check_jobinfo_t *jobinfo)
 {
diff --git a/src/common/checkpoint.h b/src/common/checkpoint.h
index bf9b02f5288..51c1cd4adac 100644
--- a/src/common/checkpoint.h
+++ b/src/common/checkpoint.h
@@ -78,6 +78,9 @@ extern int checkpoint_op(uint16_t op, uint16_t data, void * step_ptr,
 extern int checkpoint_comp(void * step_ptr, time_t event_time, uint32_t error_code,
 		char *error_msg);
 
+extern int checkpoint_task_comp(void * step_ptr, uint32_t task_id, 
+			time_t event_time, uint32_t error_code, char *error_msg);
+
 /* gather checkpoint error info */
 extern int checkpoint_error(void * step_ptr, 
 		uint16_t *ckpt_errno, char **ckpt_strerror);
diff --git a/src/common/env.c b/src/common/env.c
index 75aab393569..37cbab2d1e7 100644
--- a/src/common/env.c
+++ b/src/common/env.c
@@ -653,7 +653,11 @@ int setup_env(env_t *env)
 		error("Can't set SLURM_PTY_WIN_ROW env variable");
 		rc = SLURM_FAILURE;
 	}
-
+	if (env->ckpt_path 
+        && setenvf(&env->env, "SLURM_CHECKPOINT_PATH", "%s", env->ckpt_path)) {
+		error("Can't set SLURM_CHECKPOINT_PATH env variable");
+		rc = SLURM_FAILURE;
+	}
 	return rc;
 }
 
diff --git a/src/common/env.h b/src/common/env.h
index 44c3def0439..e4fe65b968d 100644
--- a/src/common/env.h
+++ b/src/common/env.h
@@ -73,6 +73,7 @@ typedef struct env_options {
 	uint16_t pty_port;	/* used to communicate window size changes */
 	uint8_t ws_col;		/* window size, columns */
 	uint8_t ws_row;		/* window size, row count */
+	char *ckpt_path;	/* --checkpoint-path=           */
 } env_t;
 
 
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index aa77dbcc285..4f1d7397511 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -324,6 +324,7 @@ void slurm_free_job_step_create_request_msg(job_step_create_request_msg_t *
 		xfree(msg->name);
 		xfree(msg->network);
 		xfree(msg->node_list);
+		xfree(msg->ckpt_path);
 		xfree(msg);
 	}
 }
@@ -424,6 +425,8 @@ void slurm_free_launch_tasks_request_msg(launch_tasks_request_msg_t * msg)
 	xfree(msg->task_epilog);
 	xfree(msg->complete_nodelist);
 
+	xfree(msg->ckpt_path);
+
 	if (msg->switch_job)
 		switch_free_jobinfo(msg->switch_job);
 
@@ -472,6 +475,11 @@ void slurm_free_kill_tasks_msg(kill_tasks_msg_t * msg)
 	xfree(msg);
 }
 
+void slurm_free_checkpoint_tasks_msg(checkpoint_tasks_msg_t * msg)
+{
+	xfree(msg);	/* struct has only scalar members, nothing else to free */
+}
+
 void slurm_free_epilog_complete_msg(epilog_complete_msg_t * msg)
 {
 	if (msg) {
@@ -537,6 +545,14 @@ void inline slurm_free_checkpoint_comp_msg(checkpoint_comp_msg_t *msg)
 	}
 }
 
+void inline slurm_free_checkpoint_task_comp_msg(checkpoint_task_comp_msg_t *msg)
+{
+	if (msg) {	/* a NULL message is accepted and ignored */
+		xfree(msg->error_msg);	/* the only pointer member of the struct */
+		xfree(msg);
+	}
+}
+
 void inline slurm_free_checkpoint_resp_msg(checkpoint_resp_msg_t *msg)
 {
 	if (msg) {
@@ -997,6 +1013,7 @@ static void _slurm_free_job_step_info_members (job_step_info_t * msg)
 	if (msg != NULL) {
 		xfree(msg->partition);
 		xfree(msg->nodes);
+		xfree(msg->ckpt_path);
 	}
 }
 
@@ -1192,6 +1209,9 @@ extern int slurm_free_msg_data(slurm_msg_type_t type, void *data)
 	case REQUEST_CHECKPOINT_COMP:
 		slurm_free_checkpoint_comp_msg(data);
 		break;
+	case REQUEST_CHECKPOINT_TASK_COMP:
+		slurm_free_checkpoint_task_comp_msg(data);
+		break;
 	case REQUEST_SUSPEND:
 		slurm_free_suspend_msg(data);
 		break;
@@ -1220,6 +1240,9 @@ extern int slurm_free_msg_data(slurm_msg_type_t type, void *data)
 	case REQUEST_TERMINATE_TASKS:
 		slurm_free_kill_tasks_msg(data);
 		break;
+	case REQUEST_CHECKPOINT_TASKS:
+		slurm_free_checkpoint_tasks_msg(data);
+		break;
 	case REQUEST_KILL_TIMELIMIT:
 		slurm_free_timelimit_msg(data);
 		break; 
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index 93667acbcc2..818127f2906 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -155,6 +155,7 @@ typedef enum {
 	REQUEST_CHECKPOINT,
 	RESPONSE_CHECKPOINT,
 	REQUEST_CHECKPOINT_COMP,
+	REQUEST_CHECKPOINT_TASK_COMP,
 	RESPONSE_CHECKPOINT_COMP,
 	REQUEST_SUSPEND,
 	RESPONSE_SUSPEND,
@@ -173,6 +174,7 @@ typedef enum {
 	RESPONSE_LAUNCH_TASKS,
 	MESSAGE_TASK_EXIT,
 	REQUEST_SIGNAL_TASKS,
+	REQUEST_CHECKPOINT_TASKS,
 	REQUEST_TERMINATE_TASKS,
 	REQUEST_REATTACH_TASKS,
 	RESPONSE_REATTACH_TASKS,
@@ -364,6 +366,13 @@ typedef struct kill_tasks_msg {
 	uint32_t signal;
 } kill_tasks_msg_t;
 
+typedef struct checkpoint_tasks_msg {
+	uint32_t job_id;
+	uint32_t job_step_id;
+	uint32_t signal;
+	time_t timestamp;
+} checkpoint_tasks_msg_t;
+
 typedef struct epilog_complete_msg {
 	uint32_t job_id;
 	uint32_t return_code;
@@ -398,6 +407,7 @@ typedef struct job_step_specs {
 	char *node_list;	/* list of required nodes */
 	char *network;		/* network use spec */
 	char *name;		/* name of the job step, default "" */
+	char *ckpt_path;	/* path to store checkpoint image files */
 	uint8_t overcommit;     /* flag, 1 to allow overcommit of processors,
 				   0 to disallow overcommit. default is 0 */
 } job_step_create_request_msg_t;
@@ -472,6 +482,7 @@ typedef struct launch_tasks_request_msg {
 	switch_jobinfo_t switch_job;	/* switch credential for the job */
 	job_options_t options;  /* Arbitrary job options */
 	char *complete_nodelist;
+	char *ckpt_path;	/* checkpoint path */
 } launch_tasks_request_msg_t;
 
 typedef struct task_user_managed_io_msg {
@@ -587,6 +598,15 @@ typedef struct checkpoint_comp_msg {
 	char *   error_msg;	/* error message on failure */
 } checkpoint_comp_msg_t;
 
+typedef struct checkpoint_task_comp_msg {
+	uint32_t job_id;	/* slurm job_id */
+	uint32_t step_id;	/* slurm step_id */
+	uint32_t task_id;	/* task id */
+	time_t   begin_time;	/* time checkpoint began */
+	uint32_t error_code;	/* error code on failure */
+	char *   error_msg;	/* error message on failure */
+} checkpoint_task_comp_msg_t;
+
 typedef struct checkpoint_resp_msg {
 	time_t   event_time;	/* time of checkpoint start/finish */
 	uint32_t error_code;	/* error code on failure */
@@ -690,6 +710,7 @@ extern void slurm_msg_t_init (slurm_msg_t *msg);
 extern void slurm_msg_t_copy(slurm_msg_t *dest, slurm_msg_t *src);
 
 /* free message functions */
+void slurm_free_checkpoint_tasks_msg(checkpoint_tasks_msg_t * msg);
 void inline slurm_free_last_update_msg(last_update_msg_t * msg);
 void inline slurm_free_return_code_msg(return_code_msg_t * msg);
 void inline slurm_free_job_alloc_info_msg(job_alloc_info_msg_t * msg);
@@ -757,6 +778,7 @@ void inline slurm_free_srun_timeout_msg(srun_timeout_msg_t * msg);
 void inline slurm_free_srun_user_msg(srun_user_msg_t * msg);
 void inline slurm_free_checkpoint_msg(checkpoint_msg_t *msg);
 void inline slurm_free_checkpoint_comp_msg(checkpoint_comp_msg_t *msg);
+void inline slurm_free_checkpoint_task_comp_msg(checkpoint_task_comp_msg_t *msg);
 void inline slurm_free_checkpoint_resp_msg(checkpoint_resp_msg_t *msg);
 void inline slurm_free_suspend_msg(suspend_msg_t *msg);
 void slurm_free_resource_allocation_response_msg (
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index 0a48ea054c9..d76ca673b9a 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -165,6 +165,9 @@ static int _unpack_task_user_managed_io_stream_msg(task_user_managed_io_msg_t **
 static void _pack_cancel_tasks_msg(kill_tasks_msg_t * msg, Buf buffer);
 static int _unpack_cancel_tasks_msg(kill_tasks_msg_t ** msg_ptr, Buf buffer);
 
+static void _pack_checkpoint_tasks_msg(checkpoint_tasks_msg_t * msg, Buf buffer);
+static int _unpack_checkpoint_tasks_msg(checkpoint_tasks_msg_t ** msg_ptr, Buf buffer);
+
 static void _pack_launch_tasks_response_msg(launch_tasks_response_msg_t *
 					    msg, Buf buffer);
 static int _unpack_launch_tasks_response_msg(launch_tasks_response_msg_t **
@@ -300,6 +303,10 @@ static void _pack_checkpoint_comp(checkpoint_comp_msg_t *msg, Buf buffer);
 static int  _unpack_checkpoint_comp(checkpoint_comp_msg_t **msg_ptr, 
 				    Buf buffer);
 
+static void _pack_checkpoint_task_comp(checkpoint_task_comp_msg_t *msg, Buf buffer);
+static int  _unpack_checkpoint_task_comp(checkpoint_task_comp_msg_t **msg_ptr, 
+					 Buf buffer);
+
 static void _pack_suspend_msg(suspend_msg_t *msg, Buf buffer);
 static int  _unpack_suspend_msg(suspend_msg_t **msg_ptr, Buf buffer);
 
@@ -525,6 +532,10 @@ pack_msg(slurm_msg_t const *msg, Buf buffer)
 		_pack_cancel_tasks_msg((kill_tasks_msg_t *) msg->data,
 				       buffer);
 		break;
+	case REQUEST_CHECKPOINT_TASKS:
+		_pack_checkpoint_tasks_msg((checkpoint_tasks_msg_t *) msg->data,
+					   buffer);
+		break;
 	case REQUEST_JOB_STEP_INFO:
 		_pack_job_step_info_req_msg((job_step_info_request_msg_t
 					     *) msg->data, buffer);
@@ -649,6 +660,10 @@ pack_msg(slurm_msg_t const *msg, Buf buffer)
 		_pack_checkpoint_comp((checkpoint_comp_msg_t *)msg->data, 
 				      buffer);
 		break;
+	case REQUEST_CHECKPOINT_TASK_COMP:
+		_pack_checkpoint_task_comp((checkpoint_task_comp_msg_t *)msg->data, 
+				      buffer);
+		break;
 	case RESPONSE_CHECKPOINT:
 	case RESPONSE_CHECKPOINT_COMP:
 		_pack_checkpoint_resp_msg((checkpoint_resp_msg_t *)msg->data, 
@@ -836,6 +851,10 @@ unpack_msg(slurm_msg_t * msg, Buf buffer)
 		rc = _unpack_cancel_tasks_msg((kill_tasks_msg_t **) &
 					      (msg->data), buffer);
 		break;
+	case REQUEST_CHECKPOINT_TASKS:
+		rc = _unpack_checkpoint_tasks_msg((checkpoint_tasks_msg_t **) &
+						  (msg->data), buffer);
+		break;
 	case REQUEST_JOB_STEP_INFO:
 		rc = _unpack_job_step_info_req_msg(
 			(job_step_info_request_msg_t **)
@@ -976,6 +995,10 @@ unpack_msg(slurm_msg_t * msg, Buf buffer)
 		rc = _unpack_checkpoint_comp((checkpoint_comp_msg_t **)
 					     & msg->data, buffer);
 		break;
+	case REQUEST_CHECKPOINT_TASK_COMP:
+		rc = _unpack_checkpoint_task_comp((checkpoint_task_comp_msg_t **)
+						  & msg->data, buffer);
+		break;
 	case RESPONSE_CHECKPOINT:
 	case RESPONSE_CHECKPOINT_COMP:
 		rc = _unpack_checkpoint_resp_msg((checkpoint_resp_msg_t **)
@@ -1529,6 +1552,7 @@ _pack_job_step_create_request_msg(job_step_create_request_msg_t
 	packstr(msg->name, buffer);
 	packstr(msg->network, buffer);
 	packstr(msg->node_list, buffer);
+	packstr(msg->ckpt_path, buffer);
 
 	pack8(msg->overcommit, buffer);
 }
@@ -1563,6 +1587,7 @@ _unpack_job_step_create_request_msg(job_step_create_request_msg_t ** msg,
 	safe_unpackstr_xmalloc(&(tmp_ptr->name), &uint32_tmp, buffer);
 	safe_unpackstr_xmalloc(&(tmp_ptr->network), &uint32_tmp, buffer);
 	safe_unpackstr_xmalloc(&(tmp_ptr->node_list), &uint32_tmp, buffer);
+	safe_unpackstr_xmalloc(&(tmp_ptr->ckpt_path), &uint32_tmp, buffer);
 
 	safe_unpack8(&(tmp_ptr->overcommit), buffer);
 
@@ -1914,6 +1939,7 @@ _unpack_job_step_info_members(job_step_info_t * step, Buf buffer)
 	safe_unpackstr_xmalloc(&step->name, &uint32_tmp, buffer);
 	safe_unpackstr_xmalloc(&step->network, &uint32_tmp, buffer);
 	safe_unpackstr_xmalloc(&node_inx_str, &uint32_tmp, buffer);
+	safe_unpackstr_xmalloc(&step->ckpt_path, &uint32_tmp, buffer);
 	if (node_inx_str == NULL)
 		step->node_inx = bitfmt2int("");
 	else {
@@ -3042,6 +3068,7 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer)
 
 	pack8(msg->open_mode, buffer);
 	pack8(msg->pty, buffer);
+	packstr(msg->ckpt_path, buffer);
 }
 
 static int
@@ -3137,6 +3164,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t **
 
 	safe_unpack8(&msg->open_mode, buffer);
 	safe_unpack8(&msg->pty, buffer);
+	safe_unpackstr_xmalloc(&msg->ckpt_path, &uint32_tmp, buffer);
 	return SLURM_SUCCESS;
 
 unpack_error:
@@ -3200,6 +3228,35 @@ unpack_error:
 	return SLURM_ERROR;
 }
 
+/* Serialize a REQUEST_CHECKPOINT_TASKS payload into buffer. */
+static void _pack_checkpoint_tasks_msg(checkpoint_tasks_msg_t *msg, Buf buffer)
+{
+	pack32((uint32_t) msg->job_id, buffer);
+	pack32((uint32_t) msg->job_step_id, buffer);
+	pack32((uint32_t) msg->signal, buffer);
+	pack_time((time_t) msg->timestamp, buffer);
+}
+
+/* Decode a REQUEST_CHECKPOINT_TASKS payload; *msg_ptr is NULL on failure. */
+static int _unpack_checkpoint_tasks_msg(checkpoint_tasks_msg_t **msg_ptr,
+					Buf buffer)
+{
+	checkpoint_tasks_msg_t *req = xmalloc(sizeof(*req));
+
+	*msg_ptr = req;
+	/* Field order must mirror _pack_checkpoint_tasks_msg(). */
+	safe_unpack32(&req->job_id, buffer);
+	safe_unpack32(&req->job_step_id, buffer);
+	safe_unpack32(&req->signal, buffer);
+	safe_unpack_time(&req->timestamp, buffer);
+	return SLURM_SUCCESS;
+
+unpack_error:
+	*msg_ptr = NULL;
+	xfree(req);
+	return SLURM_ERROR;
+}
+
 static void
 _pack_shutdown_msg(shutdown_msg_t * msg, Buf buffer)
 {
@@ -4051,6 +4108,44 @@ unpack_error:
 	return SLURM_ERROR;
 }
 
+/* Serialize a REQUEST_CHECKPOINT_TASK_COMP payload into buffer. */
+static void _pack_checkpoint_task_comp(checkpoint_task_comp_msg_t *msg,
+				       Buf buffer)
+{
+	xassert(msg != NULL);
+	pack32((uint32_t) msg->job_id, buffer);
+	pack32((uint32_t) msg->step_id, buffer);
+	pack32((uint32_t) msg->task_id, buffer);
+	pack32((uint32_t) msg->error_code, buffer);
+	packstr(msg->error_msg, buffer);
+	pack_time(msg->begin_time, buffer);
+}
+
+/* Decode a REQUEST_CHECKPOINT_TASK_COMP payload; NULL *msg_ptr on error. */
+static int _unpack_checkpoint_task_comp(checkpoint_task_comp_msg_t **msg_ptr,
+					Buf buffer)
+{
+	checkpoint_task_comp_msg_t *comp;
+	uint32_t str_len;	/* length reported for error_msg; otherwise unused */
+
+	xassert(msg_ptr != NULL);
+	comp = xmalloc(sizeof(*comp));
+	*msg_ptr = comp;
+	safe_unpack32(&comp->job_id, buffer);
+	safe_unpack32(&comp->step_id, buffer);
+	safe_unpack32(&comp->task_id, buffer);
+	safe_unpack32(&comp->error_code, buffer);
+	safe_unpackstr_xmalloc(&comp->error_msg, &str_len, buffer);
+	safe_unpack_time(&comp->begin_time, buffer);
+	return SLURM_SUCCESS;
+
+unpack_error:
+	*msg_ptr = NULL;
+	xfree(comp->error_msg);
+	xfree(comp);
+	return SLURM_ERROR;
+}
+
 static void
 _pack_checkpoint_resp_msg(checkpoint_resp_msg_t *msg, Buf buffer)
 {
diff --git a/src/common/slurm_xlator.h b/src/common/slurm_xlator.h
index 8e38ed87b90..597eb038498 100644
--- a/src/common/slurm_xlator.h
+++ b/src/common/slurm_xlator.h
@@ -179,7 +179,7 @@
 #define	list_insert		slurm_list_insert
 #define	list_find		slurm_list_find
 #define	list_remove		slurm_list_remove
-#define	list_delete		slurm_list_delete
+#define	list_delete_item	slurm_list_delete_item
 #define	list_install_fork_handlers slurm_list_install_fork_handlers
 
 /* log.[ch] functions */
diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c
index 46f7d1b4967..c64223e26a4 100644
--- a/src/common/stepd_api.c
+++ b/src/common/stepd_api.c
@@ -315,6 +315,26 @@ rwfail:
 	return -1;
 }
 
+/*
+ * Send a checkpoint request for all tasks of a job step over fd.
+ * Returns the code read back from fd, or -1 from the rwfail path.
+ */
+int stepd_checkpoint(int fd, int signal, time_t timestamp)
+{
+	int msg_type = REQUEST_CHECKPOINT_TASKS;
+	int reply;
+
+	/* Request: message type, then signal number, then timestamp. */
+	safe_write(fd, &msg_type, sizeof(msg_type));
+	safe_write(fd, &signal, sizeof(signal));
+	safe_write(fd, &timestamp, sizeof(timestamp));
+
+	safe_read(fd, &reply, sizeof(reply));
+	return reply;
+ rwfail:
+	return -1;
+}
+
 /*
  * Send a signal to a single task in a job step.
  */
diff --git a/src/common/stepd_api.h b/src/common/stepd_api.h
index 6e550ca21ec..61c9164f8af 100644
--- a/src/common/stepd_api.h
+++ b/src/common/stepd_api.h
@@ -132,6 +132,11 @@ slurmstepd_info_t *stepd_get_info(int fd);
  */
 int stepd_signal(int fd, int signal);
 
+/*
+ * Send a checkpoint request to all tasks of a job step.
+ */
+int stepd_checkpoint(int fd, int signal, time_t timestamp);
+
 /*
  * Send a signal to a single task in a job step.
  */
diff --git a/src/plugins/checkpoint/Makefile.am b/src/plugins/checkpoint/Makefile.am
index 3ce36725a72..0527fc065be 100644
--- a/src/plugins/checkpoint/Makefile.am
+++ b/src/plugins/checkpoint/Makefile.am
@@ -1,3 +1,3 @@
 # Makefile for checkpoint plugins
 
-SUBDIRS = aix none ompi
+SUBDIRS = aix none ompi xlch
diff --git a/src/plugins/checkpoint/Makefile.in b/src/plugins/checkpoint/Makefile.in
index ba8e59c1306..97b43e4c017 100644
--- a/src/plugins/checkpoint/Makefile.in
+++ b/src/plugins/checkpoint/Makefile.in
@@ -241,7 +241,7 @@ target_os = @target_os@
 target_vendor = @target_vendor@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-SUBDIRS = aix none ompi
+SUBDIRS = aix none ompi xlch
 all: all-recursive
 
 .SUFFIXES:
diff --git a/src/plugins/checkpoint/aix/checkpoint_aix.c b/src/plugins/checkpoint/aix/checkpoint_aix.c
index 46f21cb5f6b..097ec8b3adc 100644
--- a/src/plugins/checkpoint/aix/checkpoint_aix.c
+++ b/src/plugins/checkpoint/aix/checkpoint_aix.c
@@ -507,3 +507,8 @@ static void _ckpt_dequeue_timeout(uint32_t job_id, uint32_t step_id,
 	slurm_mutex_unlock(&ckpt_agent_mutex);
 }
 
+extern int slurm_ckpt_task_comp ( struct step_record * step_ptr, uint32_t task_id,
+				  time_t event_time, uint32_t error_code, char *error_msg )
+{
+	return SLURM_SUCCESS;	/* stub: every argument is ignored */
+}
diff --git a/src/plugins/checkpoint/none/checkpoint_none.c b/src/plugins/checkpoint/none/checkpoint_none.c
index 17cec046850..938a5a058a9 100644
--- a/src/plugins/checkpoint/none/checkpoint_none.c
+++ b/src/plugins/checkpoint/none/checkpoint_none.c
@@ -134,3 +134,10 @@ extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer)
 {
 	return SLURM_SUCCESS;
 }
+
+extern int slurm_ckpt_task_comp ( struct step_record * step_ptr, uint32_t task_id,
+				  time_t event_time, uint32_t error_code, char *error_msg )
+{
+	return SLURM_SUCCESS;	/* stub: every argument is ignored */
+}
+
diff --git a/src/plugins/checkpoint/ompi/checkpoint_ompi.c b/src/plugins/checkpoint/ompi/checkpoint_ompi.c
index cdc71787393..05ba611d977 100644
--- a/src/plugins/checkpoint/ompi/checkpoint_ompi.c
+++ b/src/plugins/checkpoint/ompi/checkpoint_ompi.c
@@ -307,3 +307,10 @@ static int _ckpt_step(struct step_record * step_ptr, uint16_t wait, int vacate)
 		job_ptr->job_id, step_ptr->step_id);
 	return SLURM_SUCCESS;
 }
+
+extern int slurm_ckpt_task_comp ( struct step_record * step_ptr, uint32_t task_id,
+				  time_t event_time, uint32_t error_code, char *error_msg )
+{
+	return SLURM_SUCCESS;	/* stub: every argument is ignored */
+}
+
diff --git a/src/plugins/checkpoint/xlch/Makefile.am b/src/plugins/checkpoint/xlch/Makefile.am
new file mode 100644
index 00000000000..5bcc9bae2f2
--- /dev/null
+++ b/src/plugins/checkpoint/xlch/Makefile.am
@@ -0,0 +1,24 @@
+# Makefile for checkpoint/xlch plugin
+
+AUTOMAKE_OPTIONS = foreign
+
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic 
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
+
+pkglib_LTLIBRARIES = checkpoint_xlch.la
+checkpoint_xlch_la_SOURCES = checkpoint_xlch.c config.c
+checkpoint_xlch_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
+
+convenience_libs = $(top_builddir)/src/api/libslurmhelper.la
+
+checkpoint_xlch_la_LIBADD = $(convenience_libs)
+
+config.c: Makefile
+	@( echo "char *scch_path = \"$(prefix)/sbin/scch\";"\
+         ) > config.c
+
+force:
+# Always delegate to the helper library's own directory so it is kept current.
+$(convenience_libs) : force
+	@cd `dirname $@` && $(MAKE) `basename $@`
diff --git a/src/plugins/checkpoint/xlch/Makefile.in b/src/plugins/checkpoint/xlch/Makefile.in
new file mode 100644
index 00000000000..3dfea3dc26e
--- /dev/null
+++ b/src/plugins/checkpoint/xlch/Makefile.in
@@ -0,0 +1,564 @@
+# Makefile.in generated by automake 1.10 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Makefile for checkpoint/xlch plugin
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+subdir = src/plugins/checkpoint/xlch
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
+	$(top_srcdir)/auxdir/slurm.m4 \
+	$(top_srcdir)/auxdir/x_ac__system_configuration.m4 \
+	$(top_srcdir)/auxdir/x_ac_affinity.m4 \
+	$(top_srcdir)/auxdir/x_ac_aix.m4 \
+	$(top_srcdir)/auxdir/x_ac_bluegene.m4 \
+	$(top_srcdir)/auxdir/x_ac_databases.m4 \
+	$(top_srcdir)/auxdir/x_ac_debug.m4 \
+	$(top_srcdir)/auxdir/x_ac_elan.m4 \
+	$(top_srcdir)/auxdir/x_ac_federation.m4 \
+	$(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \
+	$(top_srcdir)/auxdir/x_ac_gtk.m4 \
+	$(top_srcdir)/auxdir/x_ac_munge.m4 \
+	$(top_srcdir)/auxdir/x_ac_ncurses.m4 \
+	$(top_srcdir)/auxdir/x_ac_pam.m4 \
+	$(top_srcdir)/auxdir/x_ac_perl.m4 \
+	$(top_srcdir)/auxdir/x_ac_ptrace.m4 \
+	$(top_srcdir)/auxdir/x_ac_readline.m4 \
+	$(top_srcdir)/auxdir/x_ac_setpgrp.m4 \
+	$(top_srcdir)/auxdir/x_ac_setproctitle.m4 \
+	$(top_srcdir)/auxdir/x_ac_sgi_job.m4 \
+	$(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \
+	$(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h
+CONFIG_CLEAN_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
+am__installdirs = "$(DESTDIR)$(pkglibdir)"
+pkglibLTLIBRARIES_INSTALL = $(INSTALL)
+LTLIBRARIES = $(pkglib_LTLIBRARIES)
+checkpoint_xlch_la_DEPENDENCIES = $(convenience_libs)
+am_checkpoint_xlch_la_OBJECTS = checkpoint_xlch.lo config.lo
+checkpoint_xlch_la_OBJECTS = $(am_checkpoint_xlch_la_OBJECTS)
+checkpoint_xlch_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(checkpoint_xlch_la_LDFLAGS) $(LDFLAGS) -o $@
+DEFAULT_INCLUDES = -I. -I$(top_builddir) -I$(top_builddir)/slurm@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp
+am__depfiles_maybe = depfiles
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
+SOURCES = $(checkpoint_xlch_la_SOURCES)
+DIST_SOURCES = $(checkpoint_xlch_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTHD_CFLAGS = @AUTHD_CFLAGS@
+AUTHD_LIBS = @AUTHD_LIBS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BG_INCLUDES = @BG_INCLUDES@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CMD_LDFLAGS = @CMD_LDFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO = @ECHO@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ELAN_LIBS = @ELAN_LIBS@
+EXEEXT = @EXEEXT@
+F77 = @F77@
+FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@
+FFLAGS = @FFLAGS@
+GREP = @GREP@
+GTK2_CFLAGS = @GTK2_CFLAGS@
+GTK2_LIBS = @GTK2_LIBS@
+HAVEPGCONFIG = @HAVEPGCONFIG@
+HAVEPKGCONFIG = @HAVEPKGCONFIG@
+HAVE_AIX = @HAVE_AIX@
+HAVE_ELAN = @HAVE_ELAN@
+HAVE_FEDERATION = @HAVE_FEDERATION@
+HAVE_OPENSSL = @HAVE_OPENSSL@
+HAVE_PERL_CORE_DIR = @HAVE_PERL_CORE_DIR@
+HAVE_SOME_CURSES = @HAVE_SOME_CURSES@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIB_LDFLAGS = @LIB_LDFLAGS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
+MUNGE_LIBS = @MUNGE_LIBS@
+MYSQL_LIBS = @MYSQL_LIBS@
+NCURSES = @NCURSES@
+NUMA_LIBS = @NUMA_LIBS@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PAM_LIBS = @PAM_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERL_INCLUDES = @PERL_INCLUDES@
+PERL_LIBS = @PERL_LIBS@
+PGSQL_CFLAGS = @PGSQL_CFLAGS@
+PGSQL_LIBS = @PGSQL_LIBS@
+PLPA_LIBS = @PLPA_LIBS@
+PROCTRACKDIR = @PROCTRACKDIR@
+PROJECT = @PROJECT@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+READLINE_LIBS = @READLINE_LIBS@
+RELEASE = @RELEASE@
+SED = @SED@
+SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
+SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SLURMCTLD_PORT = @SLURMCTLD_PORT@
+SLURMD_PORT = @SLURMD_PORT@
+SLURM_API_AGE = @SLURM_API_AGE@
+SLURM_API_CURRENT = @SLURM_API_CURRENT@
+SLURM_API_MAJOR = @SLURM_API_MAJOR@
+SLURM_API_REVISION = @SLURM_API_REVISION@
+SLURM_API_VERSION = @SLURM_API_VERSION@
+SLURM_MAJOR = @SLURM_MAJOR@
+SLURM_MICRO = @SLURM_MICRO@
+SLURM_MINOR = @SLURM_MINOR@
+SLURM_VERSION = @SLURM_VERSION@
+SO_LDFLAGS = @SO_LDFLAGS@
+SSL_CPPFLAGS = @SSL_CPPFLAGS@
+SSL_LDFLAGS = @SSL_LDFLAGS@
+SSL_LIBS = @SSL_LIBS@
+STRIP = @STRIP@
+UTIL_LIBS = @UTIL_LIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_F77 = @ac_ct_F77@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AUTOMAKE_OPTIONS = foreign
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic 
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
+pkglib_LTLIBRARIES = checkpoint_xlch.la
+checkpoint_xlch_la_SOURCES = checkpoint_xlch.c config.c
+checkpoint_xlch_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
+convenience_libs = $(top_builddir)/src/api/libslurmhelper.la
+checkpoint_xlch_la_LIBADD = $(convenience_libs)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign  src/plugins/checkpoint/xlch/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign  src/plugins/checkpoint/xlch/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES)
+	@$(NORMAL_INSTALL)
+	test -z "$(pkglibdir)" || $(MKDIR_P) "$(DESTDIR)$(pkglibdir)"
+	@list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \
+	  if test -f $$p; then \
+	    f=$(am__strip_dir) \
+	    echo " $(LIBTOOL) --mode=install $(pkglibLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) '$$p' '$(DESTDIR)$(pkglibdir)/$$f'"; \
+	    $(LIBTOOL) --mode=install $(pkglibLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) "$$p" "$(DESTDIR)$(pkglibdir)/$$f"; \
+	  else :; fi; \
+	done
+
+uninstall-pkglibLTLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \
+	  p=$(am__strip_dir) \
+	  echo " $(LIBTOOL) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$p'"; \
+	  $(LIBTOOL) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$p"; \
+	done
+
+clean-pkglibLTLIBRARIES:
+	-test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES)
+	@list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \
+	  dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+	  test "$$dir" != "$$p" || dir=.; \
+	  echo "rm -f \"$${dir}/so_locations\""; \
+	  rm -f "$${dir}/so_locations"; \
+	done
+checkpoint_xlch.la: $(checkpoint_xlch_la_OBJECTS) $(checkpoint_xlch_la_DEPENDENCIES) 
+	$(checkpoint_xlch_la_LINK) -rpath $(pkglibdir) $(checkpoint_xlch_la_OBJECTS) $(checkpoint_xlch_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/checkpoint_xlch.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/config.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@	$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
+	fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	test -z "$(CTAGS_ARGS)$$tags$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$tags $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+	for dir in "$(DESTDIR)$(pkglibdir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \
+	mostlyclean-am
+
+distclean: distclean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-exec-am: install-pkglibLTLIBRARIES
+
+install-html: install-html-am
+
+install-info: install-info-am
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-ps: install-ps-am
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-pkglibLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+	clean-libtool clean-pkglibLTLIBRARIES ctags distclean \
+	distclean-compile distclean-generic distclean-libtool \
+	distclean-tags distdir dvi dvi-am html html-am info info-am \
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-pkglibLTLIBRARIES \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES
+
+
+config.c: Makefile
+	@( echo "char *scch_path = \"$(prefix)/sbin/scch\";"\
+         ) > config.c
+
+force:
+
+$(checkpoint_xlch_LDADD) : force
+	@cd `dirname $@` && $(MAKE) `basename $@`
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/plugins/checkpoint/xlch/checkpoint_xlch.c b/src/plugins/checkpoint/xlch/checkpoint_xlch.c
new file mode 100644
index 00000000000..c0f17985d56
--- /dev/null
+++ b/src/plugins/checkpoint/xlch/checkpoint_xlch.c
@@ -0,0 +1,696 @@
+/*****************************************************************************\
+ *  checkpoint_xlch.c - XLCH slurm checkpoint plugin.
+ *  $Id: checkpoint_xlch.c 0001 2006-10-31 10:55:11Z hjcao $
+ *****************************************************************************
+ *  Copied from checkpoint_aix.c
+ *  
+ *  Copyright (C) 2004 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Morris Jette <jette1@llnl.gov>
+ *  UCRL-CODE-226842.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission 
+ *  to link the code of portions of this program with the OpenSSL library under 
+ *  certain conditions as described in each individual source file, and 
+ *  distribute linked combinations including the two. You must obey the GNU 
+ *  General Public License in all respects for all of the code used other than 
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this 
+ *  exception to your version of the file(s), but you are not obligated to do 
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in 
+ *  the program, then also delete it here.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#if HAVE_STDINT_H
+#  include <stdint.h>
+#endif
+#if HAVE_INTTYPES_H
+#  include <inttypes.h>
+#endif
+#ifdef WITH_PTHREADS
+#  include <pthread.h>
+#endif
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <slurm/slurm.h>
+#include <slurm/slurm_errno.h>
+
+#include "src/common/list.h"
+#include "src/common/log.h"
+#include "src/common/pack.h"
+#include "src/common/xassert.h"
+#include "src/common/xstring.h"
+#include "src/common/xmalloc.h"
+#include "src/slurmctld/agent.h"
+#include "src/slurmctld/slurmctld.h"
+
+#define SIGCKPT 20
+
+struct check_job_info {
+	uint16_t disabled;	/* counter, checkpointable only if zero */
+	uint16_t task_cnt;	/* task count, copied from the step layout */
+	uint16_t reply_cnt;	/* number of tasks that have replied so far */
+	uint16_t wait_time;	/* seconds before the timeout signal is sent */
+	time_t   time_stamp;	/* start of the checkpoint in progress, 0 if none */
+	uint32_t error_code;	/* highest error code reported by any task */
+	char    *error_msg;	/* message that came with error_code */
+	uint16_t sig_done;	/* signal sent to the step on completion, 0 = none */
+	bitstr_t *replied;	/* which tasks have replied to the checkpoint.
+				   XXX: only valid while an operation is in progress */
+	pthread_mutex_t mutex;	/* held by slurm_ckpt_op and slurm_ckpt_task_comp */
+};
+
+static void _send_sig(uint32_t job_id, uint32_t step_id, uint16_t signal, 
+		      char *nodelist);
+static void _send_ckpt(uint32_t job_id, uint32_t step_id, uint16_t signal, 
+		       time_t timestamp, char *nodelist);
+static int _step_ckpt(struct step_record * step_ptr, uint16_t wait, 
+		      uint16_t signal, uint16_t sig_timeout);
+
+/* checkpoint request timeout processing */
+static pthread_t	ckpt_agent_tid = 0;	/* timeout thread, 0 if none */
+static pthread_mutex_t	ckpt_agent_mutex = PTHREAD_MUTEX_INITIALIZER;
+static List		ckpt_timeout_list = NULL;	/* of struct ckpt_timeout_info */
+struct ckpt_timeout_info {
+	uint32_t   job_id;	/* job and step the timeout belongs to */
+	uint32_t   step_id;
+	uint16_t   signal;	/* signal sent to the step when the timeout expires */
+	time_t     start_time;	/* time stamp of the checkpoint operation */
+	time_t     end_time;	/* start_time plus the wait time */
+	char*      nodelist;	/* private copy of the nodes to signal */
+};
+static void *_ckpt_agent_thr(void *arg);
+static void _ckpt_enqueue_timeout(uint32_t job_id, uint32_t step_id, 
+				  time_t start_time, uint16_t signal,
+				  uint16_t wait_time, char *nodelist);
+static void  _ckpt_dequeue_timeout(uint32_t job_id, uint32_t step_id,
+				   time_t start_time);
+static void  _ckpt_timeout_free(void *rec);
+static void  _ckpt_signal_step(struct ckpt_timeout_info *rec);
+
+static int _on_ckpt_complete(struct step_record *step_ptr, uint32_t error_code);
+
+extern char *scch_path;
+
+/*
+ * These variables are required by the generic plugin interface.  If they
+ * are not found in the plugin, the plugin loader will ignore it.
+ *
+ * plugin_name - a string giving a human-readable description of the
+ * plugin.  There is no maximum length, but the symbol must refer to
+ * a valid string.
+ *
+ * plugin_type - a string suggesting the type of the plugin or its
+ * applicability to a particular form of data or method of data handling.
+ * If the low-level plugin API is used, the contents of this string are
+ * unimportant and may be anything.  SLURM uses the higher-level plugin
+ * interface which requires this string to be of the form
+ *
+ *	<application>/<method>
+ *
+ * where <application> is a description of the intended application of
+ * the plugin (e.g., "checkpoint" for SLURM checkpoint) and <method>
+ * is a description of how this plugin satisfies that application.  SLURM will
+ * only load checkpoint plugins if the plugin_type string has a 
+ * prefix of "checkpoint/".
+ *
+ * plugin_version - an unsigned 32-bit integer giving the version number
+ * of the plugin.  If major and minor revisions are desired, the major
+ * version number may be multiplied by a suitable magnitude constant such
+ * as 100 or 1000.  Various SLURM versions will likely require a certain
+ * minimum versions for their plugins as the checkpoint API matures.
+ */
+const char plugin_name[]       	= "XLCH checkpoint plugin";
+const char plugin_type[]       	= "checkpoint/xlch";
+const uint32_t plugin_version	= 10;
+
+/*
+ * init() is called when the plugin is loaded, before any other functions are
+ * called.  Starts the detached timeout thread; RET SLURM_ERROR if that fails.
+ */
+extern int init ( void )
+{
+	int rc = SLURM_SUCCESS;
+	pthread_attr_t attr;
+
+	slurm_attr_init(&attr);
+	if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))
+		error("pthread_attr_setdetachstate: %m");
+	if (pthread_create(&ckpt_agent_tid, &attr, _ckpt_agent_thr, NULL)) {
+		error("pthread_create: %m");
+		rc = SLURM_ERROR;
+	}
+	slurm_attr_destroy(&attr);	/* also released when thread creation fails */
+	return rc;
+}
+
+
+/* fini() - cancel the timeout thread; SLURM_ERROR if it survives 4 attempts */
+extern int fini ( void )
+{
+	int i;
+
+	if (!ckpt_agent_tid)	/* no thread to cancel */
+		return SLURM_SUCCESS;
+	for (i=0; i<4; i++) {
+		if (pthread_cancel(ckpt_agent_tid)) {	/* fails once it is gone */
+			ckpt_agent_tid = 0;
+			return SLURM_SUCCESS;
+		}
+		usleep(1000);
+	}
+	error("Could not kill checkpoint pthread");
+	return SLURM_ERROR;
+}
+
+/*
+ * The remainder of this file implements the standard SLURM checkpoint API.
+ */
+
+/*
+ * slurm_ckpt_op - perform a checkpoint operation on a job step
+ * IN op - CHECK_* operation to perform
+ * IN data - wait time handed to _step_ckpt() by CHECK_CREATE/CHECK_VACATE
+ * IN step_ptr - step to operate on
+ * OUT event_time - CHECK_ABLE: time stamp of a checkpoint still in progress
+ * OUT error_code, error_msg - CHECK_ERROR: recorded error (caller xfrees msg)
+ * RET SLURM_SUCCESS or an error code
+ */
+extern int slurm_ckpt_op ( uint16_t op, uint16_t data,
+			   struct step_record * step_ptr, time_t * event_time, 
+			   uint32_t *error_code, char **error_msg )
+{
+	int rc = SLURM_SUCCESS;
+	struct check_job_info *check_ptr;
+
+	xassert(step_ptr);
+	check_ptr = (struct check_job_info *) step_ptr->check_job;
+	xassert(check_ptr);	/* verify before the first dereference */
+	slurm_mutex_lock (&check_ptr->mutex);
+	if (step_ptr->step_layout)	/* may be unset, e.g. for a batch step */
+		check_ptr->task_cnt = step_ptr->step_layout->task_cnt;
+
+	switch (op) {
+		case CHECK_ABLE:
+			if (check_ptr->disabled)
+				rc = ESLURM_DISABLED;
+			else if (check_ptr->reply_cnt < check_ptr->task_cnt)
+				*event_time = check_ptr->time_stamp;
+			break;
+		case CHECK_DISABLE:
+			check_ptr->disabled++;
+			break;
+		case CHECK_ENABLE:
+			if (check_ptr->disabled)	/* do not wrap below zero */
+				check_ptr->disabled--;
+			break;
+		case CHECK_CREATE:
+		case CHECK_VACATE:
+			if (check_ptr->time_stamp != 0) {
+				rc = EALREADY;
+				break;
+			}
+			check_ptr->time_stamp = time(NULL);
+			check_ptr->reply_cnt = 0;
+			FREE_NULL_BITMAP (check_ptr->replied);
+			check_ptr->replied = bit_alloc(check_ptr->task_cnt);
+			check_ptr->error_code = 0;
+			/* a vacate makes the step exit elegantly when done */
+			check_ptr->sig_done = (op == CHECK_VACATE) ? SIGTERM : 0;
+			xfree(check_ptr->error_msg);
+			rc = _step_ckpt(step_ptr, data, SIGCKPT, SIGKILL);
+			if (rc != SLURM_SUCCESS) {
+				/* nothing started: clear state, else EALREADY forever */
+				check_ptr->time_stamp = 0;
+				FREE_NULL_BITMAP (check_ptr->replied);
+			}
+			break;
+		case CHECK_RESTART:
+			rc = ESLURM_NOT_SUPPORTED;
+			break;
+		case CHECK_ERROR:
+			xassert(error_code);
+			xassert(error_msg);
+			*error_code = check_ptr->error_code;
+			xfree(*error_msg);
+			*error_msg = xstrdup(check_ptr->error_msg);
+			break;
+		default:
+			error("Invalid checkpoint operation: %d", op);
+			rc = EINVAL;
+	}
+	slurm_mutex_unlock (&check_ptr->mutex);
+	return rc;
+}
+
+/* Not called by this plugin: logs an error and returns SLURM_FAILURE */
+extern int slurm_ckpt_comp ( struct step_record * step_ptr, time_t event_time,
+		uint32_t error_code, char *error_msg )
+{
+	error("checkpoint/xlch: slurm_ckpt_comp not implemented");
+	return SLURM_FAILURE; 
+}
+
+/*
+ * slurm_ckpt_task_comp - note that one task of a step finished its checkpoint
+ * IN step_ptr - step the task belongs to
+ * IN task_id - replying task, must be below the step's task count
+ * IN event_time - time stamp of the operation replied to, 0 matches any
+ * IN error_code, error_msg - status reported by the task
+ * RET SLURM_SUCCESS, EINVAL (bad task_id) or ESLURM_ALREADY_DONE (stale reply)
+ */
+extern int slurm_ckpt_task_comp ( struct step_record * step_ptr, uint32_t task_id,
+				  time_t event_time, uint32_t error_code, char *error_msg )
+{
+	struct check_job_info *check_ptr;
+	int rc = SLURM_SUCCESS;
+
+	xassert(step_ptr);
+	check_ptr = (struct check_job_info *) step_ptr->check_job;
+	xassert(check_ptr);
+
+	/* XXX: we need a mutex here, since in proc_req only JOB_READ locked */
+	debug3("slurm_ckpt_task_comp: job %u.%hu, task %u, error %d",
+	       step_ptr->job_ptr->job_id, step_ptr->step_id, task_id,
+	       error_code);
+	slurm_mutex_lock (&check_ptr->mutex);
+
+	/* For now event_time is always sent as 0 and does not identify the
+	 * operation. TODO: consider sending it to the task via sigqueue(). */
+	if (event_time && (event_time != check_ptr->time_stamp)) {
+		rc = ESLURM_ALREADY_DONE;
+		goto out;
+	}
+	/* Range-check the full 32-bit task_id before it indexes the bitmap */
+	if (task_id >= check_ptr->task_cnt) {
+		error("invalid task_id %u, task_cnt: %hu", task_id, 
+		      check_ptr->task_cnt);
+		rc = EINVAL;
+		goto out;
+	}
+	if (!check_ptr->replied || bit_test (check_ptr->replied, task_id)) {
+		rc = ESLURM_ALREADY_DONE;
+		goto out;
+	}
+	bit_set (check_ptr->replied, task_id);
+	check_ptr->reply_cnt ++;
+
+	/* TODO: check the error_code */
+	if (error_code > check_ptr->error_code) {
+		info("slurm_ckpt_task_comp error %u: %s", error_code, error_msg);
+		check_ptr->error_code = error_code;
+		xfree(check_ptr->error_msg);
+		check_ptr->error_msg = xstrdup(error_msg);
+	}
+
+	/* Once every task has replied, with or without error, the operation ends */
+	if (check_ptr->reply_cnt == check_ptr->task_cnt) { /* all tasks done */
+		time_t now = time(NULL);
+		long delay = (long) difftime(now, check_ptr->time_stamp);
+		info("Checkpoint complete for job %u.%u in %ld seconds",
+		     step_ptr->job_ptr->job_id, step_ptr->step_id,
+		     delay);
+		/* remove the timeout */
+		_ckpt_dequeue_timeout(step_ptr->job_ptr->job_id,
+				      step_ptr->step_id, check_ptr->time_stamp);
+		/* free the replied bitstr */
+		FREE_NULL_BITMAP (check_ptr->replied);
+		if (check_ptr->sig_done) {
+			info ("checkpoint step %u.%hu done, sending signal %hu", 
+			      step_ptr->job_ptr->job_id,
+			      step_ptr->step_id, check_ptr->sig_done);
+			_send_sig(step_ptr->job_ptr->job_id, step_ptr->step_id,
+				  check_ptr->sig_done, 
+				  step_ptr->step_layout->node_list);
+		}
+		_on_ckpt_complete(step_ptr, check_ptr->error_code); /* how about we execute a program? */
+		check_ptr->time_stamp = 0; /* this enables checkpoint again */
+	}
+
+ out:
+	slurm_mutex_unlock (&check_ptr->mutex);
+	return rc; 
+}
+
+/* Allocate a checkpoint record, initialize its mutex and return it through
+ * jobinfo. Always returns SLURM_SUCCESS. */
+extern int slurm_ckpt_alloc_job(check_jobinfo_t *jobinfo)
+{
+	struct check_job_info *new_rec = xmalloc(sizeof(*new_rec));
+	slurm_mutex_init (&new_rec->mutex);
+	*jobinfo = (check_jobinfo_t) new_rec;
+	return SLURM_SUCCESS;
+}
+
+extern int slurm_ckpt_free_job(check_jobinfo_t jobinfo)
+{
+	struct check_job_info *check_ptr = (struct check_job_info *)jobinfo;
+	if (check_ptr) {	/* a NULL jobinfo has nothing to release */
+		xfree (check_ptr->error_msg);
+		FREE_NULL_BITMAP (check_ptr->replied);
+	}	/* NOTE(review): the mutex is never pthread_mutex_destroy()ed */
+	xfree(jobinfo);	/* the record itself; always returns SLURM_SUCCESS */
+	return SLURM_SUCCESS;
+}
+
+/* Pack a checkpoint record into buffer, in the field order that
+ * slurm_ckpt_unpack_job() reads back. sig_done and the mutex are not packed. */
+extern int slurm_ckpt_pack_job(check_jobinfo_t jobinfo, Buf buffer)
+{
+	struct check_job_info *rec = (struct check_job_info *)jobinfo;
+
+	pack16(rec->disabled, buffer);
+	pack16(rec->task_cnt, buffer);
+	pack16(rec->reply_cnt, buffer);
+	pack16(rec->wait_time, buffer);
+	pack_bit_fmt(rec->replied, buffer);
+	/* error status and time stamp of the operation */
+	pack32(rec->error_code, buffer);
+	packstr(rec->error_msg, buffer);
+	pack_time(rec->time_stamp, buffer);
+	return SLURM_SUCCESS;
+}
+
+/* Fill a checkpoint record from buffer, reading the fields in the order
+ * slurm_ckpt_pack_job() wrote them. RET SLURM_SUCCESS or SLURM_ERROR */
+extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer)
+{
+	struct check_job_info *rec = (struct check_job_info *)jobinfo;
+	uint32_t str_len;
+	char *replied_fmt;
+
+	safe_unpack16(&rec->disabled, buffer);
+	safe_unpack16(&rec->task_cnt, buffer);
+	safe_unpack16(&rec->reply_cnt, buffer);
+	safe_unpack16(&rec->wait_time, buffer);
+	safe_unpackstr_xmalloc(&replied_fmt, &str_len, buffer);
+	if (replied_fmt != NULL) {
+		/* rebuild the reply bitmap from its packed string */
+		rec->replied = bit_alloc(rec->task_cnt);
+		bit_unfmt(rec->replied, replied_fmt);
+		xfree(replied_fmt);
+	} else
+		rec->replied = NULL;
+
+	safe_unpack32(&rec->error_code, buffer);
+	safe_unpackstr_xmalloc(&rec->error_msg, &str_len, buffer);
+	safe_unpack_time(&rec->time_stamp, buffer);
+	return SLURM_SUCCESS; 
+
+    unpack_error:
+	xfree(rec->error_msg);
+	return SLURM_ERROR;
+}
+
+/* Queue a REQUEST_CHECKPOINT_TASKS RPC for one job step. The agent is told
+ * to retry so that every node in nodelist eventually gets the request. */
+static void _send_ckpt(uint32_t job_id, uint32_t step_id, uint16_t signal, 
+		       time_t timestamp, char *nodelist)
+{
+	checkpoint_tasks_msg_t *req = xmalloc(sizeof(*req));
+	agent_arg_t *agent = xmalloc(sizeof(*agent));
+
+	/* RPC payload */
+	req->job_id      = job_id;
+	req->job_step_id = step_id;
+	req->signal      = signal;
+	req->timestamp   = timestamp;
+
+	/* agent request wrapping the payload */
+	agent->hostlist   = hostlist_create(nodelist);
+	agent->node_count = hostlist_count(agent->hostlist);
+	agent->msg_type   = REQUEST_CHECKPOINT_TASKS;
+	agent->msg_args   = req;
+	agent->retry      = 1;
+	agent_queue_request(agent);
+}
+
+/* Queue a REQUEST_SIGNAL_TASKS RPC that delivers signal to a job step on
+ * the nodes named in nodelist */
+static void _send_sig(uint32_t job_id, uint32_t step_id, uint16_t signal, 
+		      char *nodelist)
+{
+	kill_tasks_msg_t *req = xmalloc(sizeof(*req));
+	agent_arg_t *agent = xmalloc(sizeof(*agent));
+
+	/* RPC payload */
+	req->job_id      = job_id;
+	req->job_step_id = step_id;
+	req->signal      = signal;
+
+	/* agent request wrapping the payload */
+	agent->hostlist   = hostlist_create(nodelist);
+	agent->node_count = hostlist_count(agent->hostlist);
+	agent->msg_type   = REQUEST_SIGNAL_TASKS;
+	agent->msg_args   = req;
+	agent->retry      = 1;
+	agent_queue_request(agent);
+}
+
+/* Send the checkpoint request to the nodes of a job step and queue the
+ * timeout that delivers sig_timeout once wait seconds have passed (if any).
+ * RET SLURM_SUCCESS, or an ESLURM_* code when nothing was started */
+static int _step_ckpt(struct step_record * step_ptr, uint16_t wait, 
+		      uint16_t signal, uint16_t sig_timeout)
+{
+	struct check_job_info *check_ptr;
+	struct job_record *job_ptr;
+	char *node_names;
+
+	xassert(step_ptr);
+	check_ptr = (struct check_job_info *) step_ptr->check_job;
+	xassert(check_ptr);
+	job_ptr = step_ptr->job_ptr;
+	xassert(job_ptr);
+
+	if (IS_JOB_FINISHED(job_ptr))
+		return ESLURM_ALREADY_DONE;
+	if (check_ptr->disabled)
+		return ESLURM_DISABLED;
+	if (check_ptr->task_cnt == 0) {
+		error("_step_ckpt: job %u.%u has no tasks to checkpoint", 
+			job_ptr->job_id,
+			step_ptr->step_id);
+		return ESLURM_INVALID_NODE_NAME;
+	}
+
+	/* TODO: how about change wait_time according to task_cnt? */
+	check_ptr->wait_time = wait;
+	node_names = xstrdup (step_ptr->step_layout->node_list);
+	_send_ckpt(job_ptr->job_id, step_ptr->step_id,
+		   signal, check_ptr->time_stamp, node_names);
+	_ckpt_enqueue_timeout(job_ptr->job_id, step_ptr->step_id,
+			      check_ptr->time_stamp, sig_timeout,
+			      check_ptr->wait_time, node_names);
+	xfree (node_names);
+
+	info("checkpoint requested for job %u.%u", job_ptr->job_id,
+	     step_ptr->step_id);
+	return SLURM_SUCCESS;
+}
+
+
+/* Send the signal stored in a timeout record to the record's job step,
+ * on the nodes the record names */
+static void _ckpt_signal_step(struct ckpt_timeout_info *rec)
+{	_send_sig(rec->job_id, rec->step_id, rec->signal, rec->nodelist);
+}
+
+/* Checkpoint timeout pthread: once a second, send the timeout signal for
+ * every queued request whose end_time has passed and drop it from the list.
+ * Never returns, but is cancelled on plugin termination */
+static void *_ckpt_agent_thr(void *arg)
+{
+	ListIterator iter;
+	struct ckpt_timeout_info *rec;
+	time_t now;
+
+	while (1) {
+		sleep(1);
+		slurm_mutex_lock(&ckpt_agent_mutex);	/* guards ckpt_timeout_list */
+		if (ckpt_timeout_list) {
+			now = time(NULL);
+			iter = list_iterator_create(ckpt_timeout_list);
+			/* look for and process any timeouts */
+			while ((rec = list_next(iter))) {
+				if (rec->end_time > now)
+					continue;
+				info("checkpoint timeout for %u.%u", 
+					rec->job_id, rec->step_id);
+				_ckpt_signal_step(rec);
+				list_delete_item(iter);
+			}
+			list_iterator_destroy(iter);
+		}
+		slurm_mutex_unlock(&ckpt_agent_mutex);
+	}
+}
+
+/* Queue the timeout of a checkpoint request: unless the step reports
+ * completion first, signal is sent to nodelist wait_time seconds after
+ * start_time. Nothing is queued when wait_time or signal is zero. */
+static void _ckpt_enqueue_timeout(uint32_t job_id, uint32_t step_id, 
+				  time_t start_time, uint16_t signal,
+				  uint16_t wait_time, char *nodelist)
+{
+	struct ckpt_timeout_info *entry;
+
+	if (!wait_time || !signal)
+		return;
+	entry = xmalloc(sizeof(*entry));
+	entry->nodelist   = xstrdup(nodelist);
+	entry->end_time   = start_time + wait_time;
+	entry->start_time = start_time;
+	entry->signal     = signal;
+	entry->step_id    = step_id;
+	entry->job_id     = job_id;
+	slurm_mutex_lock(&ckpt_agent_mutex);
+	if (ckpt_timeout_list == NULL)
+		ckpt_timeout_list = list_create(_ckpt_timeout_free);
+	list_enqueue(ckpt_timeout_list, entry);
+	slurm_mutex_unlock(&ckpt_agent_mutex);
+}
+
+/* List destructor for ckpt_timeout_list entries; a NULL record is ignored */
+static void _ckpt_timeout_free(void *rec)
+{
+	struct ckpt_timeout_info *entry = rec;
+	if (entry == NULL)
+		return;
+	xfree(entry->nodelist);
+	xfree(entry);
+}
+
+/* Remove the first queued timeout matching job_id and step_id, because the
+ * operation completed. A start_time of zero matches any start time. */
+static void _ckpt_dequeue_timeout(uint32_t job_id, uint32_t step_id,
+		time_t start_time)
+{
+	ListIterator iter;
+	struct ckpt_timeout_info *entry;
+
+	slurm_mutex_lock(&ckpt_agent_mutex);
+	if (ckpt_timeout_list) {
+		iter = list_iterator_create(ckpt_timeout_list);
+		while ((entry = list_next(iter))) {
+			if ((entry->job_id == job_id)
+			    &&  (entry->step_id == step_id)
+			    &&  (!start_time || (entry->start_time == start_time))) {
+				list_delete_item(iter);
+				break;
+			}
+		}
+		list_iterator_destroy(iter);
+	}
+	slurm_mutex_unlock(&ckpt_agent_mutex);
+}
+
+
+/* A checkpoint completed: run "scch_path <job> <step> <error_code>
+ * <ckpt_path>" to process the image files. RET SLURM_ERROR if scch is not
+ * accessible or fork fails, else SLURM_SUCCESS (scch's own status is unseen) */
+static int _on_ckpt_complete(struct step_record *step_ptr, uint32_t error_code)
+{
+	int status;
+	pid_t cpid;
+
+	if (access(scch_path, R_OK | X_OK) < 0) {
+		info("Access denied for %s: %m", scch_path);
+		return SLURM_ERROR;
+	}
+
+	if ((cpid = fork()) < 0) {
+		error ("_on_ckpt_complete: fork: %m");
+		return SLURM_ERROR;
+	}
+	if (cpid == 0) {
+		/*
+		 * Fork twice to avoid zombies. The grandchild is not waited
+		 * for because the job read lock is held and deleting/moving
+		 * the image files could take minutes, so a user may request
+		 * another checkpoint before SCCH finishes. Both children
+		 * leave through _exit(), so the daemon's atexit handlers and
+		 * inherited stdio buffers are not run or flushed again.
+		 */
+		if ((cpid = fork()) < 0) {
+			error ("_on_ckpt_complete: second fork: %m");
+			_exit(127);
+		}
+		/* grand child execs */
+		if (cpid == 0) {
+			char *args[6];
+			char str_job[11];
+			char str_step[11];
+			char str_err[11];
+
+			/*
+			 * XXX: if slurmctld is running as root, we must setuid here.
+			 * But what if slurmctld is running as SlurmUser?
+			 * How about we make scch setuid and pass the user/group to it?
+			 */
+			if (geteuid() == 0) { /* root; NOTE(review): supplementary groups are kept */
+				if (setgid(step_ptr->job_ptr->group_id) < 0) {
+					error ("_on_ckpt_complete: failed to "
+						"setgid: %m");
+					_exit(127);
+				}
+				if (setuid(step_ptr->job_ptr->user_id) < 0) {
+					error ("_on_ckpt_complete: failed to "
+						"setuid: %m");
+					_exit(127);
+				}
+			}
+			snprintf(str_job,  sizeof(str_job),  "%u",  
+				 step_ptr->job_ptr->job_id);
+			snprintf(str_step, sizeof(str_step), "%hu", 
+				 step_ptr->step_id);
+			snprintf(str_err,  sizeof(str_err),  "%u",  
+				 error_code);
+
+			args[0] = scch_path;
+			args[1] = str_job;
+			args[2] = str_step;
+			args[3] = str_err;
+			args[4] = step_ptr->ckpt_path;
+			args[5] = NULL;
+
+			execv(scch_path, args);
+			error("help! %m");
+			_exit(127);
+		}
+		/* child just exits */
+		_exit(0);
+	}
+
+	while(1) {
+		if (waitpid(cpid, &status, 0) < 0 && errno == EINTR)
+			continue;
+		break;
+	}
+	return SLURM_SUCCESS;
+}
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index c1f8e042718..ad1779003cc 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -89,6 +89,7 @@ static int          _make_step_cred(struct step_record *step_rec,
 inline static void  _slurm_rpc_allocate_resources(slurm_msg_t * msg);
 inline static void  _slurm_rpc_checkpoint(slurm_msg_t * msg);
 inline static void  _slurm_rpc_checkpoint_comp(slurm_msg_t * msg);
+inline static void  _slurm_rpc_checkpoint_task_comp(slurm_msg_t * msg);
 inline static void  _slurm_rpc_delete_partition(slurm_msg_t * msg);
 inline static void  _slurm_rpc_complete_job_allocation(slurm_msg_t * msg);
 inline static void  _slurm_rpc_complete_batch_script(slurm_msg_t * msg);
@@ -251,6 +252,10 @@ void slurmctld_req (slurm_msg_t * msg)
 		_slurm_rpc_checkpoint_comp(msg);
 		slurm_free_checkpoint_comp_msg(msg->data);
 		break;
+	case REQUEST_CHECKPOINT_TASK_COMP:
+		_slurm_rpc_checkpoint_task_comp(msg);
+		slurm_free_checkpoint_task_comp_msg(msg->data);
+		break;
 	case REQUEST_SUSPEND:
 		_slurm_rpc_suspend(msg);
 		slurm_free_suspend_msg(msg->data);
@@ -2357,6 +2362,37 @@ inline static void  _slurm_rpc_checkpoint_comp(slurm_msg_t * msg)
 	}
 }
 
+/* Handle REQUEST_CHECKPOINT_TASK_COMP: a task reports that its checkpoint
+ * completed. The reply is sent by job_step_checkpoint_task_comp(). */
+inline static void  _slurm_rpc_checkpoint_task_comp(slurm_msg_t * msg)
+{
+	/* Locks: read job */
+	slurmctld_lock_t job_read_lock = {
+		NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
+	checkpoint_task_comp_msg_t *req = (checkpoint_task_comp_msg_t *) msg->data;
+	uid_t req_uid;
+	int rc = SLURM_SUCCESS;
+	DEF_TIMERS;
+
+	START_TIMER;
+	debug2("Processing RPC: REQUEST_CHECKPOINT_TASK_COMP");
+	req_uid = g_slurm_auth_get_uid(msg->auth_cred);
+
+	/* do RPC call and send reply */
+	lock_slurmctld(job_read_lock);
+	rc = job_step_checkpoint_task_comp(req, req_uid, msg->conn_fd);
+	unlock_slurmctld(job_read_lock);
+	END_TIMER2("_slurm_rpc_checkpoint_task_comp");
+	if (!rc) {
+		info("_slurm_rpc_checkpoint_task_comp %u.%u %s",
+			req->job_id, req->step_id, TIME_STR);
+	} else {
+		info("_slurm_rpc_checkpoint_task_comp %u.%u: %s",
+			req->job_id, req->step_id,
+			slurm_strerror(rc));
+	}
+}
+
 static char **
 _xduparray(uint16_t size, char ** array)
 {
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index d9c0eaa0784..96fa329956d 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -435,6 +435,7 @@ struct 	step_record {
 	char *host;			/* host for srun communications */
 	uint16_t batch_step;		/* 1 if batch job step, 0 otherwise */
 	uint16_t ckpt_interval;		/* checkpoint interval in minutes */
+	char *ckpt_path;	        /* path to store checkpoint image files */
 	uint16_t exclusive;	/* FIXME */
 	time_t ckpt_time;		/* time of last checkpoint */
 	switch_jobinfo_t switch_job;	/* switch context, opaque */
@@ -844,6 +845,15 @@ extern int job_step_checkpoint(checkpoint_msg_t *ckpt_ptr,
  */
 extern int job_step_checkpoint_comp(checkpoint_comp_msg_t *ckpt_ptr,
 		uid_t uid, slurm_fd conn_fd);
+/*
+ * job_step_checkpoint_task_comp - note task checkpoint completion
+ * IN ckpt_ptr - checkpoint task complete status message
+ * IN uid - user id of the user issuing the RPC
+ * IN conn_fd - file descriptor on which to send reply
+ * RET 0 on success, otherwise ESLURM error code
+ */
+extern int job_step_checkpoint_task_comp(checkpoint_task_comp_msg_t *ckpt_ptr,
+                uid_t uid, slurm_fd conn_fd);
 
 /*
  * job_step_suspend - perform some suspend/resume operation
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index 747d4ec2b62..2009e3142f2 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -99,6 +99,7 @@ create_step_record (struct job_record *job_ptr)
 	step_ptr->step_id = (job_ptr->next_step_id)++;
 	step_ptr->start_time = time ( NULL ) ;
 	step_ptr->jobacct = jobacct_gather_g_create(NULL);
+	step_ptr->ckpt_path = NULL;
 	if (list_append (job_ptr->step_list, step_ptr) == NULL)
 		fatal ("create_step_record: unable to allocate memory");
 
@@ -143,6 +144,7 @@ delete_step_records (struct job_record *job_ptr, int filter)
 		FREE_NULL_BITMAP(step_ptr->exit_node_bitmap);
 		if (step_ptr->network)
 			xfree(step_ptr->network);
+		xfree(step_ptr->ckpt_path);
 		xfree(step_ptr);
 	}		
 
@@ -190,6 +192,7 @@ delete_step_record (struct job_record *job_ptr, uint32_t step_id)
 			FREE_NULL_BITMAP(step_ptr->exit_node_bitmap);
 			if (step_ptr->network)
 				xfree(step_ptr->network);
+			xfree(step_ptr->ckpt_path);
 			xfree(step_ptr);
 			error_code = 0;
 			break;
@@ -220,8 +223,8 @@ dump_step_desc(job_step_create_request_msg_t *step_spec)
 	debug3("   host=%s port=%u name=%s network=%s checkpoint=%u", 
 		step_spec->host, step_spec->port, step_spec->name,
 		step_spec->network, step_spec->ckpt_interval);
-	debug3("   exclusive=%u immediate=%u",
-		step_spec->exclusive, step_spec->immediate);
+	debug3("   checkpoint-path=%s exclusive=%u immediate=%u",
+	        step_spec->ckpt_path, step_spec->exclusive, step_spec->immediate);
 }
 
 
@@ -873,6 +876,7 @@ step_create(job_step_create_request_msg_t *step_specs,
 	step_ptr->ckpt_time = now;
 	step_ptr->exit_code = NO_VAL;
 	step_ptr->exclusive = step_specs->exclusive;
+	step_ptr->ckpt_path = xstrdup(step_specs->ckpt_path);
 
 	/* step's name and network default to job's values if not 
 	 * specified in the step specification */
@@ -884,7 +888,7 @@ step_create(job_step_create_request_msg_t *step_specs,
 		step_ptr->network = xstrdup(step_specs->network);
 	else
 		step_ptr->network = xstrdup(job_ptr->network);
-
+	
 	/* a batch script does not need switch info */
 	if (!batch_step) {
 		step_ptr->step_layout = 
@@ -1015,6 +1019,7 @@ static void _pack_ctld_job_step_info(struct step_record *step_ptr, Buf buffer)
 	packstr(step_ptr->name, buffer);
 	packstr(step_ptr->network, buffer);
 	pack_bit_fmt(step_ptr->step_node_bitmap, buffer);
+	packstr(step_ptr->ckpt_path, buffer);
 	
 }
 
@@ -1305,6 +1310,62 @@ extern int job_step_checkpoint_comp(checkpoint_comp_msg_t *ckpt_ptr,
 	return rc;
 }
 
+/*
+ * job_step_checkpoint_task_comp - note task checkpoint completion
+ * IN ckpt_ptr - checkpoint task complete status message
+ * IN uid - user id of the user issuing the RPC
+ * IN conn_fd - file descriptor on which to send reply
+ * RET 0 on success, otherwise ESLURM error code
+ * NOTE: the reply (RESPONSE_SLURM_RC carrying the return code) is sent from
+ *	here on every path, success or failure
+ */
+extern int job_step_checkpoint_task_comp(checkpoint_task_comp_msg_t *ckpt_ptr,
+		uid_t uid, slurm_fd conn_fd)
+{
+	int rc = SLURM_SUCCESS;
+	struct job_record *job_ptr;
+	struct step_record *step_ptr;
+	return_code_msg_t rc_msg;
+	slurm_msg_t resp_msg;
+
+	slurm_msg_t_init(&resp_msg);
+
+	/* validate the job, the requester and the job state */
+	job_ptr = find_job_record (ckpt_ptr->job_id);
+	if (!job_ptr) {
+		/* no such job */
+		rc = ESLURM_INVALID_JOB_ID;
+	} else if ((uid != job_ptr->user_id) && (uid != 0)) {
+		/* only the job's owner or uid 0 may report completion */
+		rc = ESLURM_ACCESS_DENIED;
+	} else if (job_ptr->job_state == JOB_PENDING) {
+		/* job has not started yet */
+		rc = ESLURM_JOB_PENDING;
+	} else if ((job_ptr->job_state != JOB_RUNNING)
+	&&         (job_ptr->job_state != JOB_SUSPENDED)) {
+		/* job is neither running nor suspended */
+		rc = ESLURM_ALREADY_DONE;
+	} else {
+		step_ptr = find_step_record(job_ptr, ckpt_ptr->step_id);
+		if (step_ptr) {
+			/* hand the completion to the checkpoint plugin */
+			rc = checkpoint_task_comp((void *)step_ptr, 
+				ckpt_ptr->task_id, ckpt_ptr->begin_time, 
+				ckpt_ptr->error_code, ckpt_ptr->error_msg);
+			last_job_update = time(NULL);
+		} else {
+			rc = ESLURM_INVALID_JOB_ID;
+		}
+	}
+
+	/* always answer the requester with the resulting code */
+	rc_msg.return_code = rc;
+	resp_msg.msg_type  = RESPONSE_SLURM_RC;
+	resp_msg.data      = &rc_msg;
+	(void) slurm_send_node_msg(conn_fd, &resp_msg);
+	return rc;
+}
+
 /*
  * step_partial_comp - Note the completion of a job step on at least
  *	some of its nodes
@@ -1568,6 +1629,7 @@ extern void dump_job_step_state(struct step_record *step_ptr, Buf buffer)
 	packstr(step_ptr->host,  buffer);
 	packstr(step_ptr->name, buffer);
 	packstr(step_ptr->network, buffer);
+	packstr(step_ptr->ckpt_path, buffer);
 	pack16(step_ptr->batch_step, buffer);
 	if (!step_ptr->batch_step) {
 		pack_slurm_step_layout(step_ptr->step_layout, buffer);
@@ -1588,7 +1650,7 @@ extern int load_step_state(struct job_record *job_ptr, Buf buffer)
 	uint16_t ckpt_interval;
 	uint32_t exit_code, name_len;
 	time_t start_time, pre_sus_time, ckpt_time;
-	char *host = NULL;
+	char *host = NULL, *ckpt_path = NULL;
 	char *name = NULL, *network = NULL, *bit_fmt = NULL;
 	switch_jobinfo_t switch_tmp = NULL;
 	check_jobinfo_t check_tmp = NULL;
@@ -1612,6 +1674,7 @@ extern int load_step_state(struct job_record *job_ptr, Buf buffer)
 	safe_unpackstr_xmalloc(&host, &name_len, buffer);
 	safe_unpackstr_xmalloc(&name, &name_len, buffer);
 	safe_unpackstr_xmalloc(&network, &name_len, buffer);
+	safe_unpackstr_xmalloc(&ckpt_path, &name_len, buffer);
 	safe_unpack16(&batch_step, buffer);
 	if (!batch_step) {
 		if (unpack_slurm_step_layout(&step_layout, buffer))
@@ -1642,6 +1705,7 @@ extern int load_step_state(struct job_record *job_ptr, Buf buffer)
 	step_ptr->cyclic_alloc = cyclic_alloc;
 	step_ptr->name         = name;
 	step_ptr->network      = network;
+	step_ptr->ckpt_path    = ckpt_path;
 	step_ptr->port         = port;
 	step_ptr->ckpt_interval= ckpt_interval;
 	step_ptr->host         = host;
@@ -1685,6 +1749,7 @@ extern int load_step_state(struct job_record *job_ptr, Buf buffer)
 	xfree(host);
 	xfree(name);
 	xfree(network);
+	xfree(ckpt_path);
 	xfree(bit_fmt);
 	if (switch_tmp)
 		switch_free_jobinfo(switch_tmp);
diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index 08690c02cad..5c20d1d8929 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -104,6 +104,7 @@ static int  _terminate_all_steps(uint32_t jobid, bool batch);
 static void _rpc_launch_tasks(slurm_msg_t *);
 static void _rpc_batch_job(slurm_msg_t *);
 static void _rpc_signal_tasks(slurm_msg_t *);
+static void _rpc_checkpoint_tasks(slurm_msg_t *);
 static void _rpc_terminate_tasks(slurm_msg_t *);
 static void _rpc_timelimit(slurm_msg_t *);
 static void _rpc_reattach_tasks(slurm_msg_t *);
@@ -178,6 +179,11 @@ slurmd_req(slurm_msg_t *msg)
 		_rpc_signal_tasks(msg);
 		slurm_free_kill_tasks_msg(msg->data);
 		break;
+	case REQUEST_CHECKPOINT_TASKS:
+		debug2("Processing RPC: REQUEST_CHECKPOINT_TASKS");
+		_rpc_checkpoint_tasks(msg);
+		slurm_free_checkpoint_tasks_msg(msg->data);
+		break;
 	case REQUEST_TERMINATE_TASKS:
 		debug2("Processing RPC: REQUEST_TERMINATE_TASKS");
 		_rpc_terminate_tasks(msg);
@@ -1061,6 +1067,50 @@ done:
 	slurm_send_rc_msg(msg, rc);
 }
 
+/* Handle REQUEST_CHECKPOINT_TASKS: authorize the requester, then pass the
+ * checkpoint signal and timestamp to the step's slurmstepd; always replies. */
+static void
+_rpc_checkpoint_tasks(slurm_msg_t *msg)
+{
+	int               fd;
+	int               rc = SLURM_SUCCESS;
+	uid_t             req_uid = g_slurm_auth_get_uid(msg->auth_cred);
+	checkpoint_tasks_msg_t *req = (checkpoint_tasks_msg_t *) msg->data;
+	slurmstepd_info_t *step;
+
+	fd = stepd_connect(conf->spooldir, conf->node_name,
+			   req->job_id, req->job_step_id);
+	if (fd == -1) {
+		debug("checkpoint for nonexistent %u.%u stepd_connect failed: %m",
+		      req->job_id, req->job_step_id);
+		rc = ESLURM_INVALID_JOB_ID;
+		goto done;
+	}
+	if ((step = stepd_get_info(fd)) == NULL) {
+		debug("checkpoint for nonexistent job %u.%u requested",
+		      req->job_id, req->job_step_id);
+		rc = ESLURM_INVALID_JOB_ID;
+		goto done2;
+	}
+	if ((req_uid != step->uid) && (!_slurm_authorized_user(req_uid))) {
+		debug("checkpoint req from uid %ld for job %u.%u owned by uid %ld",
+		      (long) req_uid, req->job_id, req->job_step_id,
+		      (long) step->uid);
+		rc = ESLURM_USER_ID_MISSING;     /* or bad in this case */
+		goto done3;
+	}
+
+	rc = stepd_checkpoint(fd, req->signal, req->timestamp);
+	if (rc == -1)	/* presumably stepd failed to signal the tasks */
+		rc = ESLURMD_JOB_NOTRUNNING;
+ done3:
+	xfree(step);
+ done2:
+	close(fd);
+ done:
+	slurm_send_rc_msg(msg, rc);
+}
+
 static void
 _rpc_terminate_tasks(slurm_msg_t *msg)
 {
diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c
index ca51e7bb646..6e535c8f10a 100644
--- a/src/slurmd/slurmstepd/req.c
+++ b/src/slurmd/slurmstepd/req.c
@@ -72,6 +72,7 @@ static int _handle_info(int fd, slurmd_job_t *job);
 static int _handle_signal_process_group(int fd, slurmd_job_t *job, uid_t uid);
 static int _handle_signal_task_local(int fd, slurmd_job_t *job, uid_t uid);
 static int _handle_signal_container(int fd, slurmd_job_t *job, uid_t uid);
+static int _handle_checkpoint_tasks(int fd, slurmd_job_t *job, uid_t uid);
 static int _handle_attach(int fd, slurmd_job_t *job, uid_t uid);
 static int _handle_pid_in_container(int fd, slurmd_job_t *job);
 static int _handle_daemon_pid(int fd, slurmd_job_t *job);
@@ -464,6 +465,10 @@ _handle_request(int fd, slurmd_job_t *job, uid_t uid, gid_t gid)
 		debug("Handling REQUEST_SIGNAL_CONTAINER");
 		rc = _handle_signal_container(fd, job, uid);
 		break;
+	case REQUEST_CHECKPOINT_TASKS:
+		debug("Handling REQUEST_CHECKPOINT_TASKS");
+		rc = _handle_checkpoint_tasks(fd, job, uid);
+		break;
 	case REQUEST_STATE:
 		debug("Handling REQUEST_STATE");
 		rc = _handle_state(fd, job);
@@ -742,6 +747,76 @@ rwfail:
 	return SLURM_FAILURE;
 }
 
+/*
+ * Handle REQUEST_CHECKPOINT_TASKS: read a signal number and a timestamp from
+ * fd, signal the step's process group and write the result code back to fd.
+ */
+static int
+_handle_checkpoint_tasks(int fd, slurmd_job_t *job, uid_t uid)
+{
+	static time_t last_timestamp = 0; /* stamp of last delivered request */
+	int rc = SLURM_SUCCESS;
+	int signal;
+	time_t timestamp;
+
+	debug3("_handle_checkpoint_tasks for job %u.%u",
+	       job->jobid, job->stepid);
+
+	safe_read(fd, &signal, sizeof(int));
+	safe_read(fd, &timestamp, sizeof(time_t));
+
+	debug3("  uid = %ld", (long)uid);
+	if (uid != job->uid && !_slurm_authorized_user(uid)) {
+		debug("checkpoint req from uid %ld for job %u.%u owned by uid %ld",
+		      (long)uid, job->jobid, job->stepid, (long)job->uid);
+		rc = EPERM;
+		goto done;
+	}
+
+	if (timestamp == last_timestamp) {
+		debug("duplicate checkpoint req for job %u.%u, timestamp %ld. discarded.",
+		      job->jobid, job->stepid, (long)timestamp);
+		rc = ESLURM_ALREADY_DONE; /* EINPROGRESS? */
+		goto done;
+	}
+
+	/* Sanity check: a pgid <= 1 is not a usable step process group */
+	if (job->pgid <= (pid_t)1) {
+		debug ("step %u.%u invalid [jmgr_pid:%d pgid:%d]",
+			job->jobid, job->stepid, job->jmgr_pid, job->pgid);
+		rc = ESLURMD_JOB_NOTRUNNING;
+		goto done;
+	}
+
+	/* Signal the process group, unless the step is currently suspended */
+	pthread_mutex_lock(&suspend_mutex);
+	if (suspended) {
+		rc = ESLURMD_STEP_SUSPENDED;
+		pthread_mutex_unlock(&suspend_mutex);
+		goto done;
+	}
+
+	/* TODO: send timestamp with signal */
+	if (killpg(job->pgid, signal) == -1) {
+		rc = -1;	/* Most probable ESRCH, resulting in ESLURMD_JOB_NOTRUNNING */
+		verbose("Error sending signal %d to %u.%u, pgid %d, errno: %d: %s",
+			signal, job->jobid, job->stepid, job->pgid,
+			errno, slurm_strerror(errno));
+	} else {
+		last_timestamp = timestamp;
+		verbose("Sent signal %d to %u.%u, pgid %d",
+			signal, job->jobid, job->stepid, job->pgid);
+	}
+	pthread_mutex_unlock(&suspend_mutex);
+
+done:
+	/* Send the return code */
+	safe_write(fd, &rc, sizeof(int));
+	return SLURM_SUCCESS;	/* rc itself travels to the peer over fd */
+rwfail:	/* presumably the safe_read()/safe_write() I/O error exit */
+	return SLURM_FAILURE;
+}
+
 static int
 _handle_terminate(int fd, slurmd_job_t *job, uid_t uid)
 {
diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c
index f1f247bd63b..6982206c5ba 100644
--- a/src/slurmd/slurmstepd/slurmstepd_job.c
+++ b/src/slurmd/slurmstepd/slurmstepd_job.c
@@ -202,6 +202,7 @@ job_create(launch_tasks_request_msg_t *msg)
 	job->cpu_bind = xstrdup(msg->cpu_bind);
 	job->mem_bind_type = msg->mem_bind_type;
 	job->mem_bind = xstrdup(msg->mem_bind);
+	job->ckpt_path = xstrdup(msg->ckpt_path);
 
 	job->env     = _array_copy(msg->envc, msg->env);
 	job->eio     = eio_handle_create();
@@ -229,6 +230,7 @@ job_create(launch_tasks_request_msg_t *msg)
 	job->envtp->cpu_bind = NULL;
 	job->envtp->mem_bind_type = 0;
 	job->envtp->mem_bind = NULL;
+	job->envtp->ckpt_path = NULL;
 	
 	memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr));
 	slurm_set_addr(&resp_addr,
@@ -342,6 +344,7 @@ job_batch_job_create(batch_job_launch_msg_t *msg)
 	job->envtp->cpu_bind = NULL;
 	job->envtp->mem_bind_type = 0;
 	job->envtp->mem_bind = NULL;
+	job->envtp->ckpt_path = NULL;
 	
 	srun = srun_info_create(NULL, NULL, NULL);
 
diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h
index fd02d2de02d..98dec76044c 100644
--- a/src/slurmd/slurmstepd/slurmstepd_job.h
+++ b/src/slurmd/slurmstepd/slurmstepd_job.h
@@ -191,6 +191,7 @@ typedef struct slurmd_job {
 	uint8_t        open_mode;	/* stdout/err append or truncate */
 	uint8_t        pty;		/* set if creating pseudo tty       */
 	job_options_t  options;
+	char          *ckpt_path;
 } slurmd_job_t;
 
 
diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c
index 94c021a3e72..ecea10e23f3 100644
--- a/src/slurmd/slurmstepd/task.c
+++ b/src/slurmd/slurmstepd/task.c
@@ -348,6 +348,7 @@ exec_task(slurmd_job_t *job, int i, int waitfd)
 	job->envtp->mem_bind = xstrdup(job->mem_bind);
 	job->envtp->mem_bind_type = job->mem_bind_type;
 	job->envtp->distribution = -1;
+	job->envtp->ckpt_path = xstrdup(job->ckpt_path);
 	setup_env(job->envtp);
 	setenvf(&job->envtp->env, "SLURMD_NODENAME", "%s", conf->node_name);
 	job->env = job->envtp->env;
diff --git a/src/srun/allocate.c b/src/srun/allocate.c
index 8ae9adb3543..e7dade02129 100644
--- a/src/srun/allocate.c
+++ b/src/srun/allocate.c
@@ -510,6 +510,7 @@ create_job_step(srun_job_t *job)
 	
 	job->ctx_params.relative = (uint16_t)opt.relative;
 	job->ctx_params.ckpt_interval = (uint16_t)opt.ckpt_interval;
+	job->ctx_params.ckpt_path = opt.ckpt_path;
 	job->ctx_params.exclusive = (uint16_t)opt.exclusive;
 	job->ctx_params.immediate = (uint16_t)opt.immediate;
 	job->ctx_params.verbose_level = (uint16_t)_verbose;
diff --git a/src/srun/opt.c b/src/srun/opt.c
index a474e2e229e..4b4c1a249f8 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -162,7 +162,8 @@
 #define LONG_OPT_GET_USER_ENV    0x145
 #define LONG_OPT_PTY             0x146
 #define LONG_OPT_CHECKPOINT      0x147
-#define LONG_OPT_OPEN_MODE       0x148
+#define LONG_OPT_CHECKPOINT_PATH 0x148
+#define LONG_OPT_OPEN_MODE       0x149
 
 /*---- global variables, defined in opt.h ----*/
 int _verbose;
@@ -603,6 +604,7 @@ static void _opt_default()
 	opt.time_limit_str = NULL;
 	opt.ckpt_interval = 0;
 	opt.ckpt_interval_str = NULL;
+	opt.ckpt_path = NULL;
 	opt.partition = NULL;
 	opt.max_threads = MAX_THREADS;
 	pmi_server_max_threads(opt.max_threads);
@@ -756,6 +758,7 @@ env_vars_t env_vars[] = {
 {"SLURM_THREADS",       OPT_INT,        &opt.max_threads,   NULL             },
 {"SLURM_TIMELIMIT",     OPT_STRING,     &opt.time_limit_str,NULL             },
 {"SLURM_CHECKPOINT",    OPT_STRING,     &opt.ckpt_interval_str, NULL         },
+{"SLURM_CHECKPOINT_PATH",OPT_STRING,    &opt.ckpt_path,     NULL             },
 {"SLURM_WAIT",          OPT_INT,        &opt.max_wait,      NULL             },
 {"SLURM_DISABLE_STATUS",OPT_INT,        &opt.disable_status,NULL             },
 {"SLURM_MPI_TYPE",      OPT_MPI,        NULL,               NULL             },
@@ -1024,6 +1027,7 @@ static void set_options(const int argc, char **argv)
 		{"get-user-env",     optional_argument, 0, LONG_OPT_GET_USER_ENV},
 		{"pty",              no_argument,       0, LONG_OPT_PTY},
 		{"checkpoint",       required_argument, 0, LONG_OPT_CHECKPOINT},
+		{"checkpoint-path",  required_argument, 0, LONG_OPT_CHECKPOINT_PATH},
 		{"open-mode",        required_argument, 0, LONG_OPT_OPEN_MODE},
 		{NULL,               0,                 0, 0}
 	};
@@ -1542,6 +1546,10 @@ static void set_options(const int argc, char **argv)
 				      optarg);
 			}
 			break;
+		case LONG_OPT_CHECKPOINT_PATH:
+			xfree(opt.ckpt_path);
+			opt.ckpt_path = xstrdup(optarg);
+			break;
 		default:
 			if (spank_process_option (opt_char, optarg) < 0) {
 				exit (1);
@@ -1969,7 +1977,10 @@ static bool _opt_verify(void)
 			error("Invalid checkpoint interval specification");
 			exit(1);
 		}
-	} 
+	}
+
+	if (! opt.ckpt_path)
+		opt.ckpt_path = xstrdup(opt.cwd);
 
 	if ((opt.euid != (uid_t) -1) && (opt.euid != opt.uid)) 
 		opt.uid = opt.euid;
@@ -2091,6 +2102,7 @@ static void _opt_list()
 		info("time_limit     : %d", opt.time_limit);
 	if (opt.ckpt_interval)
 		info("checkpoint     : %d secs", opt.ckpt_interval);
+	info("checkpoint_path: %s", opt.ckpt_path);
 	info("wait           : %d", opt.max_wait);
 	if (opt.nice)
 		info("nice           : %d", opt.nice);
@@ -2168,6 +2180,7 @@ static void _usage(void)
 "            [--share] [--label] [--unbuffered] [-m dist] [-J jobname]\n"
 "            [--jobid=id] [--verbose] [--slurmd_debug=#]\n"
 "            [--core=type] [-T threads] [-W sec] [--checkpoint=time]\n"
+"            [--checkpoint-path=dir]\n"
 "            [--contiguous] [--mincpus=n] [--mem=MB] [--tmp=MB] [-C list]\n"
 "            [--mpi=type] [--account=name] [--dependency=type:jobid]\n"
 "            [--kill-on-bad-exit] [--propagate[=rlimits] [--comment=name]\n"
@@ -2248,6 +2261,7 @@ static void _help(void)
 "                              configuration specification for multiple programs\n"
 "      --get-user-env          used by Moab.  See srun man page.\n"
 "      --checkpoint=time       job step checkpoint interval\n"
+"      --checkpoint-path=dir   path to store job step checkpoint image files\n"
 #ifdef HAVE_PTY_H
 "      --pty                   run task zero in pseudo terminal\n"
 #endif
diff --git a/src/srun/opt.h b/src/srun/opt.h
index b18939420a8..9923c24a91d 100644
--- a/src/srun/opt.h
+++ b/src/srun/opt.h
@@ -111,6 +111,7 @@ typedef struct srun_options {
 	char *time_limit_str;	/* --time,   -t (string)	*/
 	int  ckpt_interval;	/* --checkpoint (int minutes)	*/
 	char *ckpt_interval_str;/* --checkpoint (string)	*/
+	char *ckpt_path;	/* --checkpoint-path (string)   */
 	bool exclusive;		/* --exclusive			*/
 	char *partition;	/* --partition=n,   -p n   	*/
 	enum task_dist_states
diff --git a/src/srun/srun.c b/src/srun/srun.c
index 344f57d8df6..f64e033a4a8 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -157,6 +157,7 @@ int srun(int ac, char **av)
 	env->nodeid = -1;
 	env->cli = NULL;
 	env->env = NULL;
+	env->ckpt_path = NULL;
 
 	logopt.stderr_level += _slurm_debug_env_val();
 	log_init(xbasename(av[0]), logopt, 0, NULL);
@@ -357,6 +358,7 @@ int srun(int ac, char **av)
 	launch_params.ntasks_per_node   = opt.ntasks_per_node;
 	launch_params.ntasks_per_socket = opt.ntasks_per_socket;
 	launch_params.ntasks_per_core   = opt.ntasks_per_core;
+	launch_params.ckpt_path = xstrdup(opt.ckpt_path);
 
 	/* job structure should now be filled in */
 	_setup_signals();
-- 
GitLab