From 1b52ea8a94c9cc7af3c2ff82f74da930ff0a063d Mon Sep 17 00:00:00 2001 From: Danny Auble <da@schedmd.com> Date: Tue, 23 Jul 2013 14:00:06 -0700 Subject: [PATCH] Remove duplicate job_container/none logic and make job_container/cncu work without a real Cray system (pretty much what job_container/none used to do). --- src/plugins/job_container/cncu/Makefile.am | 5 +- src/plugins/job_container/cncu/Makefile.in | 7 +- .../job_container/cncu/job_container_cncu.c | 24 +- .../job_container/none/job_container_none.c | 229 +----------------- src/plugins/proctrack/pgid/Makefile.in | 22 +- 5 files changed, 39 insertions(+), 248 deletions(-) diff --git a/src/plugins/job_container/cncu/Makefile.am b/src/plugins/job_container/cncu/Makefile.am index ea411c1aa2c..88c9edb650e 100644 --- a/src/plugins/job_container/cncu/Makefile.am +++ b/src/plugins/job_container/cncu/Makefile.am @@ -6,10 +6,7 @@ PLUGIN_FLAGS = -module -avoid-version --export-dynamic AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common $(CRAY_CPPFLAGS) -if HAVE_REAL_CRAY -CNCU_PLUGIN = job_container_cncu.la -endif -pkglib_LTLIBRARIES = $(CNCU_PLUGIN) +pkglib_LTLIBRARIES = job_container_cncu.la # job_container/cncu plugin. 
job_container_cncu_la_SOURCES = job_container_cncu.c diff --git a/src/plugins/job_container/cncu/Makefile.in b/src/plugins/job_container/cncu/Makefile.in index 9bc5aa5d42a..76a2e19a5fa 100644 --- a/src/plugins/job_container/cncu/Makefile.in +++ b/src/plugins/job_container/cncu/Makefile.in @@ -139,8 +139,6 @@ job_container_cncu_la_OBJECTS = $(am_job_container_cncu_la_OBJECTS) job_container_cncu_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(job_container_cncu_la_LDFLAGS) $(LDFLAGS) -o $@ -@HAVE_REAL_CRAY_TRUE@am_job_container_cncu_la_rpath = -rpath \ -@HAVE_REAL_CRAY_TRUE@ $(pkglibdir) DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp am__depfiles_maybe = depfiles @@ -398,8 +396,7 @@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign PLUGIN_FLAGS = -module -avoid-version --export-dynamic AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common $(CRAY_CPPFLAGS) -@HAVE_REAL_CRAY_TRUE@CNCU_PLUGIN = job_container_cncu.la -pkglib_LTLIBRARIES = $(CNCU_PLUGIN) +pkglib_LTLIBRARIES = job_container_cncu.la # job_container/cncu plugin. 
job_container_cncu_la_SOURCES = job_container_cncu.c @@ -471,7 +468,7 @@ clean-pkglibLTLIBRARIES: rm -f "$${dir}/so_locations"; \ done job_container_cncu.la: $(job_container_cncu_la_OBJECTS) $(job_container_cncu_la_DEPENDENCIES) $(EXTRA_job_container_cncu_la_DEPENDENCIES) - $(job_container_cncu_la_LINK) $(am_job_container_cncu_la_rpath) $(job_container_cncu_la_OBJECTS) $(job_container_cncu_la_LIBADD) $(LIBS) + $(job_container_cncu_la_LINK) -rpath $(pkglibdir) $(job_container_cncu_la_OBJECTS) $(job_container_cncu_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) diff --git a/src/plugins/job_container/cncu/job_container_cncu.c b/src/plugins/job_container/cncu/job_container_cncu.c index 64e4ae208b0..999adbbf4af 100644 --- a/src/plugins/job_container/cncu/job_container_cncu.c +++ b/src/plugins/job_container/cncu/job_container_cncu.c @@ -43,7 +43,9 @@ #include <sys/stat.h> #include <fcntl.h> +#ifdef HAVE_REAL_CRAY #include <job.h> /* Cray's job module component */ +#endif #include "slurm/slurm_errno.h" #include "src/common/slurm_xlator.h" @@ -183,6 +185,7 @@ static int _restore_state(char *dir_name) return error_code; } +#ifdef HAVE_REAL_CRAY static void _stat_reservation(char *type, rid_t resv_id) { struct job_resv_stat buf; @@ -196,6 +199,7 @@ static void _stat_reservation(char *type, rid_t resv_id) buf.num_files, buf.num_ipc_objs); } } +#endif static bool _get_debug_flag(void) { @@ -266,8 +270,10 @@ extern int container_p_restore(char *dir_name, bool recover) extern int container_p_create(uint32_t job_id) { +#ifdef HAVE_REAL_CRAY rid_t resv_id = job_id; int rc; +#endif int i, empty = -1, found = -1; bool job_id_change = false; @@ -298,6 +304,7 @@ extern int container_p_create(uint32_t job_id) _save_state(state_dir); slurm_mutex_unlock(&context_lock); +#ifdef HAVE_REAL_CRAY rc = job_create_reservation(resv_id, CREATE_FLAGS); if ((rc == 0) || (errno == EEXIST)) { if ((rc != 0) && (errno == EEXIST)) { @@ -308,6 +315,9 @@ extern int container_p_create(uint32_t 
job_id) _stat_reservation("create", resv_id); return SLURM_SUCCESS; } +#else + return SLURM_SUCCESS; +#endif error("%s: create(%u): %m", plugin_type, job_id); return SLURM_ERROR; } @@ -315,15 +325,18 @@ extern int container_p_create(uint32_t job_id) /* Add proctrack container (PAGG) to a job container */ extern int container_p_add_cont(uint32_t job_id, uint64_t cont_id) { +#ifdef HAVE_REAL_CRAY jid_t cjob_id = cont_id; rid_t resv_id = job_id; int rc; +#endif if (enable_debug) { info("%s: adding cont(%u.%"PRIu64")", plugin_type, job_id, cont_id); } +#ifdef HAVE_REAL_CRAY rc = job_attach_reservation(cjob_id, resv_id, ADD_FLAGS); if ((rc != 0) && (errno == ENOENT)) { /* Log and retry */ error("%s: add(%u.%"PRIu64"): No reservation found", @@ -331,11 +344,15 @@ extern int container_p_add_cont(uint32_t job_id, uint64_t cont_id) rc = job_create_reservation(resv_id, CREATE_FLAGS); rc = job_attach_reservation(cjob_id, resv_id, ADD_FLAGS); } + if (rc == 0) { if (enable_debug) _stat_reservation("add", resv_id); return SLURM_SUCCESS; } +#else + return SLURM_SUCCESS; +#endif error("%s: add(%u.%"PRIu64"): %m", plugin_type, job_id, cont_id); return SLURM_ERROR; } @@ -364,8 +381,10 @@ extern int container_p_add_pid(uint32_t job_id, pid_t pid, uid_t uid) extern int container_p_delete(uint32_t job_id) { +#ifdef HAVE_REAL_CRAY rid_t resv_id = job_id; - int rc; +#endif + int rc = 0; int i, found = -1; bool job_id_change = false; @@ -384,8 +403,9 @@ extern int container_p_delete(uint32_t job_id) if (job_id_change) _save_state(state_dir); slurm_mutex_unlock(&context_lock); - +#ifdef HAVE_REAL_CRAY rc = job_end_reservation(resv_id, DELETE_FLAGS); +#endif if (rc == 0) return SLURM_SUCCESS; diff --git a/src/plugins/job_container/none/job_container_none.c b/src/plugins/job_container/none/job_container_none.c index d883b072720..94705767921 100644 --- a/src/plugins/job_container/none/job_container_none.c +++ b/src/plugins/job_container/none/job_container_none.c @@ -42,11 +42,8 @@ #include 
<sys/stat.h> #include <sys/types.h> -#include "slurm/slurm_errno.h" #include "src/common/slurm_xlator.h" -#include "src/slurmd/common/proctrack.h" - -#define JOB_BUF_SIZE 128 +#include "slurm/slurm_errno.h" /* * These variables are required by the generic plugin interface. If they @@ -79,124 +76,9 @@ const char plugin_name[] = "job_container none plugin"; const char plugin_type[] = "job_container/none"; const uint32_t plugin_version = 101; -char *state_dir = NULL; /* state save directory */ -static bool enable_debug = false; - -static uint32_t *job_id_array = NULL; -static uint32_t job_id_count = 0; -static pthread_mutex_t context_lock = PTHREAD_MUTEX_INITIALIZER; - -static int _save_state(char *dir_name) -{ - char *file_name; - int ret = SLURM_SUCCESS; - int state_fd; - - if (!dir_name) { - error("job_container state directory is NULL"); - return SLURM_ERROR; - } - file_name = xstrdup(dir_name); - xstrcat(file_name, "/job_container_state"); - (void) unlink(file_name); - state_fd = creat(file_name, 0600); - if (state_fd < 0) { - error("Can't save state, error creating file %s %m", - file_name); - ret = SLURM_ERROR; - } else { - char *buf = (char *) job_id_array; - size_t len = job_id_count * sizeof(uint32_t); - while (1) { - int wrote = write(state_fd, buf, len); - if ((wrote < 0) && (errno == EINTR)) - continue; - if (wrote == 0) - break; - if (wrote < 0) { - error("Can't save job_container state: %m"); - ret = SLURM_ERROR; - break; - } - buf += wrote; - len -= wrote; - } - close(state_fd); - } - xfree(file_name); - - return ret; -} - -static int _restore_state(char *dir_name) -{ - char *data = NULL, *file_name; - int error_code = SLURM_SUCCESS; - int state_fd, data_allocated = 0, data_read = 0, data_size = 0; - - if (!dir_name) { - error("job_container state directory is NULL"); - return SLURM_ERROR; - } - - file_name = xstrdup(dir_name); - xstrcat(file_name, "/job_container_state"); - state_fd = open (file_name, O_RDONLY); - if (state_fd >= 0) { - data_allocated 
= JOB_BUF_SIZE; - data = xmalloc(data_allocated); - while (1) { - data_read = read(state_fd, &data[data_size], - JOB_BUF_SIZE); - if ((data_read < 0) && (errno == EINTR)) - continue; - if (data_read < 0) { - error ("Read error on %s, %m", file_name); - error_code = SLURM_ERROR; - break; - } else if (data_read == 0) - break; - data_size += data_read; - data_allocated += data_read; - xrealloc(data, data_allocated); - } - close(state_fd); - xfree(file_name); - } else { - error("No %s file for %s state recovery", - file_name, plugin_type); - xfree(file_name); - return SLURM_SUCCESS; - } - - if (error_code == SLURM_SUCCESS) { - job_id_array = (uint32_t *) data; - job_id_count = data_size / sizeof(uint32_t); - } - - return error_code; -} - -static bool _get_debug_flag(void) -{ - if (slurm_get_debug_flags() & DEBUG_FLAG_JOB_CONT) - return true; - return false; -} - extern void container_p_reconfig(void) { - bool new_debug_flag = _get_debug_flag(); - - if (!enable_debug && new_debug_flag) { - error("%s: DebugFlag enabled by reconfiguration, this may " - "result in errors due to missing job cache information", - plugin_name); - } else if (enable_debug != new_debug_flag) { - debug("%s: JobContainer DebugFlag changed to %d", - plugin_name, (int) new_debug_flag); - } - enable_debug = new_debug_flag; + return; } /* @@ -205,11 +87,7 @@ extern void container_p_reconfig(void) */ extern int init(void) { - enable_debug = _get_debug_flag(); - if (enable_debug) - info("%s loaded", plugin_name); - else - debug("%s loaded", plugin_name); + debug("%s loaded", plugin_name); return SLURM_SUCCESS; } @@ -220,133 +98,32 @@ extern int init(void) */ extern int fini(void) { - xfree(state_dir); return SLURM_SUCCESS; } extern int container_p_restore(char *dir_name, bool recover) { - if (enable_debug) { - int i; - - slurm_mutex_lock(&context_lock); - _restore_state(dir_name); - slurm_mutex_unlock(&context_lock); - for (i = 0; i < job_id_count; i++) { - if (job_id_array[i] == 0) - continue; - if 
(recover) { - info("%s: recovered job(%u)", - plugin_type, job_id_array[i]); - } else { - info("%s: purging job(%u)", - plugin_type, job_id_array[i]); - job_id_array[i] = 0; - } - } - } - - xfree(state_dir); - state_dir = xstrdup(dir_name); return SLURM_SUCCESS; } extern int container_p_create(uint32_t job_id) { - if (enable_debug) { - int i, empty = -1, found = -1; - bool job_id_change = false; - - info("%s: creating(%u)", plugin_type, job_id); - slurm_mutex_lock(&context_lock); - for (i = 0; i < job_id_count; i++) { - if (job_id_array[i] == 0) { - empty = i; - } else if (job_id_array[i] == job_id) { - found = i; - break; - } - } - if (found == -1) { - if (empty == -1) { - empty = job_id_count; - job_id_count += 4; - job_id_array = xrealloc(job_id_array, - sizeof(uint32_t)*job_id_count); - } - job_id_array[empty] = job_id; - job_id_change = true; - } else { - info("%s: duplicate create job(%u)", plugin_type, job_id); - } - if (job_id_change) - _save_state(state_dir); - slurm_mutex_unlock(&context_lock); - } - return SLURM_SUCCESS; } /* Add proctrack container (PAGG) to a job container */ extern int container_p_add_cont(uint32_t job_id, uint64_t cont_id) { - if (enable_debug) { - /* This is called from slurmstepd, so the job_id_array is NULL - * here.The array is only set by slurmstepd */ - info("%s: adding cont(%u.%"PRIu64")", plugin_type, job_id, - cont_id); - } - return SLURM_SUCCESS; } /* Add a process to a job container, create the proctrack container to add */ extern int container_p_add_pid(uint32_t job_id, pid_t pid, uid_t uid) { - if (enable_debug) { - stepd_step_rec_t job; - - info("%s: adding pid(%u.%u)", plugin_type, job_id, - (uint32_t) pid); - - memset(&job, 0, sizeof(stepd_step_rec_t)); - job.jmgr_pid = pid; - job.uid = uid; - if (proctrack_g_create(&job) != SLURM_SUCCESS) { - error("%s: proctrack_g_create job(%u)", - plugin_type, job_id); - return SLURM_ERROR; - } - - proctrack_g_add(&job, pid); - - return container_p_add_cont(job_id, job.cont_id); - 
} - return SLURM_SUCCESS; } extern int container_p_delete(uint32_t job_id) { - if (enable_debug) { - int i, found = -1; - bool job_id_change = false; - - info("%s: deleting(%u)", plugin_type, job_id); - slurm_mutex_lock(&context_lock); - for (i = 0; i < job_id_count; i++) { - if (job_id_array[i] == job_id) { - job_id_array[i] = 0; - job_id_change = true; - found = i; - } - } - if (found == -1) - info("%s: no job for delete(%u)", plugin_type, job_id); - if (job_id_change) - _save_state(state_dir); - slurm_mutex_unlock(&context_lock); - } - return SLURM_SUCCESS; } diff --git a/src/plugins/proctrack/pgid/Makefile.in b/src/plugins/proctrack/pgid/Makefile.in index ca379238f6c..c4a0639800f 100644 --- a/src/plugins/proctrack/pgid/Makefile.in +++ b/src/plugins/proctrack/pgid/Makefile.in @@ -23,15 +23,15 @@ am__make_dryrun = \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ - echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ - | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ - for am__flg in $$MAKEFLAGS; do \ - case $$am__flg in \ - *=*|--*) ;; \ - *n*) am__dry=yes; break;; \ - esac; \ - done;; \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ esac; \ test $$am__dry = yes; \ } @@ -129,7 +129,7 @@ am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ - $(am__cd) "$$dir" && rm -f $$files; }; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(pkglibdir)" LTLIBRARIES = $(pkglib_LTLIBRARIES) @@ -410,7 +410,7 @@ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__confi case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ - && { if test -f $@; then exit 0; else break; fi; }; \ + && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ @@ -467,7 +467,7 @@ clean-pkglibLTLIBRARIES: echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done -proctrack_pgid.la: $(proctrack_pgid_la_OBJECTS) $(proctrack_pgid_la_DEPENDENCIES) $(EXTRA_proctrack_pgid_la_DEPENDENCIES) +proctrack_pgid.la: $(proctrack_pgid_la_OBJECTS) $(proctrack_pgid_la_DEPENDENCIES) $(EXTRA_proctrack_pgid_la_DEPENDENCIES) $(proctrack_pgid_la_LINK) -rpath $(pkglibdir) $(proctrack_pgid_la_OBJECTS) $(proctrack_pgid_la_LIBADD) $(LIBS) mostlyclean-compile: -- GitLab