From eca5574099bdc9c25e933a0049cd8c56a5dc0ef9 Mon Sep 17 00:00:00 2001
From: Mark Grondona <mgrondona@llnl.gov>
Date: Fri, 3 Jan 2003 01:06:35 +0000
Subject: [PATCH]  o Build script on the fly for binaries submitted with
 --batch    (to match documentation)  o Use access(2) to determine if user has
 local access to file    (instead of brute force examination of stat buffer) 
 o issue warning if local command cannot be found with --batch

---
 src/common/xstring.h |   2 +-
 src/srun/job.c       |  22 +++++-
 src/srun/job.h       |   9 ++-
 src/srun/opt.c       | 127 ++++++++-----------------------
 src/srun/srun.c      | 173 +++++++++++++++++++++++++------------------
 5 files changed, 158 insertions(+), 175 deletions(-)

diff --git a/src/common/xstring.h b/src/common/xstring.h
index 05efbca481e..0620c52cf3f 100644
--- a/src/common/xstring.h
+++ b/src/common/xstring.h
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  xstring.h - "safe" string processing functions with automatic memory 
- *	management
+ *	        management
  ******************************************************************************
  *  Copyright (C) 2002 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
diff --git a/src/srun/job.c b/src/srun/job.c
index cd4f0e2d872..61ffc9e90e8 100644
--- a/src/srun/job.c
+++ b/src/srun/job.c
@@ -83,8 +83,9 @@ _job_create_internal(allocation_info_t *info)
 	hl = hostlist_create(job->nodelist);
 	job->nhosts = hostlist_count(hl);
 
-	job->jobid  = info->jobid;
-	job->stepid = info->stepid;
+	job->jobid   = info->jobid;
+	job->stepid  = info->stepid;
+	job->old_job = false;
 
 	job->slurmd_addr = xmalloc(job->nhosts * sizeof(slurm_addr));
 	if (info->addrs)
@@ -277,8 +278,23 @@ job_force_termination(job_t *job)
 	pthread_kill(job->ioid, SIGTERM);
 }
 
+void job_fatal(job_t *job, const char *msg)
+{
+	if (msg) error(msg);
+
+	job_destroy(job);
+
+	exit(1);
+}
 
 void 
-job_destroy(job_t *job, const char *msg)
+job_destroy(job_t *job)
 {
+	if (job->old_job) {
+		debug("cancelling job step %u.%u", job->jobid, job->stepid);
+		slurm_complete_job_step(job->jobid, job->stepid, 0, 0);
+	} else if (!opt.no_alloc) {
+		debug("cancelling job %u", job->jobid);
+		slurm_complete_job(job->jobid, 0, 0);
+	}
 }
diff --git a/src/srun/job.h b/src/srun/job.h
index 12a653ba356..2cfe4e400d8 100644
--- a/src/srun/job.h
+++ b/src/srun/job.h
@@ -44,10 +44,11 @@ typedef enum {
 
 
 typedef struct srun_job {
-	uint32_t jobid;		/* assigned job id 	*/
-	uint32_t stepid;	/* assigned step id 	*/
+	uint32_t jobid;		/* assigned job id 	                  */
+	uint32_t stepid;	/* assigned step id 	                  */
+	bool old_job;           /* run job step under previous allocation */
 
-	job_state_t state;	/* job state	   	*/
+	job_state_t state;	/* job state	   	                  */
 	pthread_mutex_t state_mutex; 
 	pthread_cond_t  state_cond;
 
@@ -112,5 +113,7 @@ void    job_force_termination(job_t *job);
 job_t * job_create_noalloc(void);
 job_t * job_create_allocation(resource_allocation_response_msg_t *resp);
 
+void    job_fatal(job_t *job, const char *msg);
+void    job_destroy(job_t *job);
 
 #endif /* !_HAVE_JOB_H */
diff --git a/src/srun/opt.c b/src/srun/opt.c
index 292a44a4b53..947e5338207 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -230,7 +230,7 @@ struct poptOption runTable[] = {
 	 "threads"},
 	{"wait", 'W', POPT_ARG_INT, &opt.max_wait, OPT_WAIT,
 	 "seconds to wait after first task ends before killing job",
-	 "seconds"},
+	 "sec"},
 	POPT_TABLEEND
 };
 
@@ -327,8 +327,7 @@ static void _opt_list(void);
 /* search PATH for command 
  * returns full path
  */
-static char * _search_path(char *);
-static char * _find_file_path (char *fname);
+static char * _search_path(char *, bool);
 
 static void _print_version(void);
 static bool _valid_node_list(char **node_list_pptr);
@@ -397,7 +396,7 @@ static bool _valid_node_list(char **node_list_pptr)
 	}
 	(void) fclose(fd);
 
-/* 	free(*node_list_pptr);	orphanned */
+        /*  free(*node_list_pptr);	orphanned */
 	*node_list_pptr = node_list;
 	return true;
 }
@@ -558,7 +557,7 @@ static void _opt_default()
 
 	if ((getcwd(buf, MAXPATHLEN)) == NULL) 
 		fatal("getcwd failed: %m");
-	opt.cwd = strdup(buf);
+	opt.cwd = xstrdup(buf);
 
 	opt.progname = NULL;
 
@@ -736,9 +735,7 @@ static void _opt_args(int ac, char **av)
 
 	optctx = poptGetContext("srun", ac, (const char **) av, options,
 				POPT_CONTEXT_POSIXMEHARDER);
-
 	poptSetOtherOptionHelp(optctx, "[OPTIONS...] executable [args...]");
-
 	poptReadDefaultConfig(optctx, 0);
 
 	/* first pass through args to see if attach or allocate mode
@@ -774,7 +771,8 @@ static void _opt_args(int ac, char **av)
 
 		case OPT_BATCH:
 			if (opt.allocate || opt.attach) {
-				error("can only specify one mode: allocate, attach or batch.");
+				error("can only specify one mode: "
+				      "allocate, attach or batch.");
 				exit(1);
 			}
 			mode = MODE_BATCH;
@@ -811,9 +809,8 @@ static void _opt_args(int ac, char **av)
 		case OPT_DISTRIB:
 			opt.distribution = _verify_dist_type(arg);
 			if (opt.distribution == SRUN_DIST_UNKNOWN) {
-				argerror
-				    ("Error: distribution type `%s' is not recognized",
-				     arg);
+				error("Error: distribution type `%s' " 
+				       "is not recognized", arg);
 				poptPrintUsage(optctx, stderr, 0);
 				exit(1);
 			}
@@ -897,13 +894,11 @@ static void _opt_args(int ac, char **av)
 		remote_argv[i] = strdup(rest[i]);
 	remote_argv[i] = NULL;	/* End of argv's (for possible execv) */
 
-	if ((opt.batch == 0) && (opt.allocate == 0) && (remote_argc > 0)) {
-		if ((fullpath = _search_path(remote_argv[0])) != NULL) {
-			free(remote_argv[0]);
-			remote_argv[0] = fullpath;
-		} 
-	} else if (remote_argc > 0) {
-		if ((fullpath = _find_file_path(remote_argv[0])) != NULL) {
+
+	if (remote_argc > 0) {
+		bool search_cwd = (opt.batch || opt.allocate);
+
+		if ((fullpath = _search_path(remote_argv[0], search_cwd))) {
 			free(remote_argv[0]);
 			remote_argv[0] = fullpath;
 		} 
@@ -915,7 +910,6 @@ static void _opt_args(int ac, char **av)
 	}
 
 	poptFreeContext(optctx);
-
 }
 
 /* 
@@ -1021,11 +1015,17 @@ _opt_verify(poptContext optctx)
 	return verified;
 }
 
+static void
+_freeF(void *data)
+{
+	xfree(data);
+}
+
 static List
 _create_path_list(void)
 {
-	List l = list_create(&free);
-	char *path = strdup(getenv("PATH"));
+	List l = list_create(&_freeF);
+	char *path = xstrdup(getenv("PATH"));
 	char *c, *lc;
 
 	if (!path) {
@@ -1041,109 +1041,46 @@ _create_path_list(void)
 			/* nullify and push token onto list */
 			*c = '\0';
 			if (lc != NULL && strlen(lc) > 0)
-				list_append(l, strdup(lc));
+				list_append(l, xstrdup(lc));
 			lc = ++c;
 		} else
 			c++;
 	}
 
 	if (strlen(lc) > 0)
-		list_append(l, strdup(lc));
+		list_append(l, xstrdup(lc));
 
-	free(path);
+	xfree(path);
 
 	return l;
 }
 
 static char *
-_search_path(char *cmd)
+_search_path(char *cmd, bool check_current_dir)
 {
 	List l = _create_path_list();
 	ListIterator i = list_iterator_create(l);
 	char *path, *fullpath = NULL;
-	struct stat stat_buf;
+
+	if (check_current_dir) 
+		list_prepend(l, xstrdup(opt.cwd));
 
 	while ((path = list_next(i))) {
 		xstrcat(fullpath, path);
 		xstrcatchar(fullpath, '/');
 		xstrcat(fullpath, cmd);
 
-		if (   (stat(fullpath, &stat_buf) == 0)
-		    && (stat_buf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
+		if (access(fullpath, R_OK | X_OK) == 0)
 			goto done;
-		} else {
-			xfree(fullpath);
-			fullpath = NULL;
-		}
+
+		xfree(fullpath);
+		fullpath = NULL;
 	}
   done:
 	list_destroy(l);
 	return fullpath;
 }
 
-/* _find_file_path - given a filename, return the full path to a regular file 
- *	of that name that can be read or NULL otherwise
- * NOTE: The calling function must xfree the return value (if set) 
- */
-static char *
-_find_file_path (char *fname)
-{
-	int modes;
-	char *pathname;
-	struct stat stat_buf;
-
-	if (fname == NULL)
-		return NULL;
-
-	pathname = xmalloc (PATH_MAX);
-
-	/* generate a fully qualified pathname */
-	if (fname[0] == '/') {
-		if ((strlen (fname) + 1) > PATH_MAX) {
-			error ("Supplied filename too long: %s", fname);
-			goto cleanup;
-		}
-		strcpy (pathname, fname);
-	} else {
-		getcwd (pathname, PATH_MAX);
-		if ((strlen (pathname) + strlen (fname) + 2) > PATH_MAX) {
-			error ("Supplied filename too long: %s", fname);
-			goto cleanup;
-		}
-		strcat (pathname, "/");
-		strcat (pathname, fname);
-	}
-
-	/* determine if the file is accessable */
-	if (stat (pathname, &stat_buf) < 0) {
-		error ("Unable to stat file %s: %m", pathname);
-		goto cleanup;
-	}
-
-	if (S_ISREG (stat_buf.st_mode) == 0) {
-		error ("%s is not a regular file", pathname);
-		goto cleanup;
-	}
-
-	if (stat_buf.st_uid == getuid())
-		modes = (stat_buf.st_mode >> 6) & 0x7;
-	else if (stat_buf.st_gid == getgid())
-		modes = (stat_buf.st_mode >> 3) & 0x7;
-	else
-		modes =  stat_buf.st_mode       & 0x7;
-
-	if ((modes & 0x4) == 0) {
-		error ("%s can not be read", pathname);
-		goto cleanup;
-	}
-
-	return pathname;
-
-    cleanup:
-	xfree (pathname);
-	return NULL;
-}
-
 #if	__DEBUG
 
 /* helper function for printing options
diff --git a/src/srun/srun.c b/src/srun/srun.c
index 341174ce193..ee15ca7a02f 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -91,17 +91,13 @@ static void		 _create_job_step(job_t *job);
 static void		 _sig_kill_alloc(int signum);
 static char 		*_build_script (char *pathname, int file_type);
 static char 		*_get_shell (void);
-static int 		 _is_file_text (char *fname, char** shell_ptr);
+static int 		 _is_file_text (char *, char**);
 static int		 _run_batch_job (void);
 static allocation_resp	*_existing_allocation(void);
 static void		 _run_job_script(uint32_t jobid, uint32_t node_cnt);
 static int               _set_batch_script_env(uint32_t jobid, 
 					       uint32_t node_cnt);
 
-#define die(msg, args...) do { \
-	  error(msg, ##args);  \
-	  goto cleanup;        \
-	} while (0)
 
 #ifdef HAVE_LIBELAN3
 #  include "src/common/qsw.h"
@@ -113,7 +109,6 @@ srun(int ac, char **av)
 {
 	allocation_resp *resp;
 	job_t *job;
-	bool old_job = false;
 	struct rlimit rlim;
 
 	log_options_t logopt = LOG_OPTS_STDERR_ONLY;
@@ -142,7 +137,7 @@ srun(int ac, char **av)
 	 * create a job from opt
 	 */
 	if (opt.batch) {
-		if (_run_batch_job())
+		if (_run_batch_job() < 0)
 			exit (1);
 		exit (0);
 
@@ -155,11 +150,11 @@ srun(int ac, char **av)
 
 	} else if ( (resp = _existing_allocation()) ) {
 		if (opt.allocate) {
-			error("job already has an allocation");
+			error("job %u already has an allocation", resp->job_id);
 			exit(1);
 		}
-		old_job = true;
 		job = job_create_allocation(resp); 
+		job->old_job = true;
 		_create_job_step(job);
 		slurm_free_resource_allocation_response_msg(resp);
 
@@ -195,25 +190,25 @@ srun(int ac, char **av)
 	sig_setup_sigmask();
 
 	if (msg_thr_create(job) < 0)
-		die("Unable to create msg thread");
+		job_fatal(job, "Unable to create msg thread");
 
 	if (io_thr_create(job) < 0) 
-		die("failed to initialize IO");
+		job_fatal(job, "failed to initialize IO");
 
 	if (sig_thr_create(job) < 0)
-		die("Unable to create signals thread: %m");
+		job_fatal(job, "Unable to create signals thread: %m");
 
 	if (launch_thr_create(job) < 0)
-		die("Unable to create launch thread: %m");
+		job_fatal(job, "Unable to create launch thread: %m");
 
 	/* wait for job to terminate */
-	pthread_mutex_lock(&job->state_mutex);
+	slurm_mutex_lock(&job->state_mutex);
 	debug3("before main state loop: state = %d", job->state);
 	while ((job->state != SRUN_JOB_OVERDONE) && 
 	       (job->state != SRUN_JOB_FAILED  )) {
 		pthread_cond_wait(&job->state_cond, &job->state_mutex);
 	}
-	pthread_mutex_unlock(&job->state_mutex);
+	slurm_mutex_unlock(&job->state_mutex);
 
 	/* job is now overdone, clean up  */
 	if (job->state == SRUN_JOB_FAILED) {
@@ -232,18 +227,10 @@ srun(int ac, char **av)
 		error ("Waiting on IO: %m");
 	}
 
+	job_destroy(job);
+
 	/* kill signal thread */
 	pthread_cancel(job->sigid);
-
-    cleanup:
-	if (old_job) {
-		debug("cancelling job step %u.%u", job->jobid, job->stepid);
-		slurm_complete_job_step(job->jobid, job->stepid, 0, 0);
-	} else if (!opt.no_alloc) {
-		debug("cancelling job %u", job->jobid);
-		slurm_complete_job(job->jobid, 0, 0);
-	}
-
 	log_fini();
 
 	exit(0);
@@ -460,23 +447,28 @@ _print_job_information(allocation_resp *resp)
 static int
 _run_batch_job(void)
 {
-	int file_type, rc, retries;
+	int file_type, retries;
+	int rc = SLURM_SUCCESS;
 	job_desc_msg_t job;
 	submit_response_msg_t *resp;
 	extern char **environ;
 	char *job_script;
 
 	if ((remote_argc == 0) || (remote_argv[0] == NULL))
-		return 1;
+		return SLURM_ERROR;
+
 	file_type = _is_file_text (remote_argv[0], NULL);
-	if (file_type == TYPE_NOT_TEXT) {
-		error ("file %s is not script", remote_argv[0]);
-		return 1;
-	}
+
+	/* if (file_type == TYPE_NOT_TEXT) {
+         *	error ("file %s is not script", remote_argv[0]);
+	 *	return SLURM_ERROR;
+	 * }
+	 */
+
 	job_script = _build_script (remote_argv[0], file_type);
 	if (job_script == NULL) {
 		error ("unable to build script from file %s", remote_argv[0]);
-		return 1;
+		return SLURM_ERROR;
 	}
 
 	slurm_init_job_desc_msg(&job);
@@ -531,9 +523,9 @@ _run_batch_job(void)
 	job.work_dir		= opt.cwd;
 
 	retries = 0;
-	while ((rc = slurm_submit_batch_job(&job, &resp)) == SLURM_FAILURE) {
-		if ((slurm_get_errno() == ESLURM_ERROR_ON_DESC_TO_RECORD_COPY) 
-		    && (retries < MAX_RETRIES)) {
+	while ((rc = slurm_submit_batch_job(&job, &resp)) < 0) {
+		if (   (errno == ESLURM_ERROR_ON_DESC_TO_RECORD_COPY) 
+		    && (retries < MAX_RETRIES) ) {
 			if (retries == 0)
 				error ("Slurm controller not responding, "
 						"sleeping and retrying");
@@ -546,12 +538,12 @@ _run_batch_job(void)
 		else {
 			error("Unable to submit batch job resources: %s", 
 					slurm_strerror(errno));
-			return 1;
+			return SLURM_ERROR;
 		}			
 	}
 
 	
-	if (rc == 0) {
+	if (rc == SLURM_SUCCESS) {
 		info("jobid %u submitted",resp->job_id);
 		slurm_free_submit_response_response_msg (resp);
 	}
@@ -570,6 +562,33 @@ _get_shell (void)
 	return pw_ent_ptr->pw_shell;
 }
 
+#define F 0	/* char never appears in text */
+#define T 1	/* character appears in plain ASCII text */
+#define I 2	/* character appears in ISO-8859 text */
+#define X 3     /* character appears in non-ISO extended ASCII */
+static char text_chars[256] = {
+	/*                  BEL BS HT LF    FF CR    */
+	F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
+	/*                              ESC          */
+	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
+	/*            NEL                            */
+	X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
+	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
+};
+
+
 /* _is_file_text - determine if specified file is a script
  * shell_ptr - if not NULL, set to pointer to pathname of specified shell 
  *		(if any, ie. return code of 2)
@@ -582,7 +601,12 @@ _is_file_text (char *fname, char **shell_ptr)
 {
 	int buf_size, fd, i;
 	int rc = 1;	/* initially assume the file contains text */
-	char buffer[256];
+	unsigned char buffer[8192];
+
+	if (fname[0] != '/') {
+		info("warning: %s not found in local path", fname);
+		return 0;
+	}
 
 	fd = open(fname, O_RDONLY);
 	if (fd < 0) {
@@ -598,9 +622,7 @@ _is_file_text (char *fname, char **shell_ptr)
 	(void) close (fd);
 
 	for (i=0; i<buf_size; i++) {
-		if (((buffer[i] >= 0x00) && (buffer[i] <= 0x06)) ||
-		    ((buffer[i] >= 0x0e) && (buffer[i] <= 0x1f)) ||
-		     (buffer[i] >= 0x7f)) {
+		if ((int) text_chars[buffer[i]] != T) {
 			rc = 0;
 			break;
 		}
@@ -634,40 +656,45 @@ _is_file_text (char *fname, char **shell_ptr)
 static char *
 _build_script (char *fname, int file_type)
 {
-	char *buffer, *shell;
-	int buf_size, buf_used = 0, fd, data_size, i;
-
-	fd = open(fname, O_RDONLY);
-	if (fd < 0) {
-		error ("Unable to open file %s: %m", fname);
-		return NULL;
+	cbuf_t cb = cbuf_create(512, 1048576);
+	int   fd     = -1;
+	int   i      =  0;
+	char *buffer = NULL;
+
+	if (file_type != 0) {
+		if ((fd = open(fname, O_RDONLY)) < 0) {
+			error ("Unable to open file %s: %m", fname);
+			return NULL;
+		}
 	}
 
-	buf_size = 8096;
-	buffer = xmalloc (buf_size);
-	buf_used = 0;
 	if (file_type != TYPE_SCRIPT) {
-		shell = _get_shell();
-		strcpy (buffer, "#!");
-		strcat (buffer, shell);
-		strcat (buffer, "\n");
-		buf_used = strlen(buffer);
-	}
-
-	while (1) {
-		i = buf_size - buf_used;
-		if (i < 1024) {
-			buf_size += 8096;
-			xrealloc (buffer, buf_size);
-			i = buf_size - buf_used;
+		xstrfmtcat(buffer, "#!%s\n", _get_shell());
+		if (file_type == 0) {
+			xstrcat(buffer, "srun ");
+			for (i = 0; i < remote_argc; i++)
+				xstrfmtcat(buffer, "%s ", remote_argv[i]);
+			xstrcatchar(buffer, '\n');
 		}
-		data_size = read (fd, &buffer[buf_used], i);
-		if (data_size <= 0)
-			break;
-		buf_used += i;
+	} 
+
+	if (file_type != 0) {
+		int len = buffer ? strlen(buffer) : 0;
+		int size;
+
+		if ((size = cbuf_write_from_fd(cb, fd, -1, NULL)) < 0) 
+			error("Unable to read %s", fname);
+		cbuf_write(cb, "\0", 1, NULL);
+
+		xrealloc(buffer, cbuf_used(cb) + len +1);
+
+		cbuf_read(cb, buffer+len, cbuf_used(cb));
+
+		if (close(fd) < 0)
+			error("close: %m");
 	}
-	buffer[buf_used] = '\0';
-	(void) close (fd);
+
+	cbuf_destroy(cb);
 
 	return buffer;
 }
@@ -733,11 +760,11 @@ _set_batch_script_env(uint32_t jobid, uint32_t node_cnt)
 	}
 
 	if (opt.distribution != SRUN_DIST_UNKNOWN) {
-		dist = (opt.distribution == SRUN_DIST_BLOCK) ? 
-							"block" : "cyclic";
+		dist = (opt.distribution == SRUN_DIST_BLOCK) ?  
+		       "block" : "cyclic";
 
 		if (setenvf("SLURM_DISTRIBUTION=%s", dist)) {
-			error("Can't set SLURM_DISTRIBUTION environment variable");
+			error("Can't set SLURM_DISTRIBUTION env variable");
 			return -1;
 		}
 	}
-- 
GitLab