From 9d7a76c94bdc2855c3f99704c066c1432b0b422c Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Mon, 30 Jul 2007 18:35:44 +0000
Subject: [PATCH] svn merge -r11873:11892
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.2

---
 NEWS                    |  1 +
 doc/html/faq.shtml      | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 doc/html/team.shtml     |  3 +-
 src/common/stepd_api.c  |  6 ++-
 src/scontrol/info_job.c | 14 ++++---
 src/srun/opt.c          |  8 ++--
 6 files changed, 114 insertions(+), 14 deletions(-)

diff --git a/NEWS b/NEWS
index 8662766d220..da481bf465c 100644
--- a/NEWS
+++ b/NEWS
@@ -26,6 +26,7 @@ documents those changes that are of interest to users and admins.
 * Changes in SLURM 1.2.13
 =========================
  -- Add slurm.conf parameter JobFileAppend.
+ -- Fix for segv in "scontrol listpids" on nodes not in SLURM config.
 
 * Changes in SLURM 1.2.12
 =========================
diff --git a/doc/html/faq.shtml b/doc/html/faq.shtml
index c24c321ea07..7d82e817186 100644
--- a/doc/html/faq.shtml
+++ b/doc/html/faq.shtml
@@ -22,6 +22,7 @@ job?</a></li>
 name for a batch job?</a></li>
 <li><a href="#parallel_make">Can the <i>make</i> command utilize the resources 
 allocated to a SLURM job?</a></li>
+<li><a href="#terminal">Can tasks be launched with a remote terminal?</a></li>
 </ol>
 <h2>For Administrators</h2>
 <ol>
@@ -340,6 +341,99 @@ overhead of SLURM's task launch. Use with make's <i>-j</i> option within an
 existing SLURM allocation. Outside of a SLURM allocation, make's behavior
 will be unchanged.</p>
 
+<p><a name="terminal"><b>16. Can tasks be launched with a remote
+terminal?</b></a><br>
+SLURM does not directly support a remote pseudo terminal for spawned
+tasks. We intend to remedy this in SLURM version 1.3.
+Until then, you can accomplish this by starting an appropriate program
+or script. In the simplest case (X11 over TCP with the DISPLAY
+environment variable already set), <i>srun xterm</i> may suffice.
+In the more general case, the following scripts should work.
+<b>NOTE: The variable BS in the first script contains the pathname of
+the second script. You must change BS to match the actual location of
+the second script.</b>
+Execute the first script with the desired sbatch options, for
+example <i>interactive -N2 -pdebug</i>.</p>
+
+<pre>
+#!/bin/bash
+# -*- coding: utf-8 -*-
+# Author: P&auml;r Andersson (National Supercomputer Centre, Sweden)
+# Version: 0.2 2007-07-30
+# 
+# This will submit a batch script that starts screen on a node. 
+# Then ssh is used to connect to the node and attach the screen. 
+# The result is very similar to an interactive shell in PBS 
+# (qsub -I)
+
+# Batch Script that starts SCREEN
+BS=/PATH_TO_BATCH_SCRIPT/_interactive
+
+# Submit the job and get the job id
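+# (the job id is the trailing number in sbatch's output)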
+JOB=`sbatch --output=/dev/null --error=/dev/null "$@" $BS 2>&1 \
+    | egrep -o -e "\b[0-9]+$"`
+
+# Make sure the job is always canceled
+trap "{ /usr/bin/scancel -q $JOB; exit; }" SIGINT SIGTERM EXIT
+
+echo "Waiting for JOBID $JOB to start"
+while true; do
+    sleep 5s
+
+    # Check job status
+    STATUS=`squeue -j $JOB -t PD,R -h -o %t`
+
+    if [ "$STATUS" = "R" ]; then
+        # Job is running, break the while loop
+        break
+    elif [ "$STATUS" != "PD" ]; then
+        echo "Job is not Running or Pending. Aborting"
+        scancel $JOB
+        exit 1
+    fi
+
+    echo -n "."
+
+done
+
+# Determine the first node in the job:
+NODE=`srun --jobid=$JOB -N1 hostname`
+
+# SSH to the node and attach the screen
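+# (screen -rd detaches the session elsewhere first, if necessary)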
+sleep 1s
+ssh -t $NODE screen -rd -S slurm$JOB
+# The trap will now cancel the job before exiting.
+</pre>
+
+<p>NOTE: The above script executes the script below,
+named <i>_interactive</i>.</p>
+<pre>
+#!/bin/sh
+# -*- coding: utf-8 -*-
+# Author: P&auml;r Andersson  (National Supercomputer Centre, Sweden)
+# Version: 0.2 2007-07-30
+# 
+# Simple batch script that starts SCREEN.
+
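+# screen -D -m starts a detached session without forking, so this
+# process (and therefore the batch job) lasts exactly as long as the
+# screen session; -S names the session so the first script can find it.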
+exec screen -Dm -S slurm$SLURM_JOBID
+</pre>
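+<p>Assuming the first script is saved as <i>interactive</i> somewhere
+in your search path, a session might look like this (the job id shown
+is illustrative):</p>
+<pre>
+$ interactive -N2 -pdebug
+Waiting for JOBID 65541 to start
+....
+</pre>
+<p>Detaching from or terminating the screen session closes the ssh
+connection, and the trap in the first script then cancels the job.</p>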
+
+
 <p class="footer"><a href="#top">top</a></p>
 
 <h2>For Administrators</h2>
@@ -711,6 +805,6 @@ about these options.
 
 <p class="footer"><a href="#top">top</a></p>
 
-<p style="text-align:center;">Last modified 15 June 2007</p>
+<p style="text-align:center;">Last modified 30 July 2007</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/html/team.shtml b/doc/html/team.shtml
index d3946e6fa5d..ab90d33f1b9 100644
--- a/doc/html/team.shtml
+++ b/doc/html/team.shtml
@@ -18,6 +18,7 @@
 <p> SLURM contributors include: </p>
 <ul>
 <li>Amjad Majid Ali (Colorado State University)</li>
+<li>P&auml;r Andersson (National Supercomputer Centre, Sweden)</li>
 <li>Don Albert (Bull)</li>
 <li>Ernest Artiaga (Barcelona Supercomputer Center, Spain)</li>
 <li>Anton Blanchard (Samba)</li>
@@ -55,6 +56,6 @@ Networking, Italy)</li>
 <li>Anne-Marie Wunderlin (Bull)</li>
 </ul>
 
-<p style="text-align:center;">Last modified 9 July 2007</p>
+<p style="text-align:center;">Last modified 26 July 2007</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c
index 8f8fafae1a6..faebb0d4974 100644
--- a/src/common/stepd_api.c
+++ b/src/common/stepd_api.c
@@ -162,7 +162,7 @@ _guess_nodename()
 	char host[256];
 	char *nodename = NULL;
 
-	if (gethostname_short(host, 256) != 0)
+	if (gethostname_short(host, 256) != 0) 
 		return NULL;
 
 	nodename = slurm_conf_get_nodename(host);
@@ -483,7 +483,9 @@ stepd_available(const char *directory, const char *nodename)
 	struct stat stat_buf;
 
 	if (nodename == NULL) {
-		nodename = _guess_nodename();
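+		/* _guess_nodename() is NULL for hosts not in the SLURM config */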
+		if (!(nodename = _guess_nodename()))
+			return NULL;
 	}
 	if (directory == NULL) {
 		slurm_ctl_conf_t *cf;
diff --git a/src/scontrol/info_job.c b/src/scontrol/info_job.c
index 7aa06b49156..fc99620c993 100644
--- a/src/scontrol/info_job.c
+++ b/src/scontrol/info_job.c
@@ -499,10 +499,11 @@ _list_pids_all_steps(const char *node_name, uint32_t jobid)
 	int count = 0;
 
 	steps = stepd_available(NULL, node_name);
-	if (list_count(steps) == 0) {
-		fprintf(stderr, "Job %u does not exist on this node.\n",
-			jobid);
-		list_destroy(steps);
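+	/* stepd_available() can return NULL for nodes not in the SLURM config */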
+	if (!steps || list_count(steps) == 0) {
+		fprintf(stderr, "Job %u does not exist on this node.\n", jobid);
+		if (steps) 
+			list_destroy(steps);
 		exit_code = 1;
 		return;
 	}
@@ -533,9 +534,10 @@ _list_pids_all_jobs(const char *node_name)
 	step_loc_t *stepd;
 
 	steps = stepd_available(NULL, node_name);
-	if (list_count(steps) == 0) {
+	if (!steps || list_count(steps) == 0) {
 		fprintf(stderr, "No job steps exist on this node.\n");
-		list_destroy(steps);
+		if (steps)
+			list_destroy(steps);
 		exit_code = 1;
 		return;
 	}
diff --git a/src/srun/opt.c b/src/srun/opt.c
index 8667fdcf32f..b5b9717d283 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -2693,11 +2693,11 @@ static void _help(void)
 "\n"
 "Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n" 
 "  -B --extra-node-info=S[:C[:T]]            Expands to:\n"
-"      --sockets-per-node=S      number of sockets per node to allocate\n"
-"      --cores-per-socket=C      number of cores per socket to allocate\n"
-"      --threads-per-core=T      number of threads per core to allocate\n"
+"      --sockets-per-node=S    number of sockets per node to allocate\n"
+"      --cores-per-socket=C    number of cores per socket to allocate\n"
+"      --threads-per-core=T    number of threads per core to allocate\n"
 "                              each field can be 'min[-max]' or wildcard '*'\n"
-"                                total cpus requested = (N x S x C x T)\n"
+"                              total cpus requested = (N x S x C x T)\n"
 "\n"
 "      --ntasks-per-socket=n   number of tasks to invoke on each socket\n"
 "      --ntasks-per-core=n     number of tasks to invoke on each core\n"
-- 
GitLab