From 4ea9850a4a702402c534861ae4a2729c241dfffd Mon Sep 17 00:00:00 2001
From: Hongjia Cao <hjcao@nudt.edu.cn>
Date: Tue, 19 Mar 2013 13:59:18 -0700
Subject: [PATCH] Fix idle nodes that cannot be allocated

Avoid adding/removing a job's node resources if the node was lost by a job resize.

I found another case in which an idle node cannot be allocated. It can be
reproduced as follows:

1. run a job with -k option:

    [root@mn0 ~]# srun -w cn[18-28] -k sleep 1000
    srun: error: Node failure on cn28
    srun: error: Node failure on cn28
    srun: error: cn28: task 10: Killed
    ^Csrun: interrupt (one more within 1 sec to abort)
    srun: tasks 0-9: running
    srun: task 10: exited abnormally
    ^Csrun: sending Ctrl-C to job 106120.0
    srun: Job step aborted: Waiting up to 2 seconds for job step to finish.

2. set a node down and then set it idle:

    [root@mn0 ~]# scontrol update nodename=cn28 state=down reason="hjcao test"
    [root@mn0 ~]# scontrol update nodename=cn28 state=idle

3. restart slurmctld

    [root@mn0 ~]# service slurm restart
    stopping slurmctld:                                        [  OK  ]
    slurmctld is stopped
    starting slurmctld:                                        [  OK  ]

4. cancel the job

Then the node that was set down is left unavailable, even though it is reported as idle:

    [root@mn0 ~]# sinfo -n cn[18-28]
    PARTITION AVAIL  TIMELIMIT  NODES  STATE NODELIST
    work*        up   infinite     11   idle cn[18-28]

    [root@mn0 ~]# srun -w cn[18-28] hostname
    srun: job 106122 queued and waiting for resources

    [root@mn0 slurm]# grep cn28 slurmctld.log
    [2013-03-18T15:28:02+08:00] debug3: cons_res: _vns: node cn28 in exclusive use
    [2013-03-18T15:29:02+08:00] debug3: cons_res: _vns: node cn28 in exclusive use

I attempted to fix this with the attached patch. Please review it.
---
 src/plugins/select/cons_res/select_cons_res.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c
index 4c5cc8777d8..fad2209b7ba 100644
--- a/src/plugins/select/cons_res/select_cons_res.c
+++ b/src/plugins/select/cons_res/select_cons_res.c
@@ -804,6 +804,8 @@ static int _add_job_to_res(struct job_record *job_ptr, int action)
 		if (!bit_test(job->node_bitmap, i))
 			continue;
 		n++;
+		if (job->cpus[n] == 0)
+			continue;  /* node lost by job resize */
 
 		node_ptr = select_node_record[i].node_ptr;
 		if (action != 2) {
@@ -872,10 +874,14 @@ static int _add_job_to_res(struct job_record *job_ptr, int action)
 			_add_job_to_row(job, &(p_ptr->row[p_ptr->num_rows-1]));
 		}
 		/* update the node state */
-		for (i = 0; i < select_node_cnt; i++) {
-			if (bit_test(job->node_bitmap, i))
+		for (i = 0, n = -1; i < select_node_cnt; i++) {
+			if (bit_test(job->node_bitmap, i)) {
+				n++;
+				if (job->cpus[n] == 0)
+					continue;  /* node lost by job resize */
 				select_node_usage[i].node_state +=
 					job->node_req;
+			}
 		}
 		if (select_debug_flags & DEBUG_FLAG_CPU_BIND) {
 			info("DEBUG: _add_job_to_res (after):");
@@ -1150,6 +1156,8 @@ static int _rm_job_from_res(struct part_res_record *part_record_ptr,
 		if (!bit_test(job->node_bitmap, i))
 			continue;
 		n++;
+		if (job->cpus[n] == 0)
+			continue;  /* node lost by job resize */
 
 		node_ptr = node_record_table_ptr + i;
 		if (action != 2) {
-- 
GitLab