diff --git a/NEWS b/NEWS index cd2f35818f4cb1739f17204fbc1b02eeca747b79..d24214e01626d0ac4d5218685746e7e6c41ce07e 100644 --- a/NEWS +++ b/NEWS @@ -103,6 +103,10 @@ documents those changes that are of interest to users and admins. -- When sorting jobs and priorities are equal sort by job_id. -- Do not overwrite existing reason for node being down or drained. -- Requeue batch job if Munge is down and credential can not be created. + -- Make _slurm_init_msg_engine() tolerate a bug in bind() returning a busy + ephemeral port. + -- Don't block scheduling of entire job array if it could run in multiple + partitions. * Changes in Slurm 14.03.0 ========================== diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 3a34d941b3338421c0d41f8dc93ff19a7b9cf16d..2b9bd2fddcbf7e7ab202095cf21dd4a23e115cfe 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -2186,17 +2186,30 @@ static void _remap_slurmctld_errno(void) \**********************************************************************/ /* In the socket implementation it creates a socket, binds to it, and - * listens for connections. + * listens for connections. If an ephemeral port was requested (port == 0) + * and setup fails (e.g. in bind() or listen()), retry up to 5 times, 5 ms apart. 
* * IN port - port to bind the msg server to * RET slurm_fd_t - file descriptor of the connection created */ slurm_fd_t slurm_init_msg_engine_port(uint16_t port) { + slurm_fd_t cc; slurm_addr_t addr; + int cnt; + cnt = 0; +eagain: slurm_set_addr_any(&addr, port); - return _slurm_init_msg_engine(&addr); + cc = _slurm_init_msg_engine(&addr); + if (cc < 0 && port == 0) { + ++cnt; + if (cnt <= 5) { + usleep(5000); + goto eagain; + } + } + return cc; } /* In the socket implementation it creates a socket, binds to it, and diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index 26f1744a05ea317cddd5816908c7496a27c6118c..ad0bf00d979047c8dd55f505e2171acfb59ba0c0 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -663,6 +663,7 @@ static int _attempt_backfill(void) uint16_t *njobs = NULL; bool already_counted; uint32_t reject_array_job_id = 0; + struct part_record *reject_array_part = NULL; uint32_t job_start_cnt = 0; time_t config_update = slurmctld_conf.last_update; time_t part_update = last_part_update; @@ -793,10 +794,12 @@ static int _attempt_backfill(void) if (!avail_front_end(job_ptr)) continue; /* No available frontend for this job */ if (job_ptr->array_task_id != NO_VAL) { - if (reject_array_job_id == job_ptr->array_job_id) + if ((reject_array_job_id == job_ptr->array_job_id) && + (reject_array_part == part_ptr)) continue; /* already rejected array element */ /* assume reject whole array for now, clear if OK */ reject_array_job_id = job_ptr->array_job_id; + reject_array_part = part_ptr; } job_ptr->part_ptr = part_ptr; @@ -1092,6 +1095,7 @@ static int _attempt_backfill(void) } else { /* Started this job, move to next one */ reject_array_job_id = 0; + reject_array_part = NULL; /* Update the database if job time limit * changed and move to next job */ @@ -1140,6 +1144,7 @@ static int _attempt_backfill(void) if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) continue; 
reject_array_job_id = 0; + reject_array_part = NULL; if (debug_flags & DEBUG_FLAG_BACKFILL) _dump_job_sched(job_ptr, end_reserve, avail_bitmap); xfree(job_ptr->sched_nodes); diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index acdf1a7e36f3439cb74819c305dc7b84239a7b1b..b10499f2d617def7a0a707dc9e91b204d364d246 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -749,6 +749,7 @@ extern int schedule(uint32_t job_limit) static int defer_rpc_cnt = 0; time_t now, sched_start; uint32_t reject_array_job_id = 0; + struct part_record *reject_array_part = NULL; DEF_TIMERS; if (sched_update != slurmctld_conf.last_update) { @@ -986,10 +987,13 @@ next_part: part_ptr = (struct part_record *) } if (job_ptr->array_task_id != NO_VAL) { - if (reject_array_job_id == job_ptr->array_job_id) + if ((reject_array_job_id == job_ptr->array_job_id) && + (reject_array_part == job_ptr->part_ptr)) continue; /* already rejected array element */ + /* assume reject whole array for now, clear if OK */ reject_array_job_id = job_ptr->array_job_id; + reject_array_part = job_ptr->part_ptr; } if (max_jobs_per_part) { bool skip_job = false; @@ -1273,6 +1277,7 @@ next_part: part_ptr = (struct part_record *) rebuild_job_part_list(job_ptr); job_cnt++; reject_array_job_id = 0; + reject_array_part = NULL; } else if ((error_code == ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE) && job_ptr->part_ptr_list) {