From f1981eaba2904e4f8cf4e15d674c88576cf18608 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Mon, 23 Feb 2009 18:23:22 +0000 Subject: [PATCH] add some port reservation logic --- slurm/slurm_errno.h | 2 ++ src/common/slurm_errno.c | 7 ++++- src/slurmctld/port_mgr.c | 67 ++++++++++++++++++++++++++++++++++++++-- src/srun/allocate.c | 3 +- 4 files changed, 75 insertions(+), 4 deletions(-) diff --git a/slurm/slurm_errno.h b/slurm/slurm_errno.h index 4ad552d7f43..a2108f05fc0 100644 --- a/slurm/slurm_errno.h +++ b/slurm/slurm_errno.h @@ -168,6 +168,8 @@ enum { ESLURM_RESERVATION_NOT_USABLE, ESLURM_INVALID_WCKEY, ESLURM_RESERVATION_OVERLAP, + ESLURM_PORTS_BUSY, + ESLURM_PORTS_INVALID, /* switch specific error codes, specific values defined in plugin module */ ESLURM_SWITCH_MIN = 3000, diff --git a/src/common/slurm_errno.c b/src/common/slurm_errno.c index 309e7c9a037..c4acb1adf85 100644 --- a/src/common/slurm_errno.c +++ b/src/common/slurm_errno.c @@ -1,7 +1,8 @@ /*****************************************************************************\ * slurm_errno.c - error codes and functions for slurm ****************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008-2009 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Jim Garlick <garlick@llnl.gov>, et. al. * CODE-OCEC-09-009. All rights reserved. @@ -237,6 +238,10 @@ static slurm_errtab_t slurm_errtab[] = { "Requested reservation not usable now" }, { ESLURM_RESERVATION_OVERLAP, "Requested reservation overlaps with another reservation" }, + { ESLURM_PORTS_BUSY, + "Requires ports are in use" }, + { ESLURM_PORTS_INVALID, + "Requires more ports than can be reserved" }, /* slurmd error codes */ diff --git a/src/slurmctld/port_mgr.c b/src/slurmctld/port_mgr.c index 8145cbba244..a87d3ece2fe 100644 --- a/src/slurmctld/port_mgr.c +++ b/src/slurmctld/port_mgr.c @@ -41,14 +41,77 @@ # include "config.h" #endif +#include "src/common/bitstring.h" +#include "src/common/hostlist.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" + #include "src/slurmctld/slurmctld.h" +bitstr_t **port_resv_table = (bitstr_t **) NULL; +int port_resv_cnt = 0; +int port_resv_min = 0; +int port_resv_max = 0; + /* Reserve ports for a job step * RET SLURM_SUCCESS or an error code */ extern int reserve_ports(struct step_record *step_ptr) { -info("reserving %u ports", step_ptr->resv_port_cnt); -step_ptr->resv_ports = xstrdup("123-456"); + int i, port_inx; + int *port_array = NULL; + char port_str[16]; + hostlist_t hl; + + if (step_ptr->resv_port_cnt > port_resv_cnt) { + info("step %u.%u needs %u reserved ports, but only %d exist", + step_ptr->job_ptr->job_id, step_ptr->step_id, + step_ptr->resv_port_cnt, port_resv_cnt); + return ESLURM_PORTS_INVALID; + } + + /* Identify available ports */ + port_array = xmalloc(sizeof(int) * step_ptr->resv_port_cnt); + port_inx = 0; + for (i=0; i<port_resv_cnt; i++) { + if (bit_overlap(step_ptr->step_node_bitmap, + port_resv_table[i])) + continue; + port_array[port_inx++] = i; + if (port_inx >= step_ptr->resv_port_cnt) + break; + } + if (port_inx < step_ptr->resv_port_cnt) { + info("insufficient ports for step %u.%u to reserve (%d of %u)", + step_ptr->job_ptr->job_id, step_ptr->step_id, + port_inx, step_ptr->resv_port_cnt); + xfree(port_array); + return ESLURM_PORTS_BUSY; + } + + /* Reserve selected ports */ + hl = hostlist_create(NULL); + if (hl == NULL) + fatal("malloc: hostlist_create"); + for (i=0; i<port_inx; i++) { + bit_or(port_resv_table[port_array[i]], + step_ptr->step_node_bitmap); + snprintf(port_str, sizeof(port_str), + "%d", (port_array[i] + port_resv_min)); + hostlist_push(hl, port_str); + } + hostlist_sort(hl); + for (i=1024; ; i*=2) { + step_ptr->resv_ports = xmalloc(i); + if (hostlist_ranged_string(hl, i, step_ptr->resv_ports) >= 0) + break; + xfree(step_ptr->resv_ports); + } + hostlist_destroy(hl); + xfree(port_array); + info("reserved ports %s for step %u.%u", + step_ptr->resv_ports, + step_ptr->job_ptr->job_id, step_ptr->step_id); + return SLURM_SUCCESS; } diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 9a9670255d4..e02573d9cd8 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -596,7 +596,8 @@ create_job_step(srun_job_t *job, bool use_all_cpus) rc = slurm_get_errno(); if (opt.immediate || - ((rc != ESLURM_NODES_BUSY) && (rc != ESLURM_DISABLED))) { + ((rc != ESLURM_NODES_BUSY) && (rc != ESLURM_PORTS_BUSY) && + (rc != ESLURM_DISABLED))) { error ("Unable to create job step: %m"); return -1; } -- GitLab