diff --git a/NEWS b/NEWS index d24214e01626d0ac4d5218685746e7e6c41ce07e..62e469ebdbb3f850f308d2afa99344061a61df62 100644 --- a/NEWS +++ b/NEWS @@ -400,6 +400,7 @@ documents those changes that are of interest to users and admins. * Changes in Slurm 2.6.10 ========================= + -- Switch/nrt - On switch resource allocation failure, free partial allocation. * Changes in Slurm 2.6.9 ======================== diff --git a/src/plugins/switch/nrt/nrt.c b/src/plugins/switch/nrt/nrt.c index 4a18384f3f6ad4d2b01acdf73aaca466f04b6ab1..769b0665bc0c18e480704ecfe284a773ade02bfa 100644 --- a/src/plugins/switch/nrt/nrt.c +++ b/src/plugins/switch/nrt/nrt.c @@ -1172,7 +1172,7 @@ _allocate_windows_all(slurm_nrt_jobinfo_t *jp, char *hostname, continue; if ((context_id == 0) && (_add_block_use(jp, adapter))) { - return SLURM_ERROR; + goto alloc_fail; } for (j = 0; j < instances; j++) { table_id++; @@ -1180,7 +1180,7 @@ _allocate_windows_all(slurm_nrt_jobinfo_t *jp, char *hostname, if (table_inx >= jp->tables_per_task) { error("switch/nrt: adapter count too " "high, host=%s", hostname); - return SLURM_ERROR; + goto alloc_fail; } if (user_space) { window = _find_free_window(adapter); @@ -1190,11 +1190,11 @@ _allocate_windows_all(slurm_nrt_jobinfo_t *jp, char *hostname, "node %s adapter %s", node->name, adapter->adapter_name); - return SLURM_ERROR; + goto alloc_fail; } if (_add_immed_use(hostname, jp, adapter)) - return SLURM_ERROR; + goto alloc_fail; window->state = NRT_WIN_UNAVAILABLE; window->job_key = job_key; } @@ -1260,7 +1260,7 @@ _allocate_windows_all(slurm_nrt_jobinfo_t *jp, char *hostname, "for adapter type %s", _adapter_type_str(adapter-> adapter_type)); - return SLURM_ERROR; + goto alloc_fail; } strncpy(tableinfo[table_inx].adapter_name, @@ -1286,10 +1286,16 @@ _allocate_windows_all(slurm_nrt_jobinfo_t *jp, char *hostname, /* This node has too few adapters of this type */ error("switch/nrt: adapter count too low, host=%s", hostname); drain_nodes(hostname, "Too few switch adapters", 0); - return SLURM_ERROR; + goto alloc_fail; } return SLURM_SUCCESS; + +alloc_fail: + /* Unable to allocate all necessary resources. + * Free what has been allocated so far. */ + _free_resources_by_job(jp, hostname); + return SLURM_ERROR; } @@ -1366,7 +1372,7 @@ _allocate_window_single(char *adapter_name, slurm_nrt_jobinfo_t *jp, context_id++) { if ((context_id == 0) && (_add_block_use(jp, adapter))) { - return SLURM_ERROR; + goto alloc_fail; } for (table_id = 0; table_id < instances; table_id++) { table_inx++; @@ -1378,10 +1384,10 @@ _allocate_window_single(char *adapter_name, slurm_nrt_jobinfo_t *jp, "on node %s adapter %s", node->name, adapter->adapter_name); - return SLURM_ERROR; + goto alloc_fail; } if (_add_immed_use(hostname, jp, adapter)) - return SLURM_ERROR; + goto alloc_fail; window->state = NRT_WIN_UNAVAILABLE; window->job_key = job_key; } @@ -1442,7 +1448,7 @@ _allocate_window_single(char *adapter_name, slurm_nrt_jobinfo_t *jp, } else { error("Missing support for adapter type %s", _adapter_type_str(adapter_type)); - return SLURM_ERROR; + goto alloc_fail; } strncpy(tableinfo[table_inx].adapter_name, adapter_name, @@ -1461,6 +1467,12 @@ _allocate_window_single(char *adapter_name, slurm_nrt_jobinfo_t *jp, } /* for each context */ return SLURM_SUCCESS; + +alloc_fail: + /* Unable to allocate all necessary resources. + * Free what has been allocated so far. */ + _free_resources_by_job(jp, hostname); + return SLURM_ERROR; } static char *