From 43378a5ea35efb0188e642d88c01d47eff9d2d71 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Mon, 20 Sep 2004 16:42:39 +0000 Subject: [PATCH] Restructure some code so that the bgl_part_id can be picked-up and reported as desired. Add new function to drain node (for use by select/bluegene node monitoring thread). --- src/slurmctld/job_mgr.c | 99 +++++---------------- src/slurmctld/node_mgr.c | 2 +- src/slurmctld/proc_req.c | 181 ++++++++++++++++++-------------------- src/slurmctld/slurmctld.h | 39 ++++---- src/slurmctld/srun_comm.c | 2 +- 5 files changed, 126 insertions(+), 197 deletions(-) diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 94856abe7bb..f5ee6acc544 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -97,8 +97,7 @@ static void _excise_node_from_job(struct job_record *job_ptr, static int _find_batch_dir(void *x, void *key); static void _get_batch_job_dir_ids(List batch_dirs); static void _job_timed_out(struct job_record *job_ptr); -static int _job_create(job_desc_msg_t * job_specs, uint32_t * new_job_id, - int allocate, int will_run, +static int _job_create(job_desc_msg_t * job_specs, int allocate, int will_run, struct job_record **job_rec_ptr, uid_t submit_uid); static void _list_delete_job(void *job_entry); static int _list_find_job_id(void *job_entry, void *key); @@ -1237,21 +1236,12 @@ extern void rehash_jobs(void) * job_allocate - create job_records for the suppied job specification and * allocate nodes for it. 
 * IN job_specs - job specifications - * IN node_list - location for storing new job's allocated nodes * IN immediate - if set then either initiate the job immediately or fail * IN will_run - don't initiate the job if set, just test if it could run * now or later * IN allocate - resource allocation request if set, not a full job - * OUT new_job_id - the new job's ID - * OUT num_cpu_groups - number of cpu groups (elements in cpus_per_node - * and cpu_count_reps) - * OUT cpus_per_node - pointer to array of numbers of cpus on each node - * allocate - * OUT cpu_count_reps - pointer to array of numbers of consecutive nodes - * having same cpu count - * OUT node_list - list of nodes allocated to the job - * OUT node_cnt - number of allocated nodes - * OUT node_addr - slurm_addr's for the allocated nodes + * IN submit_uid - uid of user issuing the request + * OUT job_pptr - set to pointer to job record * RET 0 or an error code. If the job would only be able to execute with * some change in partition configuration then * ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE is returned @@ -1263,18 +1253,16 @@ extern void rehash_jobs(void) * default_part_loc - pointer to default partition * NOTE: lock_slurmctld on entry: Read config Write job, Write node, Read part */ -int job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id, - char **node_list, uint16_t * num_cpu_groups, - uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps, - int immediate, int will_run, int allocate, - uid_t submit_uid, uint16_t * node_cnt, - slurm_addr ** node_addr) +extern int job_allocate(job_desc_msg_t * job_specs, int immediate, int will_run, + int allocate, uid_t submit_uid, struct job_record **job_pptr) { int error_code; bool no_alloc, top_prio, test_only, too_fragmented, independent; struct job_record *job_ptr; - error_code = _job_create(job_specs, new_job_id, allocate, will_run, + error_code = _job_create(job_specs, allocate, will_run, &job_ptr, submit_uid); + *job_pptr = job_ptr; + if (error_code) 
{ if (immediate && job_ptr) { job_ptr->job_state = JOB_FAILED; @@ -1284,9 +1272,7 @@ int job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id, } return error_code; } - if (job_ptr == NULL) - fatal("job_allocate: allocated job %u lacks record", - new_job_id); + xassert(job_ptr); independent = job_independent(job_ptr); @@ -1322,22 +1308,8 @@ int job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id, } test_only = will_run || (allocate == 0); - if (!test_only) { - /* Some of these pointers are NULL on submit */ - if (num_cpu_groups) - *num_cpu_groups = 0; - if (node_list) - *node_list = NULL; - if (cpus_per_node) - *cpus_per_node = NULL; - if (cpu_count_reps) - *cpu_count_reps = NULL; - if (node_cnt) - *node_cnt = 0; - if (node_addr) - *node_addr = (slurm_addr *) NULL; + if (!test_only) last_job_update = time(NULL); - } no_alloc = test_only || too_fragmented || (!top_prio) || (!independent); @@ -1371,21 +1343,6 @@ int job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id, job_ptr->end_time = 0; } - if (!no_alloc) { - if (node_list) - *node_list = job_ptr->nodes; - if (num_cpu_groups) - *num_cpu_groups = job_ptr->num_cpu_groups; - if (cpus_per_node) - *cpus_per_node = job_ptr->cpus_per_node; - if (cpu_count_reps) - *cpu_count_reps = job_ptr->cpu_count_reps; - if (node_cnt) - *node_cnt = job_ptr->node_cnt; - if (node_addr) - *node_addr = job_ptr->node_addr; - } - return SLURM_SUCCESS; } @@ -1575,7 +1532,6 @@ job_complete(uint32_t job_id, uid_t uid, bool requeue, * input: job_specs - job specifications * IN allocate - resource allocation request if set rather than job submit * IN will_run - job is not to be created, test of validity only - * OUT new_job_id - the job's ID * OUT job_pptr - pointer to the job (NULL on error) * RET 0 on success, otherwise ESLURM error code. 
If the job would only be * able to execute with some change in partition configuration then @@ -1586,8 +1542,7 @@ job_complete(uint32_t job_id, uid_t uid, bool requeue, * job_hash - hash table into job records */ -static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, - int allocate, int will_run, +static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, struct job_record **job_pptr, uid_t submit_uid) { int error_code = SLURM_SUCCESS, i; @@ -1760,7 +1715,6 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, (*job_pptr)->batch_flag = 1; } else (*job_pptr)->batch_flag = 0; - *new_job_id = (*job_pptr)->job_id; /* Insure that requested partition is valid right now, * otherwise leave job queued and provide warning code */ @@ -1772,19 +1726,20 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, if ((!super_user) && (job_desc->min_nodes > part_ptr->max_nodes)) { info("Job %u requested too many nodes (%d) of " - "partition %s(%d)", *new_job_id, job_desc->min_nodes, + "partition %s(%d)", + (*job_pptr)->job_id, job_desc->min_nodes, part_ptr->name, part_ptr->max_nodes); fail_reason = WAIT_PART_NODE_LIMIT; } else if ((!super_user) && (job_desc->max_nodes != 0) && /* no max_nodes for job */ (job_desc->max_nodes < part_ptr->min_nodes)) { info("Job %u requested too few nodes (%d) of partition %s(%d)", - *new_job_id, job_desc->max_nodes, - part_ptr->name, part_ptr->min_nodes); + (*job_pptr)->job_id, job_desc->max_nodes, + part_ptr->name, part_ptr->min_nodes); fail_reason = WAIT_PART_NODE_LIMIT; } else if (part_ptr->state_up == 0) { info("Job %u requested down partition %s", - *new_job_id, part_ptr->name); + (*job_pptr)->job_id, part_ptr->name); fail_reason = WAIT_PART_STATE; } if (fail_reason != WAIT_NO_REASON) { @@ -3382,13 +3337,10 @@ kill_job_on_node(uint32_t job_id, struct node_record *node_ptr) * old_job_info - get details about an existing job allocation * IN uid - job issuing the code * 
IN job_id - ID of job for which info is requested - * OUT everything else - the job's details + * OUT job_pptr - set to pointer to job record */ -int -old_job_info(uint32_t uid, uint32_t job_id, char **node_list, - uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, - uint32_t ** cpu_count_reps, uint16_t * node_cnt, - slurm_addr ** node_addr) +extern int +old_job_info(uint32_t uid, uint32_t job_id, struct job_record **job_pptr) { struct job_record *job_ptr; @@ -3403,18 +3355,7 @@ old_job_info(uint32_t uid, uint32_t job_id, char **node_list, if (IS_JOB_FINISHED(job_ptr)) return ESLURM_ALREADY_DONE; - if (node_list) - *node_list = job_ptr->nodes; - if (num_cpu_groups) - *num_cpu_groups = job_ptr->num_cpu_groups; - if (cpus_per_node) - *cpus_per_node = job_ptr->cpus_per_node; - if (cpu_count_reps) - *cpu_count_reps = job_ptr->cpu_count_reps; - if (node_cnt) - *node_cnt = job_ptr->node_cnt; - if (node_addr) - *node_addr = job_ptr->node_addr; + *job_pptr = job_ptr; return SLURM_SUCCESS; } diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index a6a26950583..de974696ecd 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -841,7 +841,7 @@ void set_slurmd_addr (void) /* * update_node - update the configuration data for one or more nodes * IN update_node_msg - update node request - * RET 0 or error code + * RET SLURM_SUCCESS or error code * global: node_record_table_ptr - pointer to global node table */ int update_node ( update_node_msg_t * update_node_msg ) diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 56e71ae7907..a3f0a411b71 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -358,19 +358,14 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) slurm_msg_t response_msg; DEF_TIMERS; job_desc_msg_t *job_desc_msg = (job_desc_msg_t *) msg->data; - char *node_list_ptr = NULL; - uint16_t num_cpu_groups = 0; - uint32_t *cpus_per_node = NULL, *cpu_count_reps = NULL; - uint32_t job_id = 0; 
resource_allocation_response_msg_t alloc_msg; /* Locks: Read config, write job, write node, read partition */ slurmctld_lock_t job_write_lock = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; uid_t uid; - uint16_t node_cnt = 0; - slurm_addr *node_addr = NULL; int immediate = job_desc_msg->immediate; bool do_unlock = false; + struct job_record *job_ptr; START_TIMER; debug2("Processing RPC: REQUEST_RESOURCE_ALLOCATION"); @@ -387,11 +382,8 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) if (error_code == SLURM_SUCCESS) { do_unlock = true; lock_slurmctld(job_write_lock); - error_code = job_allocate(job_desc_msg, &job_id, - &node_list_ptr, &num_cpu_groups, - &cpus_per_node, &cpu_count_reps, - immediate, false, true, uid, - &node_cnt, &node_addr); + error_code = job_allocate(job_desc_msg, + immediate, false, true, uid, &job_ptr); /* unlock after finished using the job structure data */ END_TIMER; } @@ -400,29 +392,30 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) if ((error_code == SLURM_SUCCESS) || ((immediate == 0) && (error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE))) { + xassert(job_ptr); info("_slurm_rpc_allocate_resources JobId=%u NodeList=%s %s", - job_id, node_list_ptr, TIME_STR); + job_ptr->job_id, job_ptr->nodes, TIME_STR); /* send job_ID and node_name_ptr */ alloc_msg.cpu_count_reps = xmalloc(sizeof(uint32_t) * - num_cpu_groups); - memcpy(alloc_msg.cpu_count_reps, cpu_count_reps, - (sizeof(uint32_t) * num_cpu_groups)); + job_ptr->num_cpu_groups); + memcpy(alloc_msg.cpu_count_reps, job_ptr->cpu_count_reps, + (sizeof(uint32_t) * job_ptr->num_cpu_groups)); alloc_msg.cpus_per_node = xmalloc(sizeof(uint32_t) * - num_cpu_groups); - memcpy(alloc_msg.cpus_per_node, cpus_per_node, - (sizeof(uint32_t) * num_cpu_groups)); + job_ptr->num_cpu_groups); + memcpy(alloc_msg.cpus_per_node, job_ptr->cpus_per_node, + (sizeof(uint32_t) * job_ptr->num_cpu_groups)); alloc_msg.error_code = error_code; - alloc_msg.job_id = job_id; + 
alloc_msg.job_id = job_ptr->job_id; alloc_msg.node_addr = xmalloc(sizeof(slurm_addr) * - node_cnt); - memcpy(alloc_msg.node_addr, node_addr, - (sizeof(slurm_addr) * node_cnt)); - alloc_msg.node_cnt = node_cnt; - alloc_msg.node_list = xstrdup(node_list_ptr); - alloc_msg.num_cpu_groups = num_cpu_groups; + job_ptr->node_cnt); + memcpy(alloc_msg.node_addr, job_ptr->node_addr, + (sizeof(slurm_addr) * job_ptr->node_cnt)); + alloc_msg.node_cnt = job_ptr->node_cnt; + alloc_msg.node_list = xstrdup(job_ptr->nodes); + alloc_msg.num_cpu_groups = job_ptr->num_cpu_groups; #ifdef HAVE_BGL - alloc_msg.bgl_part_id = xstrdup(DEFAULT_BGL_PART_ID); + alloc_msg.bgl_part_id = xstrdup(job_ptr->bgl_part_id); #endif unlock_slurmctld(job_write_lock); @@ -430,7 +423,7 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) response_msg.data = &alloc_msg; if (slurm_send_node_msg(msg->conn_fd, &response_msg) < 0) - _kill_job_on_msg_fail(job_id); + _kill_job_on_msg_fail(job_ptr->job_id); xfree(alloc_msg.cpu_count_reps); xfree(alloc_msg.cpus_per_node); xfree(alloc_msg.node_addr); @@ -458,20 +451,15 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) slurm_msg_t response_msg; DEF_TIMERS; job_desc_msg_t *job_desc_msg = (job_desc_msg_t *) msg->data; - char *node_list_ptr = NULL; - uint16_t num_cpu_groups = 0; - uint32_t *cpus_per_node = NULL, *cpu_count_reps = NULL; - uint32_t job_id; resource_allocation_and_run_response_msg_t alloc_msg; struct step_record *step_rec; + struct job_record *job_ptr; slurm_cred_t slurm_cred; job_step_create_request_msg_t req_step_msg; /* Locks: Write job, write node, read partition */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; uid_t uid; - uint16_t node_cnt; - slurm_addr *node_addr; int immediate = true; /* implicit job_desc_msg->immediate == true */ START_TIMER; @@ -497,11 +485,8 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) #endif lock_slurmctld(job_write_lock); - error_code = 
job_allocate(job_desc_msg, &job_id, - &node_list_ptr, &num_cpu_groups, - &cpus_per_node, &cpu_count_reps, - immediate, false, true, uid, - &node_cnt, &node_addr); + error_code = job_allocate(job_desc_msg, + immediate, false, true, uid, &job_ptr); /* return result */ if (error_code) { @@ -512,7 +497,7 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) return; } - req_step_msg.job_id = job_id; + req_step_msg.job_id = job_ptr->job_id; req_step_msg.user_id = job_desc_msg->user_id; #ifdef HAVE_BGL req_step_msg.node_count = 1; @@ -531,7 +516,7 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) /* note: no need to free step_rec, pointer to global job step record */ if (error_code) { - job_complete(job_id, job_desc_msg->user_id, false, 0); + job_complete(job_ptr->job_id, job_desc_msg->user_id, false, 0); unlock_slurmctld(job_write_lock); info("_slurm_rpc_allocate_and_run creating job step: %s", slurm_strerror(error_code)); @@ -539,17 +524,17 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) } else { info("_slurm_rpc_allocate_and_run JobId=%u NodeList=%s %s", - job_id, node_list_ptr, TIME_STR); + job_ptr->job_id, job_ptr->nodes, TIME_STR); /* send job_ID and node_name_ptr */ - alloc_msg.job_id = job_id; - alloc_msg.node_list = node_list_ptr; - alloc_msg.num_cpu_groups = num_cpu_groups; - alloc_msg.cpus_per_node = cpus_per_node; - alloc_msg.cpu_count_reps = cpu_count_reps; + alloc_msg.job_id = job_ptr->job_id; + alloc_msg.node_list = job_ptr->nodes; + alloc_msg.num_cpu_groups = job_ptr->num_cpu_groups; + alloc_msg.cpus_per_node = job_ptr->cpus_per_node; + alloc_msg.cpu_count_reps = job_ptr->cpu_count_reps; alloc_msg.job_step_id = step_rec->step_id; - alloc_msg.node_cnt = node_cnt; - alloc_msg.node_addr = node_addr; + alloc_msg.node_cnt = job_ptr->node_cnt; + alloc_msg.node_addr = job_ptr->node_addr; alloc_msg.cred = slurm_cred; alloc_msg.switch_job = switch_copy_jobinfo( step_rec->switch_job); @@ -558,7 +543,7 @@ static void 
_slurm_rpc_allocate_and_run(slurm_msg_t * msg) response_msg.data = &alloc_msg; if (slurm_send_node_msg(msg->conn_fd, &response_msg) < 0) - _kill_job_on_msg_fail(job_id); + _kill_job_on_msg_fail(job_ptr->job_id); slurm_cred_destroy(slurm_cred); switch_free_jobinfo(alloc_msg.switch_job); schedule_job_save(); /* has own locks */ @@ -1089,11 +1074,8 @@ static void _slurm_rpc_job_will_run(slurm_msg_t * msg) /* init */ DEF_TIMERS; int error_code = SLURM_SUCCESS; - uint16_t num_cpu_groups = 0; - uint32_t *cpus_per_node = NULL, *cpu_count_reps = NULL; - uint32_t job_id; + struct job_record *job_ptr; job_desc_msg_t *job_desc_msg = (job_desc_msg_t *) msg->data; - char *node_list_ptr = NULL; /* Locks: Write job, read node, read partition */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; @@ -1113,11 +1095,8 @@ static void _slurm_rpc_job_will_run(slurm_msg_t * msg) if (error_code == SLURM_SUCCESS) { lock_slurmctld(job_write_lock); - error_code = job_allocate(job_desc_msg, &job_id, - &node_list_ptr, &num_cpu_groups, - &cpus_per_node, &cpu_count_reps, - false, true, true, uid, NULL, - NULL); + error_code = job_allocate(job_desc_msg, + false, true, true, uid, &job_ptr); unlock_slurmctld(job_write_lock); END_TIMER; } @@ -1201,18 +1180,14 @@ static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg) { int error_code = SLURM_SUCCESS; slurm_msg_t response_msg; + struct job_record *job_ptr; DEF_TIMERS; old_job_alloc_msg_t *job_desc_msg = (old_job_alloc_msg_t *) msg->data; - char *node_list_ptr = NULL; - uint16_t num_cpu_groups = 0; - uint32_t *cpus_per_node = NULL, *cpu_count_reps = NULL; resource_allocation_response_msg_t alloc_msg; /* Locks: Read job, read node */ slurmctld_lock_t job_read_lock = { NO_LOCK, READ_LOCK, READ_LOCK, NO_LOCK }; - uint16_t node_cnt; - slurm_addr *node_addr; uid_t uid; bool do_unlock = false; @@ -1230,15 +1205,12 @@ static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg) do_unlock = true; lock_slurmctld(job_read_lock); 
error_code = old_job_info(job_desc_msg->uid, - job_desc_msg->job_id, - &node_list_ptr, &num_cpu_groups, - &cpus_per_node, &cpu_count_reps, - &node_cnt, &node_addr); + job_desc_msg->job_id, &job_ptr); END_TIMER; } /* return result */ - if (error_code) { + if (error_code || (job_ptr == NULL)) { if (do_unlock) unlock_slurmctld(job_read_lock); debug2("_slurm_rpc_old_job_alloc: JobId=%u, uid=%u: %s", @@ -1247,28 +1219,29 @@ static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg) slurm_send_rc_msg(msg, error_code); } else { debug2("_slurm_rpc_old_job_alloc JobId=%u NodeList=%s %s", - job_desc_msg->job_id, node_list_ptr, TIME_STR); + job_desc_msg->job_id, job_ptr->nodes, TIME_STR); /* send job_ID and node_name_ptr */ alloc_msg.cpu_count_reps = xmalloc(sizeof(uint32_t) * - num_cpu_groups); - memcpy(alloc_msg.cpu_count_reps, cpu_count_reps, - (sizeof(uint32_t) * num_cpu_groups)); + job_ptr->num_cpu_groups); + memcpy(alloc_msg.cpu_count_reps, + job_ptr->cpu_count_reps, + (sizeof(uint32_t) * job_ptr->num_cpu_groups)); alloc_msg.cpus_per_node = xmalloc(sizeof(uint32_t) * - num_cpu_groups); - memcpy(alloc_msg.cpus_per_node, cpus_per_node, - (sizeof(uint32_t) * num_cpu_groups)); + job_ptr->num_cpu_groups); + memcpy(alloc_msg.cpus_per_node, job_ptr->cpus_per_node, + (sizeof(uint32_t) * job_ptr->num_cpu_groups)); alloc_msg.error_code = error_code; alloc_msg.job_id = job_desc_msg->job_id; alloc_msg.node_addr = xmalloc(sizeof(slurm_addr) * - node_cnt); - memcpy(alloc_msg.node_addr, node_addr, - (sizeof(slurm_addr) * node_cnt)); - alloc_msg.node_cnt = node_cnt; - alloc_msg.node_list = xstrdup(node_list_ptr); - alloc_msg.num_cpu_groups = num_cpu_groups; + job_ptr->node_cnt); + memcpy(alloc_msg.node_addr, job_ptr->node_addr, + (sizeof(slurm_addr) * job_ptr->node_cnt)); + alloc_msg.node_cnt = job_ptr->node_cnt; + alloc_msg.node_list = xstrdup(job_ptr->nodes); + alloc_msg.num_cpu_groups = job_ptr->num_cpu_groups; #ifdef HAVE_BGL - alloc_msg.bgl_part_id = xstrdup(DEFAULT_BGL_PART_ID); + 
alloc_msg.bgl_part_id = xstrdup(job_ptr->bgl_part_id); #endif unlock_slurmctld(job_read_lock); @@ -1444,7 +1417,7 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) /* init */ int error_code = SLURM_SUCCESS; DEF_TIMERS; - uint32_t job_id; + struct job_record *job_ptr; slurm_msg_t response_msg; submit_response_msg_t submit_msg; job_desc_msg_t *job_desc_msg = (job_desc_msg_t *) msg->data; @@ -1467,12 +1440,8 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) } if (error_code == SLURM_SUCCESS) { lock_slurmctld(job_write_lock); - error_code = job_allocate(job_desc_msg, &job_id, - (char **) NULL, - (uint16_t *) NULL, - (uint32_t **) NULL, - (uint32_t **) NULL, false, false, - false, uid, NULL, NULL); + error_code = job_allocate(job_desc_msg, false, false, + false, uid, &job_ptr); unlock_slurmctld(job_write_lock); END_TIMER; } @@ -1481,14 +1450,13 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) if ((error_code != SLURM_SUCCESS) && (error_code != ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE)) { info("_slurm_rpc_submit_batch_job: %s", - slurm_strerror(error_code)); + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { - info( - "_slurm_rpc_submit_batch_job JobId=%u %s", - job_id, TIME_STR); + info("_slurm_rpc_submit_batch_job JobId=%u %s", + job_ptr->job_id, TIME_STR); /* send job_ID */ - submit_msg.job_id = job_id; + submit_msg.job_id = job_ptr->job_id; submit_msg.error_code = error_code; response_msg.msg_type = RESPONSE_SUBMIT_BATCH_JOB; response_msg.data = &submit_msg; @@ -1538,6 +1506,31 @@ static void _slurm_rpc_update_job(slurm_msg_t * msg) } } +/* + * slurm_drain_nodes - process a request to drain a list of nodes + * node_list IN - list of nodes to drain + * reason IN - reason to drain the nodes + * RET SLURM_SUCCESS or error code + */ +extern int slurm_drain_nodes(char *node_list, char *reason) +{ + int error_code; + update_node_msg_t update_node_msg; + /* Locks: Write node */ + slurmctld_lock_t node_write_lock 
= { + NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK }; + + update_node_msg.node_names = node_list; + update_node_msg.node_state = NODE_STATE_DRAINED; + update_node_msg.reason = reason; + + lock_slurmctld(node_write_lock); + error_code = update_node(&update_node_msg); + unlock_slurmctld(node_write_lock); + + return error_code; +} + /* _slurm_rpc_update_node - process RPC to update the configuration of a * node (e.g. UP/DOWN) */ static void _slurm_rpc_update_node(slurm_msg_t * msg) diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 641aebc5b7f..77a85b7b3a2 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -609,21 +609,12 @@ extern bool is_node_resp (char *name); * job_allocate - create job_records for the suppied job specification and * allocate nodes for it. * IN job_specs - job specifications - * IN node_list - location for storing new job's allocated nodes * IN immediate - if set then either initiate the job immediately or fail * IN will_run - don't initiate the job if set, just test if it could run * now or later * IN allocate - resource allocation request if set, not a full job - * OUT new_job_id - the new job's ID - * OUT num_cpu_groups - number of cpu groups (elements in cpus_per_node - * and cpu_count_reps) - * OUT cpus_per_node - pointer to array of numbers of cpus on each node - * allocate - * OUT cpu_count_reps - pointer to array of numbers of consecutive nodes - * having same cpu count - * OUT node_list - list of nodes allocated to the job - * OUT node_cnt - number of allocated nodes - * OUT node_addr - slurm_addr's for the allocated nodes + * IN submit_uid - uid of user issuing the request + * OUT job_pptr - set to pointer to job record * RET 0 or an error code. 
If the job would only be able to execute with * some change in partition configuration then * ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE is returned @@ -632,13 +623,11 @@ extern bool is_node_resp (char *name); * and cpu_count_reps={4,2,2} * globals: job_list - pointer to global job list * list_part - global list of partition info - * default_part_loc - pointer to default partition + * default_part_loc - pointer to default partition + * NOTE: lock_slurmctld on entry: Read config Write job, Write node, Read part */ -extern int job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id, - char **node_list, uint16_t * num_cpu_groups, - uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps, - int immediate, int will_run, int allocate, uid_t submit_uid, - uint16_t * node_cnt, slurm_addr ** node_addr); +extern int job_allocate(job_desc_msg_t * job_specs, int immediate, int will_run, + int allocate, uid_t submit_uid, struct job_record **job_pptr); /* log the completion of the specified job */ extern void job_completion_logger(struct job_record *job_ptr); @@ -887,12 +876,10 @@ extern void node_not_resp (char *name, time_t msg_time); * old_job_info - get details about an existing job allocation * IN uid - job issuing the code * IN job_id - ID of job for which info is requested - * OUT everything else - the job's detains + * OUT job_pptr - set to pointer to job record */ -extern int old_job_info (uint32_t uid, uint32_t job_id, char **node_list, - uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, - uint32_t ** cpu_count_reps, - uint16_t * node_cnt, slurm_addr ** node_addr); +extern int old_job_info(uint32_t uid, uint32_t job_id, + struct job_record **job_pptr); /* @@ -1104,6 +1091,14 @@ extern void signal_step_tasks(struct step_record *step_ptr, uint16_t signal); */ extern int slurmctld_shutdown(void); +/* + * slurm_drain_nodes - process a request to drain a list of nodes + * node_list IN - list of nodes to drain + * reason IN - reason to drain the nodes + * RET 
SLURM_SUCCESS or error code + */ +extern int slurm_drain_nodes(char *node_list, char *reason); + /* * step_create - creates a step_record in step_specs->job_id, sets up the * accoding to the step_specs. diff --git a/src/slurmctld/srun_comm.c b/src/slurmctld/srun_comm.c index db063c26efa..fe02f9435c3 100644 --- a/src/slurmctld/srun_comm.c +++ b/src/slurmctld/srun_comm.c @@ -90,7 +90,7 @@ extern void srun_allocate (uint32_t job_id) memcpy(msg_arg->node_addr, job_ptr->node_addr, (sizeof(slurm_addr) * job_ptr->node_cnt)); #ifdef HAVE_BGL - msg_arg->bgl_part_id = xstrdup(DEFAULT_BGL_PART_ID); + msg_arg->bgl_part_id = xstrdup(job_ptr->bgl_part_id); #endif msg_arg->error_code = SLURM_SUCCESS; _srun_agent_launch(addr, job_ptr->host, -- GitLab