diff --git a/NEWS b/NEWS index 6b808b9bb9b02b4cd5f5c0a3e3647ed59e9dcaf4..f2edac7241a3fab0424be8b54244aca0e010afe4 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,11 @@ documents those changes that are of interest to users and admins. -- Moved task layout to slurmctld instead of srun. Job step create returns step_layout structure with hostnames and addresses that corrisponds to those nodes. + -- Changed params of API function slurm_allocation_lookup: + resource_allocation_response_msg_t changed to job_alloc_info_response_msg_t; + the structure is only being renamed, so its contents are the same. + -- Altered resource_allocation_response_msg_t; see slurm.h.in. + -- Removed old_job_alloc_msg_t and the function slurm_confirm_allocation. * Changes in SLURM 1.2.0-pre1 ============================= diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 07442652dad39638aa7142e31f9ce1083837ae73..962a2467573ef5ade133babf4e6bc12d62d0e195 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -604,9 +604,9 @@ typedef struct node_info_msg { node_info_t *node_array; /* the node records */ } node_info_msg_t; -typedef struct old_job_alloc_msg { +typedef struct job_alloc_info_msg { uint32_t job_id; /* job ID */ -} old_job_alloc_msg_t; +} job_alloc_info_msg_t; typedef struct partition_info { char *name; /* name of the partition */ @@ -647,6 +647,19 @@ typedef struct resource_allocation_response_msg { * use select_g_get_jobinfo() to access conents */ } resource_allocation_response_msg_t; +typedef struct job_alloc_info_response_msg { + uint32_t job_id; /* assigned job id */ + char *node_list; /* assigned list of nodes */ + uint16_t num_cpu_groups;/* elements in below cpu arrays */ + uint32_t *cpus_per_node;/* cpus per node */ + uint32_t *cpu_count_reps;/* how many nodes have same cpu count */ + uint16_t node_cnt; /* count of nodes */ + slurm_addr *node_addr; /* network addresses */ + uint32_t error_code; /* error code for warning message */ + select_jobinfo_t select_jobinfo; /* opaque data structure, + * use
select_g_get_jobinfo() to access conents */ +} job_alloc_info_response_msg_t; + typedef struct partition_info_msg { time_t last_update; /* time of latest info */ uint32_t record_count; /* number of records */ @@ -801,29 +814,33 @@ extern resource_allocation_response_msg_t * extern void slurm_free_resource_allocation_response_msg PARAMS(( resource_allocation_response_msg_t * msg)); - /* - * OBSOLETE! This function, along with the old_job_alloc_msg_t - * structure, will go away in a future version of SLURM. Use - * slurm_allocation_lookup() instead. - * slurm_confirm_allocation - confirm an existing resource allocation - * IN job_desc_msg - description of existing job request - * OUT slurm_alloc_msg - response to request - * RET 0 on success, otherwise return -1 and set errno to indicate the error - * NOTE: free the response using slurm_free_resource_allocation_response_msg + * slurm_free_job_alloc_info_response_msg - free slurm resource + * allocation lookup message + * IN msg - pointer to job allocation info response message + * NOTE: buffer is loaded by slurm_allocation_lookup */ -extern int slurm_confirm_allocation PARAMS(( - old_job_alloc_msg_t * job_desc_msg, - resource_allocation_response_msg_t ** slurm_alloc_msg)); +extern void slurm_free_job_alloc_info_response_msg PARAMS(( + job_alloc_info_response_msg_t * msg)); /* * slurm_allocation_lookup - retrieve info for an existing resource allocation * IN job_id - job allocation identifier * OUT resp - job allocation information * RET 0 on success, otherwise return -1 and set errno to indicate the error - * NOTE: free "info" using slurm_free_resource_allocation_response_msg + * NOTE: free "info" using slurm_free_job_alloc_info_response_msg */ extern int slurm_allocation_lookup PARAMS(( + uint32_t job_id, job_alloc_info_response_msg_t **info)); +/* + * slurm_allocation_lookup_lite - retrieve minor info for an existing + * resource allocation + * IN job_id - job allocation identifier + * OUT resp - job allocation 
information + * RET 0 on success, otherwise return -1 and set errno to indicate the error + * NOTE: free "info" using slurm_free_resource_allocation_response_msg + */ +extern int slurm_allocation_lookup_lite PARAMS(( uint32_t job_id, resource_allocation_response_msg_t **info)); /* @@ -1208,7 +1225,7 @@ extern int slurm_get_select_jobinfo PARAMS((select_jobinfo_t jobinfo, * IN job_id - get information for specific job id, zero for all jobs * IN step_id - get information for specific job step id, zero for all * job steps - * IN job_info_msg_pptr - place to store a job configuration pointer + * IN step_response_pptr - place to store a step response pointer * IN show_flags - job step filtering options * RET 0 on success, otherwise return -1 and set errno to indicate the error * NOTE: free the response using slurm_free_job_step_info_response_msg diff --git a/src/api/allocate.c b/src/api/allocate.c index 194313d9e2820290fd8ceea1e66fba12501e3b83..dfa43442a579e76a78ba5ce1b347e255d5982568 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -159,7 +159,7 @@ slurm_allocate_resources_blocking (const job_desc_msg_t *user_req, slurm_msg_t req_msg; slurm_msg_t resp_msg; resource_allocation_response_msg_t *resp = NULL; - char *hostname; + char *hostname = NULL; uint32_t job_id; job_desc_msg_t *req; listen_t *listen = NULL; @@ -339,30 +339,26 @@ slurm_job_step_create (job_step_create_request_msg_t *req, } /* - * OBSOLETE! This function, along with the old_job_alloc_msg_t - * structure, will go away in a future version of SLURM. Use - * slurm_allocation_lookup() instead.
- * slurm_confirm_allocation - confirm an existing resource allocation - * IN job_desc_msg - description of existing job request - * OUT slurm_alloc_msg - response to request + * slurm_allocation_lookup - retrieve info for an existing resource allocation + * IN jobid - job allocation identifier + * OUT info - job allocation information * RET 0 on success, otherwise return -1 and set errno to indicate the error - * NOTE: free the response using slurm_free_resource_allocation_response_msg + * NOTE: free "info" using slurm_free_job_alloc_info_response_msg */ -int -slurm_confirm_allocation (old_job_alloc_msg_t *req, - resource_allocation_response_msg_t **resp) +int +slurm_allocation_lookup(uint32_t jobid, + job_alloc_info_response_msg_t **info) { + job_alloc_info_msg_t req; slurm_msg_t req_msg; slurm_msg_t resp_msg; - req_msg.msg_type = REQUEST_OLD_JOB_RESOURCE_ALLOCATION; - req_msg.data = req; + req.job_id = jobid; + req_msg.msg_type = REQUEST_JOB_ALLOCATION_INFO; + req_msg.data = &req; forward_init(&req_msg.forward, NULL); req_msg.ret_list = NULL; req_msg.forward_struct_init = 0; - forward_init(&resp_msg.forward, NULL); - resp_msg.ret_list = NULL; - resp_msg.forward_struct_init = 0; if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) return SLURM_ERROR; @@ -371,10 +367,10 @@ slurm_confirm_allocation (old_job_alloc_msg_t *req, case RESPONSE_SLURM_RC: if (_handle_rc_msg(&resp_msg) < 0) return SLURM_ERROR; - *resp = NULL; + *info = NULL; break; - case RESPONSE_RESOURCE_ALLOCATION: - *resp = (resource_allocation_response_msg_t *) resp_msg.data; + case RESPONSE_JOB_ALLOCATION_INFO: + *info = (job_alloc_info_response_msg_t *)resp_msg.data; return SLURM_PROTOCOL_SUCCESS; break; default: @@ -386,24 +382,28 @@ slurm_confirm_allocation (old_job_alloc_msg_t *req, } /* - * slurm_allocation_lookup - retrieve info for an existing resource allocation + * slurm_allocation_lookup_lite - retrieve info for an existing resource + * allocation without the addrs and
such * IN jobid - job allocation identifier * OUT info - job allocation information * RET 0 on success, otherwise return -1 and set errno to indicate the error * NOTE: free the "resp" using slurm_free_resource_allocation_response_msg */ int -slurm_allocation_lookup(uint32_t jobid, - resource_allocation_response_msg_t **info) +slurm_allocation_lookup_lite(uint32_t jobid, + resource_allocation_response_msg_t **info) { - old_job_alloc_msg_t req; + job_alloc_info_msg_t req; slurm_msg_t req_msg; slurm_msg_t resp_msg; req.job_id = jobid; - req_msg.msg_type = REQUEST_OLD_JOB_RESOURCE_ALLOCATION; + req_msg.msg_type = REQUEST_JOB_ALLOCATION_INFO_LITE; req_msg.data = &req; - + forward_init(&req_msg.forward, NULL); + req_msg.ret_list = NULL; + req_msg.forward_struct_init = 0; + if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) return SLURM_ERROR; @@ -413,7 +413,7 @@ slurm_allocation_lookup(uint32_t jobid, return SLURM_ERROR; *info = NULL; break; - case RESPONSE_RESOURCE_ALLOCATION: + case RESPONSE_JOB_ALLOCATION_INFO_LITE: *info = (resource_allocation_response_msg_t *) resp_msg.data; return SLURM_PROTOCOL_SUCCESS; break; @@ -739,9 +739,9 @@ _wait_for_allocation_response(uint32_t job_id, const listen_t *listen, * Let's see if the controller thinks that the allocation * has been granted. 
*/ - if (slurm_allocation_lookup(job_id, &resp) >= 0) + if (slurm_allocation_lookup_lite(job_id, &resp) >= 0) { return resp; - + } if (slurm_get_errno() == ESLURM_JOB_PENDING) { debug3("Still waiting for allocation"); errno = errnum; diff --git a/src/api/job_info.c b/src/api/job_info.c index 63815d3596bd5093d4b6494dd2e1d434a384a51c..7248283d8c8431b7e2f08a460416e133305ab4c4 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -435,7 +435,7 @@ slurm_get_end_time(uint32_t jobid, time_t *end_time_ptr) int rc; slurm_msg_t resp_msg; slurm_msg_t req_msg; - old_job_alloc_msg_t job_msg; + job_alloc_info_msg_t job_msg; srun_timeout_msg_t *timeout_msg; time_t now = time(NULL); static uint32_t jobid_cache = 0; diff --git a/src/api/signal.c b/src/api/signal.c index 9bf77282035ecab853b4fe6ae5180d4b49fe052d..5267618458f93c9716c7ad0099ab36183a11e9cb 100644 --- a/src/api/signal.c +++ b/src/api/signal.c @@ -45,14 +45,14 @@ #define MAX_THREADS 50 static int _signal_job_step(const job_step_info_t *step, - const resource_allocation_response_msg_t *allocation, + const job_alloc_info_response_msg_t *allocation, uint16_t signal); static int _signal_batch_script_step( - const resource_allocation_response_msg_t *allocation, uint16_t signal); + const job_alloc_info_response_msg_t *allocation, uint16_t signal); static int _terminate_job_step(const job_step_info_t *step, - const resource_allocation_response_msg_t *allocation); + const job_alloc_info_response_msg_t *allocation); static int _terminate_batch_script_step( - const resource_allocation_response_msg_t *allocation); + const job_alloc_info_response_msg_t *allocation); static int _p_send_recv_rc_msg(int num_nodes, slurm_msg_t msg[], int rc[], int timeout); static void *_thr_send_recv_rc_msg(void *args); @@ -75,7 +75,7 @@ extern int slurm_signal_job (uint32_t job_id, uint16_t signal) { int rc = SLURM_SUCCESS; - resource_allocation_response_msg_t *alloc_info; + job_alloc_info_response_msg_t *alloc_info; slurm_msg_t *msg; /* array of 
message structs, one per node */ signal_job_msg_t rpc; int *rc_array; @@ -89,8 +89,7 @@ slurm_signal_job (uint32_t job_id, uint16_t signal) /* same remote procedure call for each node */ rpc.job_id = job_id; rpc.signal = (uint32_t)signal; - - msg = xmalloc(sizeof(slurm_msg_t) * alloc_info->node_cnt); + msg = xmalloc(sizeof(slurm_msg_t) * alloc_info->node_cnt); rc_array = xmalloc(sizeof(int) * alloc_info->node_cnt); for (i = 0; i < alloc_info->node_cnt; i++) { msg[i].msg_type = REQUEST_SIGNAL_JOB; @@ -109,7 +108,7 @@ slurm_signal_job (uint32_t job_id, uint16_t signal) xfree(msg); xfree(rc_array); - slurm_free_resource_allocation_response_msg(alloc_info); + slurm_free_job_alloc_info_response_msg(alloc_info); fail1: if (rc) { slurm_seterrno_ret(rc); @@ -129,7 +128,7 @@ fail1: extern int slurm_signal_job_step (uint32_t job_id, uint32_t step_id, uint16_t signal) { - resource_allocation_response_msg_t *alloc_info; + job_alloc_info_response_msg_t *alloc_info; job_step_info_response_msg_t *step_info; int rc; int i; @@ -145,7 +144,7 @@ slurm_signal_job_step (uint32_t job_id, uint32_t step_id, uint16_t signal) */ if (step_id == SLURM_BATCH_SCRIPT) { rc = _signal_batch_script_step(alloc_info, signal); - slurm_free_resource_allocation_response_msg(alloc_info); + slurm_free_job_alloc_info_response_msg(alloc_info); errno = rc; return rc ? -1 : 0; } @@ -171,7 +170,7 @@ slurm_signal_job_step (uint32_t job_id, uint32_t step_id, uint16_t signal) } slurm_free_job_step_info_response_msg(step_info); fail: - slurm_free_resource_allocation_response_msg(alloc_info); + slurm_free_job_alloc_info_response_msg(alloc_info); errno = save_errno; return rc ? 
-1 : 0; } @@ -184,7 +183,7 @@ fail: */ static int _get_step_addresses(const job_step_info_t *step, - const resource_allocation_response_msg_t *allocation, + const job_alloc_info_response_msg_t *allocation, slurm_addr **address, int *num_addresses) { hostset_t alloc_nodes; @@ -223,7 +222,7 @@ _get_step_addresses(const job_step_info_t *step, static int _signal_job_step(const job_step_info_t *step, - const resource_allocation_response_msg_t *allocation, + const job_alloc_info_response_msg_t *allocation, uint16_t signal) { slurm_msg_t *msg; /* array of message structs, one per node */ @@ -268,7 +267,7 @@ _signal_job_step(const job_step_info_t *step, } static int _signal_batch_script_step( - const resource_allocation_response_msg_t *allocation, uint16_t signal) + const job_alloc_info_response_msg_t *allocation, uint16_t signal) { slurm_msg_t msg; kill_tasks_msg_t rpc; @@ -387,7 +386,7 @@ extern int slurm_terminate_job (uint32_t job_id) { int rc = SLURM_SUCCESS; - resource_allocation_response_msg_t *alloc_info; + job_alloc_info_response_msg_t *alloc_info; slurm_msg_t *msg; /* array of message structs, one per node */ signal_job_msg_t rpc; int *rc_array; @@ -421,7 +420,7 @@ slurm_terminate_job (uint32_t job_id) xfree(msg); xfree(rc_array); - slurm_free_resource_allocation_response_msg(alloc_info); + slurm_free_job_alloc_info_response_msg(alloc_info); slurm_complete_job(job_id, 0); fail1: @@ -444,7 +443,7 @@ fail1: extern int slurm_terminate_job_step (uint32_t job_id, uint32_t step_id) { - resource_allocation_response_msg_t *alloc_info; + job_alloc_info_response_msg_t *alloc_info; job_step_info_response_msg_t *step_info; int rc = 0; int i; @@ -460,7 +459,7 @@ slurm_terminate_job_step (uint32_t job_id, uint32_t step_id) */ if (step_id == SLURM_BATCH_SCRIPT) { rc = _terminate_batch_script_step(alloc_info); - slurm_free_resource_allocation_response_msg(alloc_info); + slurm_free_job_alloc_info_response_msg(alloc_info); errno = rc; return rc ? 
-1 : 0; } @@ -486,7 +485,7 @@ slurm_terminate_job_step (uint32_t job_id, uint32_t step_id) } slurm_free_job_step_info_response_msg(step_info); fail: - slurm_free_resource_allocation_response_msg(alloc_info); + slurm_free_job_alloc_info_response_msg(alloc_info); errno = save_errno; return rc ? -1 : 0; } @@ -500,7 +499,7 @@ fail: */ static int _terminate_job_step(const job_step_info_t *step, - const resource_allocation_response_msg_t *allocation) + const job_alloc_info_response_msg_t *allocation) { slurm_msg_t *msg; /* array of message structs, one per node */ kill_tasks_msg_t rpc; @@ -544,7 +543,7 @@ _terminate_job_step(const job_step_info_t *step, } static int _terminate_batch_script_step( - const resource_allocation_response_msg_t *allocation) + const job_alloc_info_response_msg_t *allocation) { slurm_msg_t msg; kill_tasks_msg_t rpc; diff --git a/src/api/step_ctx.c b/src/api/step_ctx.c index f1fbe740a19863f9b87d9cd9172da8bcc24d01f4..8ca2eb8fa303d36803dbc673996f4d5f557e50a5 100644 --- a/src/api/step_ctx.c +++ b/src/api/step_ctx.c @@ -70,14 +70,14 @@ slurm_step_ctx_create (job_step_create_request_msg_t *step_req) { struct slurm_step_ctx_struct *ctx; job_step_create_response_msg_t *step_resp = NULL; - resource_allocation_response_msg_t *alloc_resp = NULL; + job_alloc_info_response_msg_t *alloc_resp = NULL; if (slurm_allocation_lookup(step_req->job_id, &alloc_resp) < 0) return NULL; if ((slurm_job_step_create(step_req, &step_resp) < 0) || (step_resp == NULL)) { - slurm_free_resource_allocation_response_msg(alloc_resp); + slurm_free_job_alloc_info_response_msg(alloc_resp); return NULL; /* slurm errno already set */ } @@ -274,7 +274,7 @@ slurm_step_ctx_destroy (slurm_step_ctx ctx) } _free_step_req(ctx->step_req); slurm_free_job_step_create_response_msg(ctx->step_resp); - slurm_free_resource_allocation_response_msg(ctx->alloc_resp); + slurm_free_job_alloc_info_response_msg(ctx->alloc_resp); if (ctx->argv) _xfree_char_array(&ctx->argv, ctx->argc); if (ctx->env_set) diff 
--git a/src/api/step_ctx.h b/src/api/step_ctx.h index ee733b46edccb359eea8fa065bdbfd30fbde104e..572dde8ac58128cba845d4fa10e534255320398c 100644 --- a/src/api/step_ctx.h +++ b/src/api/step_ctx.h @@ -72,7 +72,7 @@ struct slurm_step_ctx_struct { uint32_t job_id; /* assigned job id */ uint32_t user_id; /* user the job runs as */ - resource_allocation_response_msg_t *alloc_resp; + job_alloc_info_response_msg_t *alloc_resp; job_step_create_request_msg_t *step_req; job_step_create_response_msg_t *step_resp; diff --git a/src/common/forward.c b/src/common/forward.c index 3d68a741340bf332ee36687915e04064740b418d..79f5fd75d7bffd51188c41c620fa41e791deee85 100644 --- a/src/common/forward.c +++ b/src/common/forward.c @@ -233,9 +233,6 @@ int _destroy_data_info_data(uint32_t type, ret_data_info_t *ret_data_info) case REQUEST_JOB_INFO: slurm_free_job_info_request_msg(ret_data_info->data); break; - case REQUEST_JOB_END_TIME: - slurm_free_old_job_alloc_msg(ret_data_info->data); - break; case REQUEST_NODE_INFO: slurm_free_node_info_request_msg(ret_data_info->data); break; @@ -269,8 +266,9 @@ int _destroy_data_info_data(uint32_t type, ret_data_info_t *ret_data_info) case MESSAGE_NODE_REGISTRATION_STATUS: slurm_free_node_registration_status_msg(ret_data_info->data); break; - case REQUEST_OLD_JOB_RESOURCE_ALLOCATION: - slurm_free_old_job_alloc_msg(ret_data_info->data); + case REQUEST_JOB_END_TIME: + case REQUEST_JOB_ALLOCATION_INFO: + slurm_free_job_alloc_info_msg(ret_data_info->data); break; case SLURM_SUCCESS: case REQUEST_PING: diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 2293b3a74a27cf304629b3266c7d4665fc04c15f..ebe40b18f6762f2e14c6a08894f95f485b2ec33e 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -70,7 +70,7 @@ void slurm_free_shutdown_msg(shutdown_msg_t * msg) xfree(msg); } -void slurm_free_old_job_alloc_msg(old_job_alloc_msg_t * msg) +void slurm_free_job_alloc_info_msg(job_alloc_info_msg_t * msg) { 
xfree(msg); } @@ -683,6 +683,24 @@ void slurm_free_resource_allocation_response_msg ( } } +/* + * slurm_free_job_alloc_info_response_msg - free slurm job allocation + * info response message + * IN msg - pointer to job allocation info response message + * NOTE: buffer is loaded by slurm_allocation_lookup + */ +void slurm_free_job_alloc_info_response_msg(job_alloc_info_response_msg_t *msg) +{ + if (msg) { + select_g_free_jobinfo(&msg->select_jobinfo); + xfree(msg->node_list); + xfree(msg->cpus_per_node); + xfree(msg->cpu_count_reps); + xfree(msg->node_addr); + xfree(msg); + } +} + /* * slurm_free_job_step_create_response_msg - free slurm diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 906cb70df95797fd08e26dc37a5bda0727b0cadc..a9dbc8564acc8e23e41c9ca1339a8da9cc085214 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -123,7 +123,10 @@ typedef enum { RESPONSE_JOB_ATTACH, REQUEST_JOB_WILL_RUN, RESPONSE_JOB_WILL_RUN, - REQUEST_OLD_JOB_RESOURCE_ALLOCATION, + REQUEST_JOB_ALLOCATION_INFO, + RESPONSE_JOB_ALLOCATION_INFO, + REQUEST_JOB_ALLOCATION_INFO_LITE, + RESPONSE_JOB_ALLOCATION_INFO_LITE, REQUEST_UPDATE_JOB_TIME, REQUEST_JOB_READY, RESPONSE_JOB_READY, @@ -603,14 +606,12 @@ typedef struct slurm_ctl_conf slurm_ctl_conf_info_msg_t; /* free message functions */ void inline slurm_free_last_update_msg(last_update_msg_t * msg); void inline slurm_free_return_code_msg(return_code_msg_t * msg); -void inline slurm_free_old_job_alloc_msg(old_job_alloc_msg_t * msg); +void inline slurm_free_job_alloc_info_msg(job_alloc_info_msg_t * msg); void inline slurm_free_job_info_request_msg(job_info_request_msg_t *msg); void inline slurm_free_job_step_info_request_msg( job_step_info_request_msg_t *msg); -void inline slurm_free_node_info_request_msg( - node_info_request_msg_t *msg); -void inline slurm_free_part_info_request_msg( - part_info_request_msg_t *msg); +void inline
slurm_free_node_info_request_msg(node_info_request_msg_t *msg); +void inline slurm_free_part_info_request_msg(part_info_request_msg_t *msg); #define slurm_free_timelimit_msg(msg) \ slurm_free_kill_job_msg(msg) @@ -666,6 +667,8 @@ void inline slurm_free_checkpoint_resp_msg(checkpoint_resp_msg_t *msg); void inline slurm_free_suspend_msg(suspend_msg_t *msg); void slurm_free_resource_allocation_response_msg ( resource_allocation_response_msg_t * msg); +void slurm_free_job_alloc_info_response_msg ( + job_alloc_info_response_msg_t * msg); void slurm_free_job_step_create_response_msg( job_step_create_response_msg_t * msg); void slurm_free_submit_response_response_msg(submit_response_msg_t * msg); diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 18b5902a06d750e23620f59aa7c33a82fc7c2e8c..66aa5e4ec2a7f5fe3467f1c71c628e5d775525ce 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -61,21 +61,28 @@ static void _pack_update_node_msg(update_node_msg_t * msg, Buf buffer); static int _unpack_update_node_msg(update_node_msg_t ** msg, Buf buffer); static void - _pack_node_registration_status_msg(slurm_node_registration_status_msg_t * - msg, Buf buffer); +_pack_node_registration_status_msg(slurm_node_registration_status_msg_t * + msg, Buf buffer); static int - _unpack_node_registration_status_msg(slurm_node_registration_status_msg_t - ** msg, Buf buffer); +_unpack_node_registration_status_msg(slurm_node_registration_status_msg_t + ** msg, Buf buffer); static void _pack_job_ready_msg(job_id_msg_t * msg, Buf buffer); static int _unpack_job_ready_msg(job_id_msg_t ** msg_ptr, Buf buffer); static void - _pack_resource_allocation_response_msg(resource_allocation_response_msg_t * - msg, Buf buffer); +_pack_resource_allocation_response_msg(resource_allocation_response_msg_t * + msg, Buf buffer); static int - _unpack_resource_allocation_response_msg(resource_allocation_response_msg_t - ** msg, Buf buffer); 
+_unpack_resource_allocation_response_msg(resource_allocation_response_msg_t + ** msg, Buf buffer); + +static void +_pack_job_alloc_info_response_msg(job_alloc_info_response_msg_t * msg, + Buf buffer); +static int +_unpack_job_alloc_info_response_msg(job_alloc_info_response_msg_t ** msg, + Buf buffer); static void _pack_submit_response_msg(submit_response_msg_t * msg, Buf buffer); @@ -83,15 +90,15 @@ static int _unpack_submit_response_msg(submit_response_msg_t ** msg, Buf buffer); static void _pack_node_info_request_msg( - node_info_request_msg_t * msg, Buf buffer); + node_info_request_msg_t * msg, Buf buffer); static int _unpack_node_info_request_msg( - node_info_request_msg_t ** msg, Buf bufer); + node_info_request_msg_t ** msg, Buf bufer); static int _unpack_node_info_msg(node_info_msg_t ** msg, Buf buffer); static int _unpack_node_info_members(node_info_t * node, Buf buffer); static int _unpack_node_select_info_msg(node_select_info_msg_t ** msg, - Buf buffer); + Buf buffer); static void _pack_update_partition_msg(update_part_msg_t * msg, Buf buffer); static int _unpack_update_partition_msg(update_part_msg_t ** msg, Buf buffer); @@ -102,7 +109,7 @@ static int _unpack_delete_partition_msg(delete_part_msg_t ** msg, Buf buffer); static void _pack_job_step_create_request_msg(job_step_create_request_msg_t * msg, Buf buffer); static int _unpack_job_step_create_request_msg( - job_step_create_request_msg_t ** msg, Buf buffer); + job_step_create_request_msg_t ** msg, Buf buffer); static void _pack_kill_job_msg(kill_job_msg_t * msg, Buf buffer); static int _unpack_kill_job_msg(kill_job_msg_t ** msg, Buf buffer); @@ -117,15 +124,15 @@ static void _pack_update_job_time_msg(job_time_msg_t * msg, Buf buffer); static int _unpack_update_job_time_msg(job_time_msg_t ** msg, Buf buffer); static void _pack_job_step_create_response_msg( - job_step_create_response_msg_t * msg, Buf buffer); + job_step_create_response_msg_t * msg, Buf buffer); static int 
_unpack_job_step_create_response_msg( - job_step_create_response_msg_t ** msg, Buf buffer); + job_step_create_response_msg_t ** msg, Buf buffer); static void _pack_part_info_request_msg(part_info_request_msg_t * msg, - Buf buffer); + Buf buffer); static int _unpack_part_info_request_msg(part_info_request_msg_t ** - msg, Buf buffer); + msg, Buf buffer); static void _pack_partition_info_msg(slurm_msg_t * msg, Buf buffer); static int _unpack_partition_info_msg(partition_info_msg_t ** msg, @@ -140,9 +147,9 @@ static int _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** static void _pack_spawn_task_request_msg(spawn_task_request_msg_t * - msg, Buf buffer); + msg, Buf buffer); static int _unpack_spawn_task_request_msg(spawn_task_request_msg_t ** - msg_ptr, Buf buffer); + msg_ptr, Buf buffer); static void _pack_cancel_tasks_msg(kill_tasks_msg_t * msg, Buf buffer); static int _unpack_cancel_tasks_msg(kill_tasks_msg_t ** msg_ptr, Buf buffer); @@ -161,17 +168,18 @@ static int _unpack_reattach_tasks_request_msg(reattach_tasks_request_msg_t **, Buf); static void - _pack_reattach_tasks_response_msg(reattach_tasks_response_msg_t *, Buf); +_pack_reattach_tasks_response_msg(reattach_tasks_response_msg_t *, Buf); static int - _unpack_reattach_tasks_response_msg(reattach_tasks_response_msg_t **, Buf); +_unpack_reattach_tasks_response_msg(reattach_tasks_response_msg_t **, Buf); static void _pack_task_exit_msg(task_exit_msg_t * msg, Buf buffer); static int _unpack_task_exit_msg(task_exit_msg_t ** msg_ptr, Buf buffer); -static void _pack_old_job_desc_msg(old_job_alloc_msg_t * job_desc_ptr, - Buf buffer); -static int _unpack_old_job_desc_msg(old_job_alloc_msg_t ** - job_desc_buffer_ptr, Buf buffer); +static void _pack_job_alloc_info_msg(job_alloc_info_msg_t * job_desc_ptr, + Buf buffer); +static int +_unpack_job_alloc_info_msg(job_alloc_info_msg_t **job_desc_buffer_ptr, + Buf buffer); static void _pack_return_code_msg(return_code_msg_t * msg, Buf buffer); static int 
_unpack_return_code_msg(return_code_msg_t ** msg, Buf buffer); @@ -182,14 +190,14 @@ static int _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t ** build_buffer_ptr, Buf buffer); static void _pack_job_info_request_msg(job_info_request_msg_t * - msg, Buf buffer); + msg, Buf buffer); static int _unpack_job_info_request_msg(job_info_request_msg_t** - msg, Buf buffer); + msg, Buf buffer); static void _pack_node_select_info_req_msg(node_info_select_request_msg_t * - msg, Buf buffer); + msg, Buf buffer); static int _unpack_node_select_info_req_msg(node_info_select_request_msg_t ** - msg, Buf buffer); + msg, Buf buffer); static void _pack_job_step_info_req_msg(job_step_info_request_msg_t * msg, Buf buffer); @@ -259,7 +267,7 @@ static int _unpack_srun_ping_msg(srun_ping_msg_t ** msg_ptr, Buf buffer); static void _pack_srun_node_fail_msg(srun_node_fail_msg_t * msg, Buf buffer); static int _unpack_srun_node_fail_msg(srun_node_fail_msg_t ** msg_ptr, - Buf buffer); + Buf buffer); static void _pack_srun_timeout_msg(srun_timeout_msg_t * msg, Buf buffer); static int @@ -270,11 +278,11 @@ static int _unpack_checkpoint_msg(checkpoint_msg_t **msg_ptr, Buf buffer); static void _pack_checkpoint_resp_msg(checkpoint_resp_msg_t *msg, Buf buffer); static int _unpack_checkpoint_resp_msg(checkpoint_resp_msg_t **msg_ptr, - Buf buffer); + Buf buffer); static void _pack_checkpoint_comp(checkpoint_comp_msg_t *msg, Buf buffer); static int _unpack_checkpoint_comp(checkpoint_comp_msg_t **msg_ptr, - Buf buffer); + Buf buffer); static void _pack_suspend_msg(suspend_msg_t *msg, Buf buffer); static int _unpack_suspend_msg(suspend_msg_t **msg_ptr, Buf buffer); @@ -382,7 +390,7 @@ unpack_header(header_t * header, Buf buffer) return SLURM_SUCCESS; - unpack_error: +unpack_error: error("unpacking header"); destroy_forward(&header->forward); if(header->ret_list) @@ -402,255 +410,262 @@ int pack_msg(slurm_msg_t const *msg, Buf buffer) { switch (msg->msg_type) { - case REQUEST_NODE_INFO: + case 
REQUEST_NODE_INFO: _pack_node_info_request_msg((node_info_request_msg_t *) - msg->data, buffer); + msg->data, buffer); break; - case REQUEST_PARTITION_INFO: + case REQUEST_PARTITION_INFO: _pack_part_info_request_msg((part_info_request_msg_t *) - msg->data, buffer); - break; - case REQUEST_BUILD_INFO: - case REQUEST_ACCTING_INFO: - _pack_last_update_msg((last_update_msg_t *) - msg->data, buffer); - break; - case RESPONSE_BUILD_INFO: - _pack_slurm_ctl_conf_msg((slurm_ctl_conf_info_msg_t *) - msg->data, buffer); - break; - case RESPONSE_JOB_INFO: - _pack_job_info_msg((slurm_msg_t *) msg, buffer); - break; - case RESPONSE_PARTITION_INFO: - _pack_partition_info_msg((slurm_msg_t *) msg, buffer); - break; - case RESPONSE_NODE_INFO: - _pack_node_info_msg((slurm_msg_t *) msg, buffer); - break; - case MESSAGE_NODE_REGISTRATION_STATUS: - _pack_node_registration_status_msg( + msg->data, buffer); + break; + case REQUEST_BUILD_INFO: + case REQUEST_ACCTING_INFO: + _pack_last_update_msg((last_update_msg_t *) + msg->data, buffer); + break; + case RESPONSE_BUILD_INFO: + _pack_slurm_ctl_conf_msg((slurm_ctl_conf_info_msg_t *) + msg->data, buffer); + break; + case RESPONSE_JOB_INFO: + _pack_job_info_msg((slurm_msg_t *) msg, buffer); + break; + case RESPONSE_PARTITION_INFO: + _pack_partition_info_msg((slurm_msg_t *) msg, buffer); + break; + case RESPONSE_NODE_INFO: + _pack_node_info_msg((slurm_msg_t *) msg, buffer); + break; + case MESSAGE_NODE_REGISTRATION_STATUS: + _pack_node_registration_status_msg( (slurm_node_registration_status_msg_t *) msg->data, buffer); - break; - case REQUEST_RESOURCE_ALLOCATION: - case REQUEST_SUBMIT_BATCH_JOB: - case REQUEST_JOB_WILL_RUN: - case REQUEST_UPDATE_JOB: - _pack_job_desc_msg((job_desc_msg_t *) - msg->data, buffer); - break; - case REQUEST_JOB_END_TIME: - case REQUEST_OLD_JOB_RESOURCE_ALLOCATION: - _pack_old_job_desc_msg((old_job_alloc_msg_t *) msg->data, - buffer); - break; - case REQUEST_NODE_REGISTRATION_STATUS: - case REQUEST_RECONFIGURE: - 
case REQUEST_SHUTDOWN_IMMEDIATE: - case REQUEST_PING: - case REQUEST_CONTROL: - /* Message contains no body/information */ - break; - case REQUEST_SHUTDOWN: - _pack_shutdown_msg((shutdown_msg_t *) msg->data, buffer); - break; - case RESPONSE_SUBMIT_BATCH_JOB: - _pack_submit_response_msg((submit_response_msg_t *) - msg->data, buffer); - break; - case RESPONSE_RESOURCE_ALLOCATION: - case RESPONSE_JOB_WILL_RUN: - _pack_resource_allocation_response_msg - ((resource_allocation_response_msg_t *) msg->data, - buffer); - break; - case REQUEST_UPDATE_NODE: - _pack_update_node_msg((update_node_msg_t *) msg->data, - buffer); - break; - case REQUEST_UPDATE_PARTITION: - _pack_update_partition_msg((update_part_msg_t *) msg-> - data, buffer); - break; - case REQUEST_DELETE_PARTITION: - _pack_delete_partition_msg((delete_part_msg_t *) msg-> - data, buffer); - break; - case REQUEST_REATTACH_TASKS: - _pack_reattach_tasks_request_msg( + break; + case REQUEST_RESOURCE_ALLOCATION: + case REQUEST_SUBMIT_BATCH_JOB: + case REQUEST_JOB_WILL_RUN: + case REQUEST_UPDATE_JOB: + _pack_job_desc_msg((job_desc_msg_t *) + msg->data, buffer); + break; + case REQUEST_JOB_END_TIME: + case REQUEST_JOB_ALLOCATION_INFO: + case REQUEST_JOB_ALLOCATION_INFO_LITE: /* sender passes a job_alloc_info_msg_t, not a job_desc_msg_t */ + _pack_job_alloc_info_msg((job_alloc_info_msg_t *) msg->data, + buffer); + break; + case REQUEST_NODE_REGISTRATION_STATUS: + case REQUEST_RECONFIGURE: + case REQUEST_SHUTDOWN_IMMEDIATE: + case REQUEST_PING: + case REQUEST_CONTROL: + /* Message contains no body/information */ + break; + case REQUEST_SHUTDOWN: + _pack_shutdown_msg((shutdown_msg_t *) msg->data, buffer); + break; + case RESPONSE_SUBMIT_BATCH_JOB: + _pack_submit_response_msg((submit_response_msg_t *) + msg->data, buffer); + break; + case RESPONSE_JOB_ALLOCATION_INFO_LITE: + case RESPONSE_RESOURCE_ALLOCATION: + case RESPONSE_JOB_WILL_RUN: + _pack_resource_allocation_response_msg + ((resource_allocation_response_msg_t *) msg->data, + buffer); + break; + case RESPONSE_JOB_ALLOCATION_INFO:
+ _pack_job_alloc_info_response_msg( + (job_alloc_info_response_msg_t *) + msg->data, buffer); + break; + case REQUEST_UPDATE_NODE: + _pack_update_node_msg((update_node_msg_t *) msg->data, + buffer); + break; + case REQUEST_UPDATE_PARTITION: + _pack_update_partition_msg((update_part_msg_t *) msg-> + data, buffer); + break; + case REQUEST_DELETE_PARTITION: + _pack_delete_partition_msg((delete_part_msg_t *) msg-> + data, buffer); + break; + case REQUEST_REATTACH_TASKS: + _pack_reattach_tasks_request_msg( (reattach_tasks_request_msg_t *) msg->data, buffer); - break; - case RESPONSE_REATTACH_TASKS: - _pack_reattach_tasks_response_msg( + break; + case RESPONSE_REATTACH_TASKS: + _pack_reattach_tasks_response_msg( (reattach_tasks_response_msg_t *) msg->data, buffer); - break; - case REQUEST_LAUNCH_TASKS: - _pack_launch_tasks_request_msg( - (launch_tasks_request_msg_t *) msg->data, - buffer); - break; - case RESPONSE_LAUNCH_TASKS: - _pack_launch_tasks_response_msg((launch_tasks_response_msg_t - *) msg->data, buffer); - break; - case REQUEST_SPAWN_TASK: - _pack_spawn_task_request_msg( - (spawn_task_request_msg_t *) - msg->data, buffer); - break; - case REQUEST_SIGNAL_TASKS: - case REQUEST_TERMINATE_TASKS: - _pack_cancel_tasks_msg((kill_tasks_msg_t *) msg->data, - buffer); - break; - case REQUEST_JOB_STEP_INFO: - _pack_job_step_info_req_msg((job_step_info_request_msg_t - *) msg->data, buffer); - break; - case REQUEST_JOB_INFO: + break; + case REQUEST_LAUNCH_TASKS: + _pack_launch_tasks_request_msg( + (launch_tasks_request_msg_t *) msg->data, + buffer); + break; + case RESPONSE_LAUNCH_TASKS: + _pack_launch_tasks_response_msg((launch_tasks_response_msg_t + *) msg->data, buffer); + break; + case REQUEST_SPAWN_TASK: + _pack_spawn_task_request_msg( + (spawn_task_request_msg_t *) + msg->data, buffer); + break; + case REQUEST_SIGNAL_TASKS: + case REQUEST_TERMINATE_TASKS: + _pack_cancel_tasks_msg((kill_tasks_msg_t *) msg->data, + buffer); + break; + case REQUEST_JOB_STEP_INFO: + 
_pack_job_step_info_req_msg((job_step_info_request_msg_t + *) msg->data, buffer); + break; + case REQUEST_JOB_INFO: _pack_job_info_request_msg((job_info_request_msg_t *) - msg->data, buffer); + msg->data, buffer); break; - case REQUEST_CANCEL_JOB_STEP: - _pack_job_step_kill_msg((job_step_kill_msg_t *) - msg->data, buffer); - break; - case REQUEST_COMPLETE_JOB_ALLOCATION: - _pack_complete_job_allocation_msg( - (complete_job_allocation_msg_t *)msg->data, buffer); - break; - case REQUEST_COMPLETE_BATCH_SCRIPT: - _pack_complete_batch_script_msg( - (complete_batch_script_msg_t *)msg->data, buffer); - break; - case REQUEST_STEP_COMPLETE: - _pack_step_complete_msg((step_complete_msg_t *)msg->data, - buffer); - break; - case MESSAGE_STAT_JOBACCT: - _pack_stat_jobacct_msg((stat_jobacct_msg_t *) msg->data, + case REQUEST_CANCEL_JOB_STEP: + _pack_job_step_kill_msg((job_step_kill_msg_t *) + msg->data, buffer); + break; + case REQUEST_COMPLETE_JOB_ALLOCATION: + _pack_complete_job_allocation_msg( + (complete_job_allocation_msg_t *)msg->data, buffer); + break; + case REQUEST_COMPLETE_BATCH_SCRIPT: + _pack_complete_batch_script_msg( + (complete_batch_script_msg_t *)msg->data, buffer); + break; + case REQUEST_STEP_COMPLETE: + _pack_step_complete_msg((step_complete_msg_t *)msg->data, buffer); - break; - case REQUEST_SIGNAL_JOB: - _pack_signal_job_msg((signal_job_msg_t *) msg->data, buffer); - break; - case REQUEST_KILL_TIMELIMIT: - case REQUEST_TERMINATE_JOB: - _pack_kill_job_msg((kill_job_msg_t *) msg->data, buffer); - break; - case MESSAGE_EPILOG_COMPLETE: - _pack_epilog_comp_msg((epilog_complete_msg_t *) msg->data, buffer); break; - case REQUEST_UPDATE_JOB_TIME: - _pack_update_job_time_msg((job_time_msg_t *) - msg->data, buffer); - break; - case RESPONSE_RECONFIGURE: - case RESPONSE_SHUTDOWN: - case RESPONSE_CANCEL_JOB_STEP: - break; - case REQUEST_JOB_ATTACH: - break; - case RESPONSE_JOB_ATTACH: - break; - case RESPONSE_JOB_STEP_INFO: - _pack_job_step_info_msg((slurm_msg_t *) 
msg, buffer); - break; - case REQUEST_JOB_RESOURCE: - break; - case RESPONSE_JOB_RESOURCE: - break; - case REQUEST_RUN_JOB_STEP: - break; - case RESPONSE_RUN_JOB_STEP: - break; - case MESSAGE_TASK_EXIT: - _pack_task_exit_msg((task_exit_msg_t *) msg->data, buffer); - break; - case REQUEST_BATCH_JOB_LAUNCH: - _pack_batch_job_launch_msg((batch_job_launch_msg_t *) - msg->data, buffer); - break; - case RESPONSE_JOB_READY: - case RESPONSE_SLURM_RC: - _pack_return_code_msg((return_code_msg_t *) msg->data, + case MESSAGE_STAT_JOBACCT: + _pack_stat_jobacct_msg((stat_jobacct_msg_t *) msg->data, buffer); - break; - case RESPONSE_JOB_STEP_CREATE: - _pack_job_step_create_response_msg( - (job_step_create_response_msg_t *) - msg->data, buffer); - break; - case REQUEST_JOB_STEP_CREATE: - _pack_job_step_create_request_msg( - (job_step_create_request_msg_t *) - msg->data, buffer); - break; - case REQUEST_JOB_ID: - _pack_job_id_request_msg( - (job_id_request_msg_t *)msg->data, - buffer); - break; - case RESPONSE_JOB_ID: - _pack_job_id_response_msg( - (job_id_response_msg_t *)msg->data, - buffer); - break; - case SRUN_PING: + break; + case REQUEST_SIGNAL_JOB: + _pack_signal_job_msg((signal_job_msg_t *) msg->data, buffer); + break; + case REQUEST_KILL_TIMELIMIT: + case REQUEST_TERMINATE_JOB: + _pack_kill_job_msg((kill_job_msg_t *) msg->data, buffer); + break; + case MESSAGE_EPILOG_COMPLETE: + _pack_epilog_comp_msg((epilog_complete_msg_t *) msg->data, buffer); + break; + case REQUEST_UPDATE_JOB_TIME: + _pack_update_job_time_msg((job_time_msg_t *) + msg->data, buffer); + break; + case RESPONSE_RECONFIGURE: + case RESPONSE_SHUTDOWN: + case RESPONSE_CANCEL_JOB_STEP: + break; + case REQUEST_JOB_ATTACH: + break; + case RESPONSE_JOB_ATTACH: + break; + case RESPONSE_JOB_STEP_INFO: + _pack_job_step_info_msg((slurm_msg_t *) msg, buffer); + break; + case REQUEST_JOB_RESOURCE: + break; + case RESPONSE_JOB_RESOURCE: + break; + case REQUEST_RUN_JOB_STEP: + break; + case RESPONSE_RUN_JOB_STEP: + 
break; + case MESSAGE_TASK_EXIT: + _pack_task_exit_msg((task_exit_msg_t *) msg->data, buffer); + break; + case REQUEST_BATCH_JOB_LAUNCH: + _pack_batch_job_launch_msg((batch_job_launch_msg_t *) + msg->data, buffer); + break; + case RESPONSE_JOB_READY: + case RESPONSE_SLURM_RC: + _pack_return_code_msg((return_code_msg_t *) msg->data, + buffer); + break; + case RESPONSE_JOB_STEP_CREATE: + _pack_job_step_create_response_msg( + (job_step_create_response_msg_t *) + msg->data, buffer); + break; + case REQUEST_JOB_STEP_CREATE: + _pack_job_step_create_request_msg( + (job_step_create_request_msg_t *) + msg->data, buffer); + break; + case REQUEST_JOB_ID: + _pack_job_id_request_msg( + (job_id_request_msg_t *)msg->data, + buffer); + break; + case RESPONSE_JOB_ID: + _pack_job_id_response_msg( + (job_id_response_msg_t *)msg->data, + buffer); + break; + case SRUN_PING: _pack_srun_ping_msg((srun_ping_msg_t *)msg->data, buffer); break; - case SRUN_NODE_FAIL: + case SRUN_NODE_FAIL: _pack_srun_node_fail_msg((srun_node_fail_msg_t *)msg->data, - buffer); + buffer); break; - case SRUN_TIMEOUT: + case SRUN_TIMEOUT: _pack_srun_timeout_msg((srun_timeout_msg_t *)msg->data, buffer); break; - case REQUEST_CHECKPOINT: + case REQUEST_CHECKPOINT: _pack_checkpoint_msg((checkpoint_msg_t *)msg->data, buffer); break; - case REQUEST_CHECKPOINT_COMP: + case REQUEST_CHECKPOINT_COMP: _pack_checkpoint_comp((checkpoint_comp_msg_t *)msg->data, - buffer); + buffer); break; - case RESPONSE_CHECKPOINT: - case RESPONSE_CHECKPOINT_COMP: + case RESPONSE_CHECKPOINT: + case RESPONSE_CHECKPOINT_COMP: _pack_checkpoint_resp_msg((checkpoint_resp_msg_t *)msg->data, - buffer); + buffer); break; - case REQUEST_SUSPEND: + case REQUEST_SUSPEND: _pack_suspend_msg((suspend_msg_t *)msg->data, buffer); break; - case REQUEST_JOB_READY: - case REQUEST_JOB_REQUEUE: + case REQUEST_JOB_READY: + case REQUEST_JOB_REQUEUE: _pack_job_ready_msg((job_id_msg_t *)msg->data, buffer); break; - case REQUEST_NODE_SELECT_INFO: + case 
REQUEST_NODE_SELECT_INFO: _pack_node_select_info_req_msg( (node_info_select_request_msg_t *) msg->data, buffer); break; - case RESPONSE_NODE_SELECT_INFO: + case RESPONSE_NODE_SELECT_INFO: _pack_node_select_info_msg((slurm_msg_t *) msg, buffer); break; - case REQUEST_FILE_BCAST: + case REQUEST_FILE_BCAST: _pack_file_bcast((file_bcast_msg_t *) msg->data, buffer); break; - case PMI_KVS_PUT_REQ: - case PMI_KVS_GET_RESP: + case PMI_KVS_PUT_REQ: + case PMI_KVS_GET_RESP: _pack_kvs_data((struct kvs_comm_set *) msg->data, buffer); break; - case PMI_KVS_GET_REQ: + case PMI_KVS_GET_REQ: _pack_kvs_get((kvs_get_msg_t *) msg->data, buffer); break; - case PMI_KVS_PUT_RESP: + case PMI_KVS_PUT_RESP: break; /* no data in message */ - default: - debug("No pack method for msg type %i", msg->msg_type); - return EINVAL; - break; + default: + debug("No pack method for msg type %i", msg->msg_type); + return EINVAL; + break; } return SLURM_SUCCESS; @@ -670,281 +685,288 @@ unpack_msg(slurm_msg_t * msg, Buf buffer) msg->data = NULL; /* Initialize to no data for now */ switch (msg->msg_type) { - case REQUEST_NODE_INFO: + case REQUEST_NODE_INFO: rc = _unpack_node_info_request_msg((node_info_request_msg_t **) - & (msg->data), buffer); + & (msg->data), buffer); break; - case REQUEST_PARTITION_INFO: + case REQUEST_PARTITION_INFO: rc = _unpack_part_info_request_msg((part_info_request_msg_t **) - & (msg->data), buffer); + & (msg->data), buffer); break; - case REQUEST_BUILD_INFO: - case REQUEST_ACCTING_INFO: - rc = _unpack_last_update_msg((last_update_msg_t **) & - (msg->data), buffer); - break; - case RESPONSE_BUILD_INFO: - rc = _unpack_slurm_ctl_conf_msg((slurm_ctl_conf_info_msg_t - **) + case REQUEST_BUILD_INFO: + case REQUEST_ACCTING_INFO: + rc = _unpack_last_update_msg((last_update_msg_t **) & + (msg->data), buffer); + break; + case RESPONSE_BUILD_INFO: + rc = _unpack_slurm_ctl_conf_msg((slurm_ctl_conf_info_msg_t + **) + & (msg->data), buffer); + break; + case RESPONSE_JOB_INFO: + rc = 
_unpack_job_info_msg((job_info_msg_t **) & (msg->data), + buffer); + break; + case RESPONSE_PARTITION_INFO: + rc = _unpack_partition_info_msg((partition_info_msg_t **) & + (msg->data), buffer); + break; + case RESPONSE_NODE_INFO: + rc = _unpack_node_info_msg((node_info_msg_t **) & + (msg->data), buffer); + break; + case MESSAGE_NODE_REGISTRATION_STATUS: + rc = _unpack_node_registration_status_msg( + (slurm_node_registration_status_msg_t **) + & (msg->data), buffer); + break; + case REQUEST_JOB_ALLOCATION_INFO_LITE: + case REQUEST_RESOURCE_ALLOCATION: + case REQUEST_SUBMIT_BATCH_JOB: + case REQUEST_JOB_WILL_RUN: + case REQUEST_UPDATE_JOB: + rc = _unpack_job_desc_msg((job_desc_msg_t **) & (msg->data), + buffer); + break; + case REQUEST_JOB_END_TIME: + case REQUEST_JOB_ALLOCATION_INFO: + rc = _unpack_job_alloc_info_msg((job_alloc_info_msg_t **) & + (msg->data), buffer); + break; + case REQUEST_NODE_REGISTRATION_STATUS: + case REQUEST_RECONFIGURE: + case REQUEST_SHUTDOWN_IMMEDIATE: + case REQUEST_PING: + case REQUEST_CONTROL: + /* Message contains no body/information */ + break; + case REQUEST_SHUTDOWN: + rc = _unpack_shutdown_msg((shutdown_msg_t **) & (msg->data), + buffer); + break; + case RESPONSE_SUBMIT_BATCH_JOB: + rc = _unpack_submit_response_msg((submit_response_msg_t **) & (msg->data), buffer); - break; - case RESPONSE_JOB_INFO: - rc = _unpack_job_info_msg((job_info_msg_t **) & (msg->data), - buffer); - break; - case RESPONSE_PARTITION_INFO: - rc = _unpack_partition_info_msg((partition_info_msg_t **) & - (msg->data), buffer); - break; - case RESPONSE_NODE_INFO: - rc = _unpack_node_info_msg((node_info_msg_t **) & - (msg->data), buffer); - break; - case MESSAGE_NODE_REGISTRATION_STATUS: - rc = _unpack_node_registration_status_msg( - (slurm_node_registration_status_msg_t **) - & (msg->data), buffer); - break; - case REQUEST_RESOURCE_ALLOCATION: - case REQUEST_SUBMIT_BATCH_JOB: - case REQUEST_JOB_WILL_RUN: - case REQUEST_UPDATE_JOB: - rc = 
_unpack_job_desc_msg((job_desc_msg_t **) & (msg->data), - buffer); - break; - case REQUEST_JOB_END_TIME: - case REQUEST_OLD_JOB_RESOURCE_ALLOCATION: - rc = _unpack_old_job_desc_msg((old_job_alloc_msg_t **) & - (msg->data), buffer); - break; - case REQUEST_NODE_REGISTRATION_STATUS: - case REQUEST_RECONFIGURE: - case REQUEST_SHUTDOWN_IMMEDIATE: - case REQUEST_PING: - case REQUEST_CONTROL: - /* Message contains no body/information */ - break; - case REQUEST_SHUTDOWN: - rc = _unpack_shutdown_msg((shutdown_msg_t **) & (msg->data), - buffer); - break; - case RESPONSE_SUBMIT_BATCH_JOB: - rc = _unpack_submit_response_msg((submit_response_msg_t **) - & (msg->data), buffer); - break; - case RESPONSE_RESOURCE_ALLOCATION: - case RESPONSE_JOB_WILL_RUN: - rc = _unpack_resource_allocation_response_msg( - (resource_allocation_response_msg_t **) - & (msg->data), buffer); - break; - case REQUEST_UPDATE_NODE: - rc = _unpack_update_node_msg((update_node_msg_t **) & - (msg->data), buffer); - break; - case REQUEST_UPDATE_PARTITION: - rc = _unpack_update_partition_msg((update_part_msg_t **) & - (msg->data), buffer); - break; - case REQUEST_DELETE_PARTITION: - rc = _unpack_delete_partition_msg((delete_part_msg_t **) & - (msg->data), buffer); - break; - case REQUEST_LAUNCH_TASKS: - rc = _unpack_launch_tasks_request_msg( - (launch_tasks_request_msg_t **) - & (msg->data), buffer); - break; - case RESPONSE_LAUNCH_TASKS: - rc = _unpack_launch_tasks_response_msg( - (launch_tasks_response_msg_t **) - & (msg->data), buffer); - break; - case REQUEST_SPAWN_TASK: - rc = _unpack_spawn_task_request_msg( - (spawn_task_request_msg_t **) - & (msg->data), buffer); - break; - case REQUEST_REATTACH_TASKS: - rc = _unpack_reattach_tasks_request_msg( + break; + case RESPONSE_JOB_ALLOCATION_INFO_LITE: + case RESPONSE_RESOURCE_ALLOCATION: + case RESPONSE_JOB_WILL_RUN: + rc = _unpack_resource_allocation_response_msg( + (resource_allocation_response_msg_t **) + & (msg->data), buffer); + break; + case 
RESPONSE_JOB_ALLOCATION_INFO: + rc = _unpack_job_alloc_info_response_msg( + (job_alloc_info_response_msg_t **) + & (msg->data), buffer); + break; + case REQUEST_UPDATE_NODE: + rc = _unpack_update_node_msg((update_node_msg_t **) & + (msg->data), buffer); + break; + case REQUEST_UPDATE_PARTITION: + rc = _unpack_update_partition_msg((update_part_msg_t **) & + (msg->data), buffer); + break; + case REQUEST_DELETE_PARTITION: + rc = _unpack_delete_partition_msg((delete_part_msg_t **) & + (msg->data), buffer); + break; + case REQUEST_LAUNCH_TASKS: + rc = _unpack_launch_tasks_request_msg( + (launch_tasks_request_msg_t **) + & (msg->data), buffer); + break; + case RESPONSE_LAUNCH_TASKS: + rc = _unpack_launch_tasks_response_msg( + (launch_tasks_response_msg_t **) + & (msg->data), buffer); + break; + case REQUEST_SPAWN_TASK: + rc = _unpack_spawn_task_request_msg( + (spawn_task_request_msg_t **) + & (msg->data), buffer); + break; + case REQUEST_REATTACH_TASKS: + rc = _unpack_reattach_tasks_request_msg( (reattach_tasks_request_msg_t **) & msg->data, buffer); - break; - case RESPONSE_REATTACH_TASKS: - rc = _unpack_reattach_tasks_response_msg( - (reattach_tasks_response_msg_t **) - & msg->data, buffer); - break; - case REQUEST_SIGNAL_TASKS: - case REQUEST_TERMINATE_TASKS: - rc = _unpack_cancel_tasks_msg((kill_tasks_msg_t **) & - (msg->data), buffer); - break; - case REQUEST_JOB_STEP_INFO: - rc = _unpack_job_step_info_req_msg( - (job_step_info_request_msg_t **) - & (msg->data), buffer); - break; + break; + case RESPONSE_REATTACH_TASKS: + rc = _unpack_reattach_tasks_response_msg( + (reattach_tasks_response_msg_t **) + & msg->data, buffer); + break; + case REQUEST_SIGNAL_TASKS: + case REQUEST_TERMINATE_TASKS: + rc = _unpack_cancel_tasks_msg((kill_tasks_msg_t **) & + (msg->data), buffer); + break; + case REQUEST_JOB_STEP_INFO: + rc = _unpack_job_step_info_req_msg( + (job_step_info_request_msg_t **) + & (msg->data), buffer); + break; /******** job_step_id_t Messages ********/ - case 
REQUEST_JOB_INFO: - rc = _unpack_job_info_request_msg((job_info_request_msg_t**) - & (msg->data), buffer); - break; - case REQUEST_CANCEL_JOB_STEP: - rc = _unpack_job_step_kill_msg((job_step_kill_msg_t **) - & (msg->data), buffer); - break; - case REQUEST_COMPLETE_JOB_ALLOCATION: - rc = _unpack_complete_job_allocation_msg( - (complete_job_allocation_msg_t **)&msg->data, buffer); - break; - case REQUEST_COMPLETE_BATCH_SCRIPT: - rc = _unpack_complete_batch_script_msg( - (complete_batch_script_msg_t **)&msg->data, buffer); - break; - case REQUEST_STEP_COMPLETE: - rc = _unpack_step_complete_msg((step_complete_msg_t - **) & (msg->data), - buffer); - break; - case MESSAGE_STAT_JOBACCT: - rc = _unpack_stat_jobacct_msg( - (stat_jobacct_msg_t **) &(msg->data), buffer); - break; - case REQUEST_SIGNAL_JOB: - rc = _unpack_signal_job_msg((signal_job_msg_t **)&(msg->data), - buffer); - break; - case REQUEST_KILL_TIMELIMIT: - case REQUEST_TERMINATE_JOB: - rc = _unpack_kill_job_msg((kill_job_msg_t **) & (msg->data), - buffer); - break; - case MESSAGE_EPILOG_COMPLETE: + case REQUEST_JOB_INFO: + rc = _unpack_job_info_request_msg((job_info_request_msg_t**) + & (msg->data), buffer); + break; + case REQUEST_CANCEL_JOB_STEP: + rc = _unpack_job_step_kill_msg((job_step_kill_msg_t **) + & (msg->data), buffer); + break; + case REQUEST_COMPLETE_JOB_ALLOCATION: + rc = _unpack_complete_job_allocation_msg( + (complete_job_allocation_msg_t **)&msg->data, buffer); + break; + case REQUEST_COMPLETE_BATCH_SCRIPT: + rc = _unpack_complete_batch_script_msg( + (complete_batch_script_msg_t **)&msg->data, buffer); + break; + case REQUEST_STEP_COMPLETE: + rc = _unpack_step_complete_msg((step_complete_msg_t + **) & (msg->data), + buffer); + break; + case MESSAGE_STAT_JOBACCT: + rc = _unpack_stat_jobacct_msg( + (stat_jobacct_msg_t **) &(msg->data), buffer); + break; + case REQUEST_SIGNAL_JOB: + rc = _unpack_signal_job_msg((signal_job_msg_t **)&(msg->data), + buffer); + break; + case REQUEST_KILL_TIMELIMIT: + 
case REQUEST_TERMINATE_JOB: + rc = _unpack_kill_job_msg((kill_job_msg_t **) & (msg->data), + buffer); + break; + case MESSAGE_EPILOG_COMPLETE: rc = _unpack_epilog_comp_msg((epilog_complete_msg_t **) - & (msg->data), buffer); - break; - case REQUEST_UPDATE_JOB_TIME: - rc = _unpack_update_job_time_msg( - (job_time_msg_t **) - & (msg->data), buffer); - break; - case RESPONSE_RECONFIGURE: - case RESPONSE_SHUTDOWN: - case RESPONSE_CANCEL_JOB_STEP: - break; - case REQUEST_JOB_ATTACH: - break; - case RESPONSE_JOB_ATTACH: - break; - case RESPONSE_JOB_STEP_INFO: - rc = _unpack_job_step_info_response_msg( - (job_step_info_response_msg_t **) - & (msg->data), buffer); - break; - case REQUEST_JOB_RESOURCE: - break; - case RESPONSE_JOB_RESOURCE: - break; - case REQUEST_RUN_JOB_STEP: - break; - case RESPONSE_RUN_JOB_STEP: - break; - case MESSAGE_TASK_EXIT: - rc = _unpack_task_exit_msg((task_exit_msg_t **) - & (msg->data), buffer); - break; - case REQUEST_BATCH_JOB_LAUNCH: - rc = _unpack_batch_job_launch_msg((batch_job_launch_msg_t **) - & (msg->data), buffer); - break; - case RESPONSE_JOB_READY: - case RESPONSE_SLURM_RC: - rc = _unpack_return_code_msg((return_code_msg_t **) - & (msg->data), buffer); - break; - case RESPONSE_JOB_STEP_CREATE: - rc = _unpack_job_step_create_response_msg( - (job_step_create_response_msg_t **) - & msg->data, buffer); - break; - case REQUEST_JOB_STEP_CREATE: - rc = _unpack_job_step_create_request_msg( - (job_step_create_request_msg_t **) - & msg->data, buffer); - break; - case REQUEST_JOB_ID: - rc = _unpack_job_id_request_msg( - (job_id_request_msg_t **) & msg->data, - buffer); - break; - case RESPONSE_JOB_ID: - rc = _unpack_job_id_response_msg( - (job_id_response_msg_t **) & msg->data, - buffer); - break; - case SRUN_PING: + & (msg->data), buffer); + break; + case REQUEST_UPDATE_JOB_TIME: + rc = _unpack_update_job_time_msg( + (job_time_msg_t **) + & (msg->data), buffer); + break; + case RESPONSE_RECONFIGURE: + case RESPONSE_SHUTDOWN: + case 
RESPONSE_CANCEL_JOB_STEP: + break; + case REQUEST_JOB_ATTACH: + break; + case RESPONSE_JOB_ATTACH: + break; + case RESPONSE_JOB_STEP_INFO: + rc = _unpack_job_step_info_response_msg( + (job_step_info_response_msg_t **) + & (msg->data), buffer); + break; + case REQUEST_JOB_RESOURCE: + break; + case RESPONSE_JOB_RESOURCE: + break; + case REQUEST_RUN_JOB_STEP: + break; + case RESPONSE_RUN_JOB_STEP: + break; + case MESSAGE_TASK_EXIT: + rc = _unpack_task_exit_msg((task_exit_msg_t **) + & (msg->data), buffer); + break; + case REQUEST_BATCH_JOB_LAUNCH: + rc = _unpack_batch_job_launch_msg((batch_job_launch_msg_t **) + & (msg->data), buffer); + break; + case RESPONSE_JOB_READY: + case RESPONSE_SLURM_RC: + rc = _unpack_return_code_msg((return_code_msg_t **) + & (msg->data), buffer); + break; + case RESPONSE_JOB_STEP_CREATE: + rc = _unpack_job_step_create_response_msg( + (job_step_create_response_msg_t **) + & msg->data, buffer); + break; + case REQUEST_JOB_STEP_CREATE: + rc = _unpack_job_step_create_request_msg( + (job_step_create_request_msg_t **) + & msg->data, buffer); + break; + case REQUEST_JOB_ID: + rc = _unpack_job_id_request_msg( + (job_id_request_msg_t **) & msg->data, + buffer); + break; + case RESPONSE_JOB_ID: + rc = _unpack_job_id_response_msg( + (job_id_response_msg_t **) & msg->data, + buffer); + break; + case SRUN_PING: rc = _unpack_srun_ping_msg((srun_ping_msg_t **) & msg->data, - buffer); + buffer); break; - case SRUN_NODE_FAIL: + case SRUN_NODE_FAIL: rc = _unpack_srun_node_fail_msg((srun_node_fail_msg_t **) - & msg->data, buffer); + & msg->data, buffer); break; - case SRUN_TIMEOUT: + case SRUN_TIMEOUT: rc = _unpack_srun_timeout_msg((srun_timeout_msg_t **) - & msg->data, buffer); + & msg->data, buffer); break; - case REQUEST_CHECKPOINT: + case REQUEST_CHECKPOINT: rc = _unpack_checkpoint_msg((checkpoint_msg_t **) - & msg->data, buffer); + & msg->data, buffer); break; - case REQUEST_CHECKPOINT_COMP: + case REQUEST_CHECKPOINT_COMP: rc = 
_unpack_checkpoint_comp((checkpoint_comp_msg_t **) - & msg->data, buffer); + & msg->data, buffer); break; - case RESPONSE_CHECKPOINT: - case RESPONSE_CHECKPOINT_COMP: + case RESPONSE_CHECKPOINT: + case RESPONSE_CHECKPOINT_COMP: rc = _unpack_checkpoint_resp_msg((checkpoint_resp_msg_t **) - & msg->data, buffer); + & msg->data, buffer); break; - case REQUEST_SUSPEND: + case REQUEST_SUSPEND: rc = _unpack_suspend_msg((suspend_msg_t **) &msg->data, - buffer); + buffer); break; - case REQUEST_JOB_READY: - case REQUEST_JOB_REQUEUE: + case REQUEST_JOB_READY: + case REQUEST_JOB_REQUEUE: rc = _unpack_job_ready_msg((job_id_msg_t **) - & msg->data, buffer); + & msg->data, buffer); break; - case REQUEST_NODE_SELECT_INFO: + case REQUEST_NODE_SELECT_INFO: rc = _unpack_node_select_info_req_msg( - (node_info_select_request_msg_t **) &msg->data, - buffer); + (node_info_select_request_msg_t **) &msg->data, + buffer); break; - case RESPONSE_NODE_SELECT_INFO: + case RESPONSE_NODE_SELECT_INFO: rc = _unpack_node_select_info_msg((node_select_info_msg_t **) & - (msg->data), buffer); + (msg->data), buffer); break; - case REQUEST_FILE_BCAST: + case REQUEST_FILE_BCAST: rc = _unpack_file_bcast( (file_bcast_msg_t **) - & msg->data, buffer); + & msg->data, buffer); break; - case PMI_KVS_PUT_REQ: - case PMI_KVS_GET_RESP: + case PMI_KVS_PUT_REQ: + case PMI_KVS_GET_RESP: rc = _unpack_kvs_data((struct kvs_comm_set **) &msg->data, - buffer); + buffer); break; - case PMI_KVS_GET_REQ: + case PMI_KVS_GET_REQ: rc = _unpack_kvs_get((kvs_get_msg_t **) &msg->data, buffer); break; - case PMI_KVS_PUT_RESP: + case PMI_KVS_PUT_RESP: break; /* no data */ - default: - debug("No unpack method for msg type %i", msg->msg_type); - return EINVAL; - break; + default: + debug("No unpack method for msg type %i", msg->msg_type); + return EINVAL; + break; } if (rc) @@ -978,7 +1000,7 @@ _unpack_update_node_msg(update_node_msg_t ** msg, Buf buffer) safe_unpackstr_xmalloc(&tmp_ptr->reason, &uint16_tmp, buffer); return 
SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(tmp_ptr->node_names); xfree(tmp_ptr->reason); xfree(tmp_ptr); @@ -1034,25 +1056,25 @@ _unpack_node_registration_status_msg(slurm_node_registration_status_msg_t safe_unpack32(&node_reg_ptr->temporary_disk_space, buffer); safe_unpack32(&node_reg_ptr->job_count, buffer); node_reg_ptr->job_id = - xmalloc(sizeof(uint32_t) * node_reg_ptr->job_count); + xmalloc(sizeof(uint32_t) * node_reg_ptr->job_count); for (i = 0; i < node_reg_ptr->job_count; i++) { safe_unpack32(&node_reg_ptr->job_id[i], buffer); } node_reg_ptr->step_id = - xmalloc(sizeof(uint16_t) * node_reg_ptr->job_count); + xmalloc(sizeof(uint16_t) * node_reg_ptr->job_count); for (i = 0; i < node_reg_ptr->job_count; i++) { safe_unpack16(&node_reg_ptr->step_id[i], buffer); } safe_unpack16(&node_reg_ptr->startup, buffer); if (node_reg_ptr->startup - && (switch_g_alloc_node_info(&node_reg_ptr->switch_nodeinfo) - || switch_g_unpack_node_info(node_reg_ptr->switch_nodeinfo, buffer))) + && (switch_g_alloc_node_info(&node_reg_ptr->switch_nodeinfo) + || switch_g_unpack_node_info(node_reg_ptr->switch_nodeinfo, buffer))) goto unpack_error; return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(node_reg_ptr->node_name); xfree(node_reg_ptr->job_id); xfree(node_reg_ptr->step_id); @@ -1131,12 +1153,96 @@ _unpack_resource_allocation_response_msg(resource_allocation_response_msg_t tmp_ptr->node_addr = NULL; if (select_g_alloc_jobinfo (&tmp_ptr->select_jobinfo) - || select_g_unpack_jobinfo(tmp_ptr->select_jobinfo, buffer)) + || select_g_unpack_jobinfo(tmp_ptr->select_jobinfo, buffer)) goto unpack_error; return SLURM_SUCCESS; - unpack_error: +unpack_error: + select_g_free_jobinfo(&tmp_ptr->select_jobinfo); + xfree(tmp_ptr->node_list); + xfree(tmp_ptr->cpus_per_node); + xfree(tmp_ptr->cpu_count_reps); + xfree(tmp_ptr); + *msg = NULL; + return SLURM_ERROR; +} + +static void +_pack_job_alloc_info_response_msg(job_alloc_info_response_msg_t * msg, + Buf buffer) +{ + xassert(msg != 
NULL); + + pack32((uint32_t)msg->error_code, buffer); + pack32((uint32_t)msg->job_id, buffer); + packstr(msg->node_list, buffer); + + pack16((uint16_t)msg->num_cpu_groups, buffer); + if (msg->num_cpu_groups) { + pack32_array(msg->cpus_per_node, msg->num_cpu_groups, buffer); + pack32_array(msg->cpu_count_reps, msg->num_cpu_groups, buffer); + } + + pack16((uint16_t)msg->node_cnt, buffer); + if (msg->node_cnt > 0) + _pack_slurm_addr_array(msg->node_addr, msg->node_cnt, buffer); + + select_g_pack_jobinfo(msg->select_jobinfo, buffer); +} + +static int +_unpack_job_alloc_info_response_msg(job_alloc_info_response_msg_t ** msg, + Buf buffer) +{ + uint16_t uint16_tmp; + uint32_t uint32_tmp; + job_alloc_info_response_msg_t *tmp_ptr; + + /* alloc memory for structure */ + xassert(msg != NULL); + tmp_ptr = xmalloc(sizeof(job_alloc_info_response_msg_t)); + *msg = tmp_ptr; + + /* load the data values */ + safe_unpack32(&tmp_ptr->error_code, buffer); + safe_unpack32(&tmp_ptr->job_id, buffer); + safe_unpackstr_xmalloc(&tmp_ptr->node_list, &uint16_tmp, buffer); + + safe_unpack16(&tmp_ptr->num_cpu_groups, buffer); + if (tmp_ptr->num_cpu_groups > 0) { + safe_unpack32_array((uint32_t **) & + (tmp_ptr->cpus_per_node), &uint32_tmp, + buffer); + if (tmp_ptr->num_cpu_groups != uint32_tmp) + goto unpack_error; + safe_unpack32_array((uint32_t **) & + (tmp_ptr->cpu_count_reps), &uint32_tmp, + buffer); + if (tmp_ptr->num_cpu_groups != uint32_tmp) + goto unpack_error; + } else { + tmp_ptr->cpus_per_node = NULL; + tmp_ptr->cpu_count_reps = NULL; + } + + safe_unpack16(&tmp_ptr->node_cnt, buffer); + if (tmp_ptr->node_cnt > 0) { + if (_unpack_slurm_addr_array(&(tmp_ptr->node_addr), + &uint16_tmp, buffer)) + goto unpack_error; + if (uint16_tmp != tmp_ptr->node_cnt) + goto unpack_error; + } else + tmp_ptr->node_addr = NULL; + + if (select_g_alloc_jobinfo (&tmp_ptr->select_jobinfo) + || select_g_unpack_jobinfo(tmp_ptr->select_jobinfo, buffer)) + goto unpack_error; + + return SLURM_SUCCESS; + 
+unpack_error: select_g_free_jobinfo(&tmp_ptr->select_jobinfo); xfree(tmp_ptr->node_list); xfree(tmp_ptr->cpus_per_node); @@ -1172,7 +1278,7 @@ _unpack_submit_response_msg(submit_response_msg_t ** msg, Buf buffer) safe_unpack32(&tmp_ptr->error_code, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(tmp_ptr); *msg = NULL; return SLURM_ERROR; @@ -1192,7 +1298,7 @@ _unpack_node_info_msg(node_info_msg_t ** msg, Buf buffer) safe_unpack_time(&((*msg)->last_update), buffer); node = (*msg)->node_array = - xmalloc(sizeof(node_info_t) * (*msg)->record_count); + xmalloc(sizeof(node_info_t) * (*msg)->record_count); /* load individual job info */ for (i = 0; i < (*msg)->record_count; i++) { @@ -1202,7 +1308,7 @@ _unpack_node_info_msg(node_info_msg_t ** msg, Buf buffer) } return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(node); xfree(*msg); *msg = NULL; @@ -1227,7 +1333,7 @@ _unpack_node_info_members(node_info_t * node, Buf buffer) return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(node->name); xfree(node->features); xfree(node->reason); @@ -1235,7 +1341,7 @@ _unpack_node_info_members(node_info_t * node, Buf buffer) } static int _unpack_node_select_info_msg(node_select_info_msg_t ** msg, - Buf buffer) + Buf buffer) { xassert(msg != NULL); @@ -1287,7 +1393,7 @@ _unpack_update_partition_msg(update_part_msg_t ** msg, Buf buffer) safe_unpack16(&tmp_ptr->state_up, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(tmp_ptr->name); xfree(tmp_ptr->nodes); xfree(tmp_ptr->allow_groups); @@ -1319,7 +1425,7 @@ _unpack_delete_partition_msg(delete_part_msg_t ** msg, Buf buffer) safe_unpackstr_xmalloc(&tmp_ptr->name, &uint16_tmp, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(tmp_ptr->name); xfree(tmp_ptr); *msg = NULL; @@ -1374,7 +1480,7 @@ _unpack_job_step_create_request_msg(job_step_create_request_msg_t ** msg, return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(tmp_ptr->host); xfree(tmp_ptr->name); 
xfree(tmp_ptr->network); @@ -1412,12 +1518,12 @@ _unpack_kill_job_msg(kill_job_msg_t ** msg, Buf buffer) safe_unpack_time(&(tmp_ptr->time), buffer); safe_unpackstr_xmalloc(&(tmp_ptr->nodes), &uint16_tmp, buffer); if (select_g_alloc_jobinfo (&tmp_ptr->select_jobinfo) - || select_g_unpack_jobinfo(tmp_ptr->select_jobinfo, buffer)) + || select_g_unpack_jobinfo(tmp_ptr->select_jobinfo, buffer)) goto unpack_error; return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(tmp_ptr->nodes); xfree(tmp_ptr); *msg = NULL; @@ -1450,7 +1556,7 @@ _unpack_signal_job_msg(signal_job_msg_t ** msg, Buf buffer) return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(tmp_ptr); *msg = NULL; return SLURM_ERROR; @@ -1482,12 +1588,12 @@ _unpack_epilog_comp_msg(epilog_complete_msg_t ** msg, Buf buffer) safe_unpack32(&(tmp_ptr->return_code), buffer); safe_unpackstr_xmalloc(& (tmp_ptr->node_name), &uint16_tmp, buffer); if (switch_g_alloc_node_info(&tmp_ptr->switch_nodeinfo) - || switch_g_unpack_node_info(tmp_ptr->switch_nodeinfo, buffer)) + || switch_g_unpack_node_info(tmp_ptr->switch_nodeinfo, buffer)) goto unpack_error; return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(tmp_ptr->node_name); switch_g_free_node_info(&tmp_ptr->switch_nodeinfo); xfree(tmp_ptr); @@ -1518,7 +1624,7 @@ _unpack_update_job_time_msg(job_time_msg_t ** msg, Buf buffer) safe_unpack_time(& (tmp_ptr->expiration_time), buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(tmp_ptr); *msg = NULL; return SLURM_ERROR; @@ -1568,7 +1674,7 @@ _unpack_job_step_create_response_msg(job_step_create_response_msg_t ** msg, } return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(tmp_ptr->node_list); xfree(tmp_ptr); *msg = NULL; @@ -1598,7 +1704,7 @@ _unpack_partition_info_msg(partition_info_msg_t ** msg, Buf buffer) safe_unpack_time(&((*msg)->last_update), buffer); partition = (*msg)->partition_array = - xmalloc(sizeof(partition_info_t) * (*msg)->record_count); + xmalloc(sizeof(partition_info_t) * 
(*msg)->record_count); /* load individual job info */ for (i = 0; i < (*msg)->record_count; i++) { @@ -1607,7 +1713,7 @@ _unpack_partition_info_msg(partition_info_msg_t ** msg, Buf buffer) } return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(partition); xfree(*msg); *msg = NULL; @@ -1649,7 +1755,7 @@ _unpack_partition_info_members(partition_info_t * part, Buf buffer) } return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(part->name); xfree(part->allow_groups); xfree(part->nodes); @@ -1724,7 +1830,7 @@ _unpack_job_step_info_members(job_step_info_t * step, Buf buffer) return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(step->partition); xfree(step->nodes); xfree(step->name); @@ -1746,7 +1852,7 @@ _unpack_job_step_info_response_msg(job_step_info_response_msg_t safe_unpack32(&(*msg)->job_step_count, buffer); step = (*msg)->job_steps = - xmalloc(sizeof(job_step_info_t) * (*msg)->job_step_count); + xmalloc(sizeof(job_step_info_t) * (*msg)->job_step_count); for (i = 0; i < (*msg)->job_step_count; i++) if (_unpack_job_step_info_members(&step[i], buffer)) @@ -1754,7 +1860,7 @@ _unpack_job_step_info_response_msg(job_step_info_response_msg_t return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(step); xfree(*msg); *msg = NULL; @@ -1781,7 +1887,7 @@ _unpack_job_info_msg(job_info_msg_t ** msg, Buf buffer) safe_unpack32(&((*msg)->record_count), buffer); safe_unpack_time(&((*msg)->last_update), buffer); job = (*msg)->job_array = - xmalloc(sizeof(job_info_t) * (*msg)->record_count); + xmalloc(sizeof(job_info_t) * (*msg)->record_count); /* load individual job info */ for (i = 0; i < (*msg)->record_count; i++) { @@ -1790,7 +1896,7 @@ _unpack_job_info_msg(job_info_msg_t ** msg, Buf buffer) } return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(job); xfree(*msg); *msg = NULL; @@ -1843,7 +1949,7 @@ _unpack_job_info_members(job_info_t * job, Buf buffer) safe_unpack32(&job->num_procs, buffer); if (select_g_alloc_jobinfo(&job->select_jobinfo) - || 
select_g_unpack_jobinfo(job->select_jobinfo, buffer)) + || select_g_unpack_jobinfo(job->select_jobinfo, buffer)) goto unpack_error; safe_unpackstr_xmalloc(&job->features, &uint16_tmp, buffer); @@ -1877,7 +1983,7 @@ _unpack_job_info_members(job_info_t * job, Buf buffer) return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(job->nodes); xfree(job->partition); xfree(job->account); @@ -2062,7 +2168,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t ** return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(build_ptr->authtype); xfree(build_ptr->backup_addr); xfree(build_ptr->backup_controller); @@ -2173,7 +2279,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer) if(job_desc_ptr->select_jobinfo) select_g_pack_jobinfo(job_desc_ptr->select_jobinfo, buffer); else if (select_g_alloc_jobinfo(&job_desc_ptr->select_jobinfo) - == SLURM_SUCCESS) { + == SLURM_SUCCESS) { #if SYSTEM_DIMENSIONS if(job_desc_ptr->geometry[0] != (uint16_t) NO_VAL) select_g_set_jobinfo(job_desc_ptr->select_jobinfo, @@ -2269,7 +2375,7 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer) safe_unpackstr_xmalloc(&job_desc_ptr->mail_user, &uint16_tmp, buffer); if (select_g_alloc_jobinfo (&job_desc_ptr->select_jobinfo) - || select_g_unpack_jobinfo(job_desc_ptr->select_jobinfo, buffer)) + || select_g_unpack_jobinfo(job_desc_ptr->select_jobinfo, buffer)) goto unpack_error; #if SYSTEM_DIMENSIONS job_desc_ptr->geometry[0] = (uint16_t)NO_VAL; @@ -2278,7 +2384,7 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer) job_desc_ptr->rotate = (uint16_t)NO_VAL; return SLURM_SUCCESS; - unpack_error: +unpack_error: select_g_free_jobinfo(&job_desc_ptr->select_jobinfo); xfree(job_desc_ptr->features); xfree(job_desc_ptr->name); @@ -2301,28 +2407,28 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer) } static void -_pack_old_job_desc_msg(old_job_alloc_msg_t * job_desc_ptr, Buf buffer) +_pack_job_alloc_info_msg(job_alloc_info_msg_t * 
job_desc_ptr, Buf buffer) { /* load the data values */ pack32((uint32_t)job_desc_ptr->job_id, buffer); } static int -_unpack_old_job_desc_msg(old_job_alloc_msg_t ** +_unpack_job_alloc_info_msg(job_alloc_info_msg_t ** job_desc_buffer_ptr, Buf buffer) { - old_job_alloc_msg_t *job_desc_ptr; + job_alloc_info_msg_t *job_desc_ptr; /* alloc memory for structure */ assert(job_desc_buffer_ptr != NULL); - job_desc_ptr = xmalloc(sizeof(old_job_alloc_msg_t)); + job_desc_ptr = xmalloc(sizeof(job_alloc_info_msg_t)); *job_desc_buffer_ptr = job_desc_ptr; /* load the data values */ safe_unpack32(&job_desc_ptr->job_id, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(job_desc_ptr); *job_desc_buffer_ptr = NULL; return SLURM_ERROR; @@ -2347,7 +2453,7 @@ _unpack_last_update_msg(last_update_msg_t ** msg, Buf buffer) safe_unpack_time(&last_update_msg->last_update, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(last_update_msg); *msg = NULL; return SLURM_ERROR; @@ -2372,7 +2478,7 @@ _unpack_return_code_msg(return_code_msg_t ** msg, Buf buffer) safe_unpack32(&return_code_msg->return_code, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(return_code_msg); *msg = NULL; return SLURM_ERROR; @@ -2419,7 +2525,7 @@ _unpack_reattach_tasks_request_msg(reattach_tasks_request_msg_t ** msg_ptr, return SLURM_SUCCESS; - unpack_error: +unpack_error: slurm_free_reattach_tasks_request_msg(msg); *msg_ptr = NULL; return SLURM_ERROR; @@ -2461,7 +2567,7 @@ _unpack_reattach_tasks_response_msg(reattach_tasks_response_msg_t ** msg_ptr, goto unpack_error; return SLURM_SUCCESS; - unpack_error: +unpack_error: slurm_free_reattach_tasks_response_msg(msg); *msg_ptr = NULL; return SLURM_ERROR; @@ -2495,7 +2601,7 @@ _unpack_task_exit_msg(task_exit_msg_t ** msg_ptr, Buf buffer) goto unpack_error; return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(msg); *msg_ptr = NULL; return SLURM_ERROR; @@ -2539,9 +2645,9 @@ 
_unpack_launch_tasks_response_msg(launch_tasks_response_msg_t ** return SLURM_SUCCESS; - unpack_error2: +unpack_error2: xfree(msg->count_of_pids); - unpack_error: +unpack_error: xfree(msg->node_name); xfree(msg); *msg_ptr = NULL; @@ -2673,7 +2779,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** } return SLURM_SUCCESS; - unpack_error: +unpack_error: slurm_free_launch_tasks_request_msg(msg); *msg_ptr = NULL; return SLURM_ERROR; @@ -2705,7 +2811,7 @@ _pack_spawn_task_request_msg(spawn_task_request_msg_t * msg, Buf buffer) static int _unpack_spawn_task_request_msg(spawn_task_request_msg_t ** - msg_ptr, Buf buffer) + msg_ptr, Buf buffer) { uint16_t uint16_tmp; spawn_task_request_msg_t *msg; @@ -2742,7 +2848,7 @@ _unpack_spawn_task_request_msg(spawn_task_request_msg_t ** return SLURM_SUCCESS; - unpack_error: +unpack_error: slurm_free_spawn_task_request_msg(msg); *msg_ptr = NULL; return SLURM_ERROR; @@ -2769,7 +2875,7 @@ _unpack_cancel_tasks_msg(kill_tasks_msg_t ** msg_ptr, Buf buffer) safe_unpack32(&msg->signal, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(msg); *msg_ptr = NULL; return SLURM_ERROR; @@ -2792,7 +2898,7 @@ _unpack_shutdown_msg(shutdown_msg_t ** msg_ptr, Buf buffer) safe_unpack16(&msg->core, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(msg); *msg_ptr = NULL; return SLURM_ERROR; @@ -2833,7 +2939,7 @@ _unpack_job_step_kill_msg(job_step_kill_msg_t ** msg_ptr, Buf buffer) safe_unpack16(&msg->batch_flag, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(msg); *msg_ptr = NULL; return SLURM_ERROR; @@ -2860,7 +2966,7 @@ _unpack_complete_job_allocation_msg( safe_unpack32(&msg->job_rc, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(msg); *msg_ptr = NULL; return SLURM_ERROR; @@ -2892,7 +2998,7 @@ _unpack_complete_batch_script_msg( safe_unpackstr_xmalloc(&msg->node_name, &uint16_tmp, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(msg); *msg_ptr = 
NULL; return SLURM_ERROR; @@ -2958,7 +3064,7 @@ _unpack_step_complete_msg(step_complete_msg_t ** msg_ptr, Buf buffer) return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(msg); *msg_ptr = NULL; return SLURM_ERROR; @@ -2973,7 +3079,7 @@ _pack_job_info_request_msg(job_info_request_msg_t * msg, Buf buffer) static int _unpack_job_info_request_msg(job_info_request_msg_t** msg, - Buf buffer) + Buf buffer) { job_info_request_msg_t*job_info; @@ -2984,7 +3090,7 @@ _unpack_job_info_request_msg(job_info_request_msg_t** msg, safe_unpack16(&job_info->show_flags, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(job_info); *msg = NULL; return SLURM_ERROR; @@ -2998,7 +3104,7 @@ _pack_node_select_info_req_msg(node_info_select_request_msg_t *msg, Buf buffer) static int _unpack_node_select_info_req_msg(node_info_select_request_msg_t **msg, - Buf buffer) + Buf buffer) { node_info_select_request_msg_t *node_sel_info; @@ -3008,7 +3114,7 @@ _unpack_node_select_info_req_msg(node_info_select_request_msg_t **msg, safe_unpack_time(&node_sel_info->last_update, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(node_sel_info); *msg = NULL; return SLURM_ERROR; @@ -3037,7 +3143,7 @@ _unpack_job_step_info_req_msg(job_step_info_request_msg_t ** msg, Buf buffer) safe_unpack16(&job_step_info->show_flags, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(job_step_info); *msg = NULL; return SLURM_ERROR; @@ -3062,7 +3168,7 @@ _unpack_node_info_request_msg(node_info_request_msg_t ** msg, Buf buffer) safe_unpack16(&node_info->show_flags, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(node_info); *msg = NULL; return SLURM_ERROR; @@ -3087,7 +3193,7 @@ _unpack_part_info_request_msg(part_info_request_msg_t ** msg, Buf buffer) safe_unpack16(&part_info->show_flags, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(part_info); *msg = NULL; return SLURM_ERROR; @@ -3127,7 +3233,7 @@ 
_unpack_slurm_addr_array(slurm_addr ** slurm_address, } return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(*slurm_address); *slurm_address = NULL; return SLURM_ERROR; @@ -3208,7 +3314,7 @@ _unpack_slurm_step_layout(slurm_step_layout_t **layout, Buf buffer) if (uint32_tmp != step_layout->tasks[i]) goto unpack_error; safe_unpackstr_malloc(&step_layout->host[i], &uint16_tmp, - buffer); + buffer); } return SLURM_SUCCESS; @@ -3292,10 +3398,10 @@ _unpack_ret_list(List *ret_list, } return SLURM_SUCCESS; - unpack_error: +unpack_error: if (ret_type && ret_type->type) { error("_unpack_ret_list: message type %u, record %d of %u", - ret_type->type, j, nl); + ret_type->type, j, nl); } if (ret_data_info) { /* failed unpacking data, free without putting on ret_list */ @@ -3393,12 +3499,12 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer) goto unpack_error; if (select_g_alloc_jobinfo (&launch_msg_ptr->select_jobinfo) - || select_g_unpack_jobinfo(launch_msg_ptr->select_jobinfo, buffer)) + || select_g_unpack_jobinfo(launch_msg_ptr->select_jobinfo, buffer)) goto unpack_error; return SLURM_SUCCESS; - unpack_error: +unpack_error: slurm_free_job_launch_msg(launch_msg_ptr); *msg = NULL; return SLURM_ERROR; @@ -3426,7 +3532,7 @@ _unpack_job_id_request_msg(job_id_request_msg_t ** msg, Buf buffer) safe_unpack32(&tmp_ptr->job_pid, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(tmp_ptr); *msg = NULL; return SLURM_ERROR; @@ -3454,7 +3560,7 @@ _unpack_job_id_response_msg(job_id_response_msg_t ** msg, Buf buffer) safe_unpack32(&tmp_ptr->job_id, buffer); return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree(tmp_ptr); *msg = NULL; return SLURM_ERROR; @@ -3482,7 +3588,7 @@ _unpack_srun_ping_msg(srun_ping_msg_t ** msg_ptr, Buf buffer) safe_unpack32(&msg->step_id , buffer ) ; return SLURM_SUCCESS; - unpack_error: +unpack_error: *msg_ptr = NULL; xfree(msg); return SLURM_ERROR; @@ -3514,7 +3620,7 @@ 
_unpack_srun_node_fail_msg(srun_node_fail_msg_t ** msg_ptr, Buf buffer) return SLURM_SUCCESS; - unpack_error: +unpack_error: *msg_ptr = NULL; xfree( msg->nodelist ); xfree( msg ); @@ -3541,7 +3647,7 @@ _unpack_job_ready_msg(job_id_msg_t ** msg_ptr, Buf buffer) safe_unpack32(&msg->job_id , buffer ) ; return SLURM_SUCCESS; - unpack_error: +unpack_error: *msg_ptr = NULL; xfree(msg); return SLURM_ERROR; @@ -3571,7 +3677,7 @@ _unpack_srun_timeout_msg(srun_timeout_msg_t ** msg_ptr, Buf buffer) safe_unpack_time (&msg->timeout, buffer ); return SLURM_SUCCESS; - unpack_error: +unpack_error: *msg_ptr = NULL; xfree(msg); return SLURM_ERROR; @@ -3597,7 +3703,7 @@ static int _unpack_suspend_msg(suspend_msg_t **msg_ptr, Buf buffer) safe_unpack32(&msg->job_id , buffer ) ; return SLURM_SUCCESS; - unpack_error: +unpack_error: *msg_ptr = NULL; xfree(msg); return SLURM_ERROR; @@ -3630,7 +3736,7 @@ _unpack_checkpoint_msg(checkpoint_msg_t **msg_ptr, Buf buffer) safe_unpack32(&msg->step_id, buffer ) ; return SLURM_SUCCESS; - unpack_error: +unpack_error: *msg_ptr = NULL; xfree(msg); return SLURM_ERROR; @@ -3665,7 +3771,7 @@ _unpack_checkpoint_comp(checkpoint_comp_msg_t **msg_ptr, Buf buffer) safe_unpack_time ( & msg -> begin_time , buffer ) ; return SLURM_SUCCESS; - unpack_error: +unpack_error: *msg_ptr = NULL; xfree (msg->error_msg); xfree (msg); @@ -3697,7 +3803,7 @@ _unpack_checkpoint_resp_msg(checkpoint_resp_msg_t **msg_ptr, Buf buffer) safe_unpackstr_xmalloc ( & msg -> error_msg, & uint16_tmp , buffer ) ; return SLURM_SUCCESS; - unpack_error: +unpack_error: *msg_ptr = NULL; xfree(msg); return SLURM_ERROR; @@ -3761,7 +3867,7 @@ static int _unpack_file_bcast(file_bcast_msg_t ** msg_ptr , Buf buffer ) } return SLURM_SUCCESS; - unpack_error: +unpack_error: xfree( msg -> fname ); for (i=0; i<FILE_BLOCKS; i++) xfree( msg -> block[i] ); @@ -3796,9 +3902,9 @@ static int _unpack_kvs_rec(struct kvs_comm **msg_ptr, Buf buffer) msg->kvs_values = xmalloc(sizeof(char *) * msg->kvs_cnt); for (i=0; 
i<msg->kvs_cnt; i++) { safe_unpackstr_xmalloc(&msg->kvs_keys[i], - &uint16_tmp, buffer); + &uint16_tmp, buffer); safe_unpackstr_xmalloc(&msg->kvs_values[i], - &uint16_tmp, buffer); + &uint16_tmp, buffer); } return SLURM_SUCCESS; @@ -3826,7 +3932,7 @@ static int _unpack_kvs_data(struct kvs_comm_set **msg_ptr, Buf buffer) safe_unpack16(&msg->task_id, buffer); safe_unpack16(&msg->kvs_comm_recs, buffer); msg->kvs_comm_ptr = xmalloc(sizeof(struct kvs_comm) * - msg->kvs_comm_recs); + msg->kvs_comm_recs); for (i=0; i<msg->kvs_comm_recs; i++) { if (_unpack_kvs_rec(&msg->kvs_comm_ptr[i], buffer)) goto unpack_error; @@ -3875,34 +3981,34 @@ unpack_error: } /* template -void pack_ ( * msg , Buf buffer ) -{ - xassert ( msg != NULL ); - - pack16( msg -> , buffer ) ; - pack32((uint32_t)msg -> , buffer ) ; - packstr ( msg -> , buffer ) ; -} - -int unpack_ ( ** msg_ptr , Buf buffer ) -{ - uint16_t uint16_tmp; - * msg ; - - xassert ( msg_ptr != NULL ); - - msg = xmalloc ( sizeof ( ) ) ; - *msg_ptr = msg; - - safe_unpack16( & msg -> , buffer ) ; - safe_unpack32(& msg -> , buffer ) ; - safe_unpackstr_xmalloc ( & msg -> x, & uint16_tmp , buffer ) ; - return SLURM_SUCCESS; - - unpack_error: - xfree(msg -> x); - xfree(msg); - *msg_ptr = NULL; - return SLURM_ERROR; -} + void pack_ ( * msg , Buf buffer ) + { + xassert ( msg != NULL ); + + pack16( msg -> , buffer ) ; + pack32((uint32_t)msg -> , buffer ) ; + packstr ( msg -> , buffer ) ; + } + + int unpack_ ( ** msg_ptr , Buf buffer ) + { + uint16_t uint16_tmp; + * msg ; + + xassert ( msg_ptr != NULL ); + + msg = xmalloc ( sizeof ( ) ) ; + *msg_ptr = msg; + + safe_unpack16( & msg -> , buffer ) ; + safe_unpack32(& msg -> , buffer ) ; + safe_unpackstr_xmalloc ( & msg -> x, & uint16_tmp , buffer ) ; + return SLURM_SUCCESS; + + unpack_error: + xfree(msg -> x); + xfree(msg); + *msg_ptr = NULL; + return SLURM_ERROR; + } */ diff --git a/src/sbcast/agent.c b/src/sbcast/agent.c index 
22d292188c5193b9cdc418b64370def03a2c4c61..e8b59ee00e56dfe3578b8d82d6e93eacf64388f3 100644 --- a/src/sbcast/agent.c +++ b/src/sbcast/agent.c @@ -114,7 +114,7 @@ static void *_agent_thread(void *args) /* Issue the RPC to transfer the file's data */ extern void send_rpc(file_bcast_msg_t *bcast_msg, - resource_allocation_response_msg_t *alloc_resp) + job_alloc_info_response_msg_t *alloc_resp) { /* Preserve some data structures across calls for better performance */ static forward_t from, forward[MAX_THREADS]; @@ -155,16 +155,15 @@ extern void send_rpc(file_bcast_msg_t *bcast_msg, &from.name[MAX_SLURM_NAME*i], MAX_SLURM_NAME); forward_set(&forward[threads_used], span[threads_used], - &i, &from); - msg[threads_used].msg_type = REQUEST_FILE_BCAST; - msg[threads_used].address = alloc_resp->node_addr[j]; - msg[threads_used].data = bcast_msg; - msg[threads_used].forward = forward[threads_used]; - msg[threads_used].ret_list = NULL; + &i, &from); + msg[threads_used].msg_type = REQUEST_FILE_BCAST; + msg[threads_used].address = alloc_resp->node_addr[j]; + msg[threads_used].data = bcast_msg; + msg[threads_used].forward = forward[threads_used]; + msg[threads_used].ret_list = NULL; msg[threads_used].orig_addr.sin_addr.s_addr = 0; msg[threads_used].srun_node_id = 0; - - + threads_used++; } xfree(span); diff --git a/src/sbcast/sbcast.c b/src/sbcast/sbcast.c index 0c0b813a99e989fbf3cb842c4555357c357ac9c3..f5b389712f56e1b8bb5ebecf0cc1e9e6a6795786 100644 --- a/src/sbcast/sbcast.c +++ b/src/sbcast/sbcast.c @@ -53,7 +53,7 @@ int fd; /* source file descriptor */ struct sbcast_parameters params; /* program parameters */ struct stat f_stat; /* source file stats */ -resource_allocation_response_msg_t *alloc_resp; /* job specification */ +job_alloc_info_response_msg_t *alloc_resp; /* job specification */ static void _bcast_file(void); static void _get_job_info(void); diff --git a/src/sbcast/sbcast.h b/src/sbcast/sbcast.h index 
b04f2ee58548855e0b132ba24517dbc198eb5ed3..479d5f66802a3ac4214740a0d6de521859cf1fb8 100644 --- a/src/sbcast/sbcast.h +++ b/src/sbcast/sbcast.h @@ -52,6 +52,6 @@ extern struct sbcast_parameters params; extern void parse_command_line(int argc, char *argv[]); extern void send_rpc(file_bcast_msg_t *bcast_msg, - resource_allocation_response_msg_t *alloc_resp); + job_alloc_info_response_msg_t *alloc_resp); #endif diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index c0dbb4d35113fee16540d45b75784a7a05d040d9..d7f234fe540d9351ffeaf4d9acd496f88f853815 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -3586,13 +3586,13 @@ kill_job_on_node(uint32_t job_id, struct job_record *job_ptr, /* - * old_job_info - get details about an existing job allocation + * job_alloc_info - get details about an existing job allocation * IN uid - job issuing the code * IN job_id - ID of job for which info is requested * OUT job_pptr - set to pointer to job record */ extern int -old_job_info(uint32_t uid, uint32_t job_id, struct job_record **job_pptr) +job_alloc_info(uint32_t uid, uint32_t job_id, struct job_record **job_pptr) { struct job_record *job_ptr; @@ -4336,8 +4336,8 @@ extern int job_requeue (uid_t uid, uint32_t job_id, slurm_fd conn_fd) * OUT timeout_msg - job timeout response to be sent * RET SLURM_SUCESS or an error code */ -extern int job_end_time(old_job_alloc_msg_t *time_req_msg, - srun_timeout_msg_t *timeout_msg) +extern int job_end_time(job_alloc_info_msg_t *time_req_msg, + srun_timeout_msg_t *timeout_msg) { struct job_record *job_ptr; xassert(timeout_msg); diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 15b0bd69bfd82f348f72ebfaa92271e66fb0844e..2c9a347d8957d9c7c7557fe876faaf9d2f262328 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -92,7 +92,8 @@ inline static void _slurm_rpc_job_step_get_info(slurm_msg_t * msg); inline static void _slurm_rpc_job_will_run(slurm_msg_t * msg); inline static void 
_slurm_rpc_node_registration(slurm_msg_t * msg); inline static void _slurm_rpc_node_select_info(slurm_msg_t * msg); -inline static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg); +inline static void _slurm_rpc_job_alloc_info(slurm_msg_t * msg); +inline static void _slurm_rpc_job_alloc_info_lite(slurm_msg_t * msg); inline static void _slurm_rpc_ping(slurm_msg_t * msg); inline static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg); inline static void _slurm_rpc_requeue(slurm_msg_t * msg); @@ -163,7 +164,7 @@ void slurmctld_req (slurm_msg_t * msg) break; case REQUEST_JOB_END_TIME: _slurm_rpc_end_time(msg); - slurm_free_old_job_alloc_msg(msg->data); + slurm_free_job_alloc_info_msg(msg->data); break; case REQUEST_NODE_INFO: _slurm_rpc_dump_nodes(msg); @@ -205,9 +206,13 @@ void slurmctld_req (slurm_msg_t * msg) _slurm_rpc_node_registration(msg); slurm_free_node_registration_status_msg(msg->data); break; - case REQUEST_OLD_JOB_RESOURCE_ALLOCATION: - _slurm_rpc_old_job_alloc(msg); - slurm_free_old_job_alloc_msg(msg->data); + case REQUEST_JOB_ALLOCATION_INFO: + _slurm_rpc_job_alloc_info(msg); + slurm_free_job_alloc_info_msg(msg->data); + break; + case REQUEST_JOB_ALLOCATION_INFO_LITE: + _slurm_rpc_job_alloc_info_lite(msg); + slurm_free_resource_allocation_response_msg(msg->data); break; case REQUEST_PING: _slurm_rpc_ping(msg); @@ -620,8 +625,8 @@ static void _slurm_rpc_dump_jobs(slurm_msg_t * msg) static void _slurm_rpc_end_time(slurm_msg_t * msg) { DEF_TIMERS; - old_job_alloc_msg_t *time_req_msg = - (old_job_alloc_msg_t *) msg->data; + job_alloc_info_msg_t *time_req_msg = + (job_alloc_info_msg_t *) msg->data; srun_timeout_msg_t timeout_msg; slurm_msg_t response_msg; int rc; @@ -1235,16 +1240,16 @@ static void _slurm_rpc_node_registration(slurm_msg_t * msg) } } -/* _slurm_rpc_old_job_alloc - process RPC to get details on existing job */ -static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg) +/* _slurm_rpc_job_alloc_info - process RPC to get details on existing 
job */ +static void _slurm_rpc_job_alloc_info(slurm_msg_t * msg) { int error_code = SLURM_SUCCESS; slurm_msg_t response_msg; struct job_record *job_ptr; DEF_TIMERS; - old_job_alloc_msg_t *job_desc_msg = - (old_job_alloc_msg_t *) msg->data; - resource_allocation_response_msg_t alloc_msg; + job_alloc_info_msg_t *job_info_msg = + (job_alloc_info_msg_t *) msg->data; + job_alloc_info_response_msg_t job_info_resp_msg; /* Locks: Read job, read node */ slurmctld_lock_t job_read_lock = { NO_LOCK, READ_LOCK, READ_LOCK, NO_LOCK }; @@ -1252,61 +1257,139 @@ static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg) bool do_unlock = false; START_TIMER; - debug2("Processing RPC: REQUEST_OLD_JOB_RESOURCE_ALLOCATION"); + debug2("Processing RPC: REQUEST_JOB_ALLOCATION_INFO"); /* do RPC call */ uid = g_slurm_auth_get_uid(msg->auth_cred); do_unlock = true; lock_slurmctld(job_read_lock); - error_code = old_job_info(uid, job_desc_msg->job_id, &job_ptr); + error_code = job_alloc_info(uid, job_info_msg->job_id, &job_ptr); END_TIMER; /* return result */ if (error_code || (job_ptr == NULL)) { if (do_unlock) unlock_slurmctld(job_read_lock); - debug2("_slurm_rpc_old_job_alloc: JobId=%u, uid=%u: %s", - job_desc_msg->job_id, uid, + debug2("_slurm_rpc_job_alloc_info: JobId=%u, uid=%u: %s", + job_info_msg->job_id, uid, slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { - info("_slurm_rpc_old_job_alloc JobId=%u NodeList=%s %s", - job_desc_msg->job_id, job_ptr->nodes, TIME_STR); + info("_slurm_rpc_job_alloc_info JobId=%u NodeList=%s %s", + job_info_msg->job_id, job_ptr->nodes, TIME_STR); /* send job_ID and node_name_ptr */ - alloc_msg.cpu_count_reps = xmalloc(sizeof(uint32_t) * - job_ptr->num_cpu_groups); - memcpy(alloc_msg.cpu_count_reps, - job_ptr->cpu_count_reps, - (sizeof(uint32_t) * job_ptr->num_cpu_groups)); - alloc_msg.cpus_per_node = xmalloc(sizeof(uint32_t) * - job_ptr->num_cpu_groups); - memcpy(alloc_msg.cpus_per_node, job_ptr->cpus_per_node, - (sizeof(uint32_t) * 
job_ptr->num_cpu_groups)); - alloc_msg.error_code = error_code; - alloc_msg.job_id = job_desc_msg->job_id; - alloc_msg.node_addr = xmalloc(sizeof(slurm_addr) * - job_ptr->node_cnt); - memcpy(alloc_msg.node_addr, job_ptr->node_addr, - (sizeof(slurm_addr) * job_ptr->node_cnt)); - alloc_msg.node_cnt = job_ptr->node_cnt; - alloc_msg.node_list = xstrdup(job_ptr->nodes); - alloc_msg.num_cpu_groups = job_ptr->num_cpu_groups; - alloc_msg.select_jobinfo = select_g_copy_jobinfo(job_ptr->select_jobinfo); + job_info_resp_msg.cpu_count_reps = + xmalloc(sizeof(uint32_t) * job_ptr->num_cpu_groups); + memcpy(job_info_resp_msg.cpu_count_reps, + job_ptr->cpu_count_reps, + (sizeof(uint32_t) * job_ptr->num_cpu_groups)); + job_info_resp_msg.cpus_per_node = + xmalloc(sizeof(uint32_t) * job_ptr->num_cpu_groups); + memcpy(job_info_resp_msg.cpus_per_node, job_ptr->cpus_per_node, + (sizeof(uint32_t) * job_ptr->num_cpu_groups)); + job_info_resp_msg.error_code = error_code; + job_info_resp_msg.job_id = job_info_msg->job_id; + job_info_resp_msg.node_addr = xmalloc(sizeof(slurm_addr) * + job_ptr->node_cnt); + memcpy(job_info_resp_msg.node_addr, job_ptr->node_addr, + (sizeof(slurm_addr) * job_ptr->node_cnt)); + job_info_resp_msg.node_cnt = job_ptr->node_cnt; + job_info_resp_msg.node_list = xstrdup(job_ptr->nodes); + job_info_resp_msg.num_cpu_groups = job_ptr->num_cpu_groups; + job_info_resp_msg.select_jobinfo = + select_g_copy_jobinfo(job_ptr->select_jobinfo); unlock_slurmctld(job_read_lock); - response_msg.msg_type = RESPONSE_RESOURCE_ALLOCATION; - response_msg.data = &alloc_msg; + response_msg.msg_type = RESPONSE_JOB_ALLOCATION_INFO; + response_msg.data = &job_info_resp_msg; forward_init(&response_msg.forward, NULL); response_msg.ret_list = NULL; response_msg.forward_struct_init = 0; slurm_send_node_msg(msg->conn_fd, &response_msg); - select_g_free_jobinfo(&alloc_msg.select_jobinfo); - xfree(alloc_msg.cpu_count_reps); - xfree(alloc_msg.cpus_per_node); - xfree(alloc_msg.node_addr); - 
xfree(alloc_msg.node_list); + select_g_free_jobinfo(&job_info_resp_msg.select_jobinfo); + xfree(job_info_resp_msg.cpu_count_reps); + xfree(job_info_resp_msg.cpus_per_node); + xfree(job_info_resp_msg.node_addr); + xfree(job_info_resp_msg.node_list); + } +} + +/* _slurm_rpc_job_alloc_info_lite - process RPC to get minor details + on existing job */ +static void _slurm_rpc_job_alloc_info_lite(slurm_msg_t * msg) +{ + int error_code = SLURM_SUCCESS; + slurm_msg_t response_msg; + struct job_record *job_ptr; + DEF_TIMERS; + job_alloc_info_msg_t *job_info_msg = + (job_alloc_info_msg_t *) msg->data; + resource_allocation_response_msg_t job_info_resp_msg; + /* Locks: Read job, read node */ + slurmctld_lock_t job_read_lock = { + NO_LOCK, READ_LOCK, READ_LOCK, NO_LOCK }; + uid_t uid; + bool do_unlock = false; + + START_TIMER; + debug2("Processing RPC: REQUEST_JOB_ALLOCATION_INFO_LITE"); + + /* do RPC call */ + uid = g_slurm_auth_get_uid(msg->auth_cred); + do_unlock = true; + lock_slurmctld(job_read_lock); + error_code = job_alloc_info(uid, job_info_msg->job_id, &job_ptr); + END_TIMER; + + /* return result */ + if (error_code || (job_ptr == NULL)) { + if (do_unlock) + unlock_slurmctld(job_read_lock); + debug2("_slurm_rpc_job_alloc_info_lite: JobId=%u, uid=%u: %s", + job_info_msg->job_id, uid, + slurm_strerror(error_code)); + slurm_send_rc_msg(msg, error_code); + } else { + info("_slurm_rpc_job_alloc_info_lite JobId=%u NodeList=%s %s", + job_info_msg->job_id, job_ptr->nodes, TIME_STR); + + /* send job_ID and node_name_ptr */ + job_info_resp_msg.cpu_count_reps = + xmalloc(sizeof(uint32_t) * job_ptr->num_cpu_groups); + memcpy(job_info_resp_msg.cpu_count_reps, + job_ptr->cpu_count_reps, + (sizeof(uint32_t) * job_ptr->num_cpu_groups)); + job_info_resp_msg.cpus_per_node = + xmalloc(sizeof(uint32_t) * job_ptr->num_cpu_groups); + memcpy(job_info_resp_msg.cpus_per_node, job_ptr->cpus_per_node, + (sizeof(uint32_t) * job_ptr->num_cpu_groups)); + job_info_resp_msg.error_code = error_code; 
+ job_info_resp_msg.job_id = job_info_msg->job_id; + job_info_resp_msg.node_addr = xmalloc(sizeof(slurm_addr) * + job_ptr->node_cnt); + memcpy(job_info_resp_msg.node_addr, job_ptr->node_addr, + (sizeof(slurm_addr) * job_ptr->node_cnt)); + job_info_resp_msg.node_cnt = job_ptr->node_cnt; + job_info_resp_msg.node_list = xstrdup(job_ptr->nodes); + job_info_resp_msg.num_cpu_groups = job_ptr->num_cpu_groups; + job_info_resp_msg.select_jobinfo = + select_g_copy_jobinfo(job_ptr->select_jobinfo); + unlock_slurmctld(job_read_lock); + + response_msg.msg_type = RESPONSE_JOB_ALLOCATION_INFO_LITE; + response_msg.data = &job_info_resp_msg; + forward_init(&response_msg.forward, NULL); + response_msg.ret_list = NULL; + response_msg.forward_struct_init = 0; + + slurm_send_node_msg(msg->conn_fd, &response_msg); + select_g_free_jobinfo(&job_info_resp_msg.select_jobinfo); + xfree(job_info_resp_msg.cpu_count_reps); + xfree(job_info_resp_msg.cpus_per_node); + xfree(job_info_resp_msg.node_addr); + xfree(job_info_resp_msg.node_list); } } @@ -1582,7 +1665,7 @@ static void _slurm_rpc_stat_jobacct(slurm_msg_t * msg) debug2("Processing RPC: MESSAGE_STAT_JOBACCT"); lock_slurmctld(job_read_lock); - error_code = old_job_info(uid, req->job_id, &job_ptr); + error_code = job_alloc_info(uid, req->job_id, &job_ptr); END_TIMER; /* return result */ if (error_code || (job_ptr == NULL)) { diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index f95d893860da68e0d4127f5c2eabff98b94ae203..6d9ec2a9cb58fc6f7c4eaf2a5c896fc74394378d 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -672,8 +672,8 @@ extern bool job_epilog_complete(uint32_t job_id, char *node_name, * OUT timeout_msg - job timeout response to be sent * RET SLURM_SUCESS or an error code */ -extern int job_end_time(old_job_alloc_msg_t *time_req_msg, - srun_timeout_msg_t *timeout_msg); +extern int job_end_time(job_alloc_info_msg_t *time_req_msg, + srun_timeout_msg_t *timeout_msg); /* job_fini - free all memory 
associated with job records */ extern void job_fini (void); @@ -957,13 +957,13 @@ extern void node_did_resp (char *name); extern void node_not_resp (char *name, time_t msg_time); /* - * old_job_info - get details about an existing job allocation + * job_alloc_info - get details about an existing job allocation * IN uid - job issuing the code * IN job_id - ID of job for which info is requested * OUT job_pptr - set to pointer to job record */ -extern int old_job_info(uint32_t uid, uint32_t job_id, - struct job_record **job_pptr); +extern int job_alloc_info(uint32_t uid, uint32_t job_id, + struct job_record **job_pptr); /* diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 371d3c6777c6c9cff7a5cce493b00423b7c5491b..a0a72f4c8ec878d52c97fca7a3b7a633cf1d5df5 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -162,31 +162,30 @@ jobid_from_env(void) static void _wait_for_resources(resource_allocation_response_msg_t **resp) { - old_job_alloc_msg_t old; resource_allocation_response_msg_t *r = *resp; int sleep_time = MIN_ALLOC_WAIT; + int job_id = r->job_id; if (!opt.quiet) info ("job %u queued and waiting for resources", r->job_id); - old.job_id = r->job_id; slurm_free_resource_allocation_response_msg(r); /* Keep polling until the job is allocated resources */ while (_wait_for_alloc_rpc(sleep_time, resp) <= 0) { - if (slurm_confirm_allocation(&old, resp) >= 0) + if (slurm_allocation_lookup_lite(job_id, resp) >= 0) break; if (slurm_get_errno() == ESLURM_JOB_PENDING) debug3 ("Still waiting for allocation"); else fatal ("Unable to confirm allocation for job %u: %m", - old.job_id); + job_id); if (destroy_job) { - verbose("cancelling job %u", old.job_id); - slurm_complete_job(old.job_id, 0); + verbose("cancelling job %u", job_id); + slurm_complete_job(job_id, 0); debugger_launch_failure(allocate_job); exit(0); } diff --git a/src/srun/srun_job.c b/src/srun/srun_job.c index 12ab1c20a5327af371b1f8b8569b666a4bb5836c..333d0f2304e506d54b00feca27fb6c84158ec5c5 100644 
--- a/src/srun/srun_job.c +++ b/src/srun/srun_job.c @@ -67,7 +67,6 @@ typedef struct allocation_info { uint32_t stepid; char *nodelist; uint32_t nnodes; - slurm_addr *addrs; uint16_t num_cpu_groups; uint32_t *cpus_per_node; uint32_t *cpu_count_reps; @@ -121,8 +120,7 @@ job_create_noalloc(void) cpn = (opt.nprocs + ai->nnodes - 1) / ai->nnodes; ai->cpus_per_node = &cpn; ai->cpu_count_reps = &ai->nnodes; - ai->addrs = NULL; - + /* * Create job, then fill in host addresses */ @@ -216,8 +214,7 @@ job_step_create_allocation(uint32_t job_id) cpn = (opt.nprocs + ai->nnodes - 1) / ai->nnodes; ai->cpus_per_node = &cpn; ai->cpu_count_reps = &ai->nnodes; - ai->addrs = NULL; - + /* * Create job, then fill in host addresses */ @@ -244,7 +241,6 @@ job_create_allocation(resource_allocation_response_msg_t *resp) i->num_cpu_groups = resp->num_cpu_groups; i->cpus_per_node = resp->cpus_per_node; i->cpu_count_reps = resp->cpu_count_reps; - i->addrs = resp->node_addr; i->select_jobinfo = select_g_copy_jobinfo(resp->select_jobinfo); job = _job_create_structure(i);