From c9dd5f770d7beb38a21e0fbca90662ac6db32b00 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 5 Jul 2002 19:05:12 +0000
Subject: [PATCH] Fix bug in unpack array code.
 Add cpu count by node for resource allocation.

---
 src/slurmctld/controller.c                | 82 ++++++++++---------
 src/slurmctld/job_mgr.c                   | 56 ++++++++-----
 src/slurmctld/node_mgr.c                  | 51 ------------
 src/slurmctld/node_scheduler.c            | 64 ++++++++++++++-
 src/slurmctld/slurmctld.h                 | 79 ++++++++----------
 .../slurm_unit/api/manual/allocate-tst.c  | 51 ++++++++----
 6 files changed, 205 insertions(+), 178 deletions(-)

diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 0dba83311c1..08c9456d66b 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -194,7 +194,7 @@ slurmctld_req ( slurm_msg_t * msg )
             slurm_rpc_update_partition ( msg ) ;
             break;
         default:
-            error ("slurmctld_req: invalid request msg type %d\n", msg-> msg_type);
+            error ("invalid request msg type %d\n", msg-> msg_type);
             slurm_send_rc_msg ( msg , EINVAL );
             break;
     }
@@ -216,7 +216,7 @@ slurm_rpc_dump_build ( slurm_msg_t * msg )
     /* check to see if build_data has changed */
     if ( last_time_msg -> last_update >= init_time )
     {
-        info ("slurmctld_req: dump_build, no change, time=%ld",
+        info ("dump_build, no change, time=%ld",
             (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , SLURM_NO_CHANGE_IN_DATA );
     }
@@ -230,7 +230,7 @@ slurm_rpc_dump_build ( slurm_msg_t * msg )
         response_msg . data = & build_tbl ;

         /* send message */
-        info ("slurmctld_req: dump_build time=%ld", (long) (clock () - start_time));
+        info ("dump_build time=%ld", (long) (clock () - start_time));
         slurm_send_node_msg( msg -> conn_fd , &response_msg ) ;
     }
 }
@@ -250,7 +250,7 @@ slurm_rpc_dump_jobs ( slurm_msg_t * msg )

     if ( last_time_msg -> last_update >= last_job_update )
     {
-        info ("slurmctld_req: dump_job, no change, time=%ld",
+        info ("dump_job, no change, time=%ld",
             (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , SLURM_NO_CHANGE_IN_DATA );
     }
@@ -266,7 +266,7 @@ slurm_rpc_dump_jobs ( slurm_msg_t * msg )
         response_msg . data = dump ;
         /* send message */
         slurm_send_node_msg( msg -> conn_fd , &response_msg ) ;
-        info ("slurmctld_req: dump_job, size=%d, time=%ld",
+        info ("dump_job, size=%d, time=%ld",
             dump_size, (long) (clock () - start_time));
         if (dump)
             xfree (dump);
@@ -287,7 +287,7 @@ slurm_rpc_dump_nodes ( slurm_msg_t * msg )

     if ( last_time_msg -> last_update >= last_node_update )
     {
-        info ("slurmctld_req: dump_node, no change, time=%ld",
+        info ("dump_node, no change, time=%ld",
             (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , SLURM_NO_CHANGE_IN_DATA );
     }
@@ -303,7 +303,7 @@ slurm_rpc_dump_nodes ( slurm_msg_t * msg )
         response_msg . data = dump ;
         /* send message */
         slurm_send_node_msg( msg -> conn_fd , &response_msg ) ;
-        info ("slurmctld_req: dump_node, size=%d, time=%ld",
+        info ("dump_node, size=%d, time=%ld",
             dump_size, (long) (clock () - start_time));
         if (dump)
             xfree (dump);
@@ -324,7 +324,7 @@ slurm_rpc_dump_partitions ( slurm_msg_t * msg )

     if ( last_time_msg -> last_update >= last_part_update )
     {
-        info ("slurmctld_req: dump_part, no change, time=%ld",
+        info ("dump_part, no change, time=%ld",
             (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , SLURM_NO_CHANGE_IN_DATA );
     }
@@ -340,7 +340,7 @@ slurm_rpc_dump_partitions ( slurm_msg_t * msg )
         response_msg . data = dump ;
         /* send message */
         slurm_send_node_msg( msg -> conn_fd , &response_msg ) ;
-        info ("slurmctld_req: dump_part, size=%d, time=%ld",
+        info ("dump_part, size=%d, time=%ld",
             dump_size, (long) (clock () - start_time));
         if (dump)
             xfree (dump);
@@ -363,13 +363,13 @@ slurm_rpc_job_cancel ( slurm_msg_t * msg )
     /* return result */
     if (error_code)
     {
-        info ("slurmctld_req: job_cancel error %d for %u, time=%ld",
+        info ("job_cancel error %d for %u, time=%ld",
             error_code, job_id_msg->job_id, (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , error_code );
     }
     else
     {
-        info ("slurmctld_req: job_cancel success for %u, time=%ld",
+        info ("job_cancel success for %u, time=%ld",
             job_id_msg->job_id, (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , SLURM_SUCCESS );
     }
@@ -393,14 +393,14 @@ slurm_rpc_update_job ( slurm_msg_t * msg )
     /* return result */
     if (error_code)
     {
-        error ("slurmctld_req: update error %d on job id %u, time=%ld",
+        error ("update error %d on job id %u, time=%ld",
             error_code, job_desc_msg->job_id,
             (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , error_code );
     }
     else
     {
-        info ("slurmctld_req: updated job id %u, time=%ld",
+        info ("updated job id %u, time=%ld",
             job_desc_msg->job_id,
             (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , SLURM_SUCCESS );
@@ -426,14 +426,14 @@ slurm_rpc_update_node ( slurm_msg_t * msg )
     /* return result */
     if (error_code)
     {
-        error ("slurmctld_req: update error %d on node %s, time=%ld",
+        error ("update error %d on node %s, time=%ld",
             error_code, update_node_msg_ptr->node_names,
             (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , error_code );
     }
     else
     {
-        info ("slurmctld_req: updated node %s, time=%ld",
+        info ("updated node %s, time=%ld",
             update_node_msg_ptr->node_names,
             (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , SLURM_SUCCESS );
@@ -459,13 +459,13 @@ slurm_rpc_update_partition ( slurm_msg_t * msg )
     /* return result */
     if (error_code)
     {
-        error ("slurmctld_req: update error %d on partition %s, time=%ld",
+        error ("update error %d on partition %s, time=%ld",
             error_code, part_desc_ptr->name, (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , error_code );
     }
     else
     {
-        info ("slurmctld_req: updated partition %s, time=%ld",
+        info ("updated partition %s, time=%ld",
             part_desc_ptr->name, (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , SLURM_SUCCESS );
     }
@@ -500,13 +500,13 @@ slurm_rpc_submit_batch_job ( slurm_msg_t * msg )
     /* return result */
     if (error_code)
     {
-        info ("slurmctld_req: job_submit error %d, time=%ld",
+        info ("job_submit error %d, time=%ld",
             error_code, (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , error_code );
     }
     else
     {
-        info ("slurmctld_req: job_submit success for id=%u, time=%ld",
+        info ("job_submit success for id=%u, time=%ld",
             job_id, (long) (clock () - start_time));
         /* send job_ID */
         submit_msg . job_id = job_id ;
@@ -526,26 +526,29 @@ void slurm_rpc_allocate_resources ( slurm_msg_t * msg , uint8_t immediate )
     clock_t start_time;
     job_desc_msg_t * job_desc_msg = ( job_desc_msg_t * ) msg-> data ;
     char * node_list_ptr = NULL;
-    int job_id ;
+    uint16_t num_cpu_groups = 0;
+    uint32_t * cpus_per_node = NULL, * cpu_count_reps = NULL;
+    uint32_t job_id ;
     resource_allocation_response_msg_t alloc_msg ;

     start_time = clock ();

     /* do RPC call */
     dump_job_desc(job_desc_msg);
-    error_code = job_allocate(job_desc_msg,
-        &job_id, &node_list_ptr, immediate , false );
+    error_code = job_allocate(job_desc_msg, &job_id,
+        &node_list_ptr, &num_cpu_groups, &cpus_per_node, &cpu_count_reps,
+        immediate , false );

     /* return result */
     if (error_code)
     {
-        info ("slurmctld_req: error %d allocating resources, time=%ld",
+        info ("error %d allocating resources, time=%ld",
             error_code, (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , error_code );
     }
     else
     {
-        info ("slurmctld_req: allocated nodes %s, JobId=%u, time=%ld",
+        info ("allocated nodes %s, JobId=%u, time=%ld",
             node_list_ptr , job_id ,
             (long) (clock () - start_time));
@@ -553,13 +556,15 @@ void slurm_rpc_allocate_resources ( slurm_msg_t * msg , uint8_t immediate )
         alloc_msg . job_id = job_id ;
         alloc_msg . node_list = node_list_ptr ;
-        alloc_msg.num_cpu_groups = 0;
-        response_msg . msg_type = ( immediate ) ? RESPONSE_IMMEDIATE_RESOURCE_ALLOCATION : RESPONSE_RESOURCE_ALLOCATION ;
+        alloc_msg . num_cpu_groups = num_cpu_groups;
+        alloc_msg . cpus_per_node = cpus_per_node;
+        alloc_msg . cpu_count_reps = cpu_count_reps;
+        response_msg . msg_type = ( immediate ) ?
+            RESPONSE_IMMEDIATE_RESOURCE_ALLOCATION : RESPONSE_RESOURCE_ALLOCATION ;
         response_msg . data = & alloc_msg ;

         slurm_send_node_msg ( msg->conn_fd , & response_msg ) ;
     }
-    xfree ( node_list_ptr );
 }


 /* slurm_rpc_job_will_run - determine if job with given configuration can be initiated now */
@@ -568,32 +573,33 @@ void slurm_rpc_job_will_run ( slurm_msg_t * msg )
     /* init */
     int error_code;
     clock_t start_time;
+    uint16_t num_cpu_groups = 0;
+    uint32_t * cpus_per_node = NULL, * cpu_count_reps = NULL;
     uint32_t job_id ;
     job_desc_msg_t * job_desc_msg = ( job_desc_msg_t * ) msg-> data ;
-    char * node_name_ptr = NULL;
+    char * node_list_ptr = NULL;

     start_time = clock ();

     /* do RPC call */
     dump_job_desc(job_desc_msg);
-    error_code = job_allocate(job_desc_msg,
-        &job_id, &node_name_ptr, false , true );
+    error_code = job_allocate(job_desc_msg, &job_id,
+        &node_list_ptr, &num_cpu_groups, &cpus_per_node, &cpu_count_reps,
+        false , true );

     /* return result */
     if (error_code)
     {
-        info ("slurmctld_req: job_will_run error %d, time=%ld",
+        info ("job_will_run error %d, time=%ld",
             error_code, (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , error_code );
     }
     else
     {
-        info ("slurmctld_req: job_will_run success for , time=%ld",
+        info ("job_will_run success, time=%ld",
             (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , SLURM_SUCCESS );
     }
-    xfree ( node_name_ptr ) ;
-
 }


 /* slurm_rpc_reconfigure_controller - re-initialize controller from configuration files */
@@ -615,13 +621,13 @@ slurm_rpc_reconfigure_controller ( slurm_msg_t * msg )
     /* return result */
     if (error_code)
     {
-        error ("slurmctld_req: reconfigure error %d, time=%ld",
+        error ("reconfigure error %d, time=%ld",
             error_code, (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , error_code );
     }
     else
     {
-        info ("slurmctld_req: reconfigure completed successfully, time=%ld",
+        info ("reconfigure completed successfully, time=%ld",
             (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , SLURM_SUCCESS );
     }
@@ -654,13 +660,13 @@ slurm_rpc_node_registration ( slurm_msg_t * msg )
     /* return result */
     if (error_code)
     {
-        error ("slurmctld_req: validate_node_specs error %d for %s, time=%ld",
+        error ("validate_node_specs error %d for %s, time=%ld",
             error_code, node_reg_stat_msg -> node_name, (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , error_code );
     }
     else
     {
-        info ("slurmctld_req: validate_node_specs for %s, time=%ld",
+        info ("validate_node_specs for %s, time=%ld",
             node_reg_stat_msg -> node_name, (long) (clock () - start_time));
         slurm_send_rc_msg ( msg , SLURM_SUCCESS );
     }
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 2b5a48f75bb..200c3db6429 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -231,8 +231,6 @@ delete_job_details (struct job_record *job_entry)
         xfree(job_entry->details->req_nodes);
     if (job_entry->details->req_node_bitmap)
         bit_free(job_entry->details->req_node_bitmap);
-    if (job_entry->details->node_list)
-        xfree(job_entry->details->node_list);
     if (job_entry->details->features)
         xfree(job_entry->details->features);
     xfree(job_entry->details);
@@ -263,8 +261,7 @@ delete_job_record (uint32_t job_id)
         if (job_record_point->job_id != job_id)
             continue;

-        if (job_record_point->details)
-            xfree (job_record_point->details);
+        delete_job_details (job_record_point);
         xfree (job_record_point);
         list_remove (job_record_iterator);
         break;
@@ -358,45 +355,62 @@ init_job_conf ()

 /*
- * job_allocate - create job_records for job with supplied specification and
- * allocate nodes for it. if the job can not be immediately allocated nodes
+ * job_allocate - parse the supplied job specification, create job_records for it,
+ * and allocate nodes for it.  if the job can not be immediately allocated
+ * nodes, EAGAIN will be returned
  * input: job_specs - job specifications
  *	new_job_id - location for storing new job's id
  *	node_list - location for storing new job's allocated nodes
- *	immediate - either allocate nodes immediately or return failure
- *	will_run - test if job allocation would succeed, don't actually allocate nodes
+ *	num_cpu_groups - location to store number of cpu groups
+ *	cpus_per_node - location to store pointer to array of numbers of cpus on each node allocated
+ *	cpu_count_reps - location to store pointer to array of numbers of consecutive nodes having
+ *	  same cpu count
  * output: new_job_id - the job's ID
+ *	num_cpu_groups - number of cpu groups (elements in cpus_per_node and cpu_count_reps)
+ *	cpus_per_node - pointer to array of numbers of cpus on each node allocated
+ *	cpu_count_reps - pointer to array of numbers of consecutive nodes having same cpu count
  *	node_list - list of nodes allocated to the job
- * returns 0 on success, otherwise an error code from common/slurm_protocol_errno.h
+ * returns 0 on success, EINVAL if specification is invalid,
+ *	EAGAIN if higher priority jobs exist
  * globals: job_list - pointer to global job list
  *	list_part - global list of partition info
  *	default_part_loc - pointer to default partition
- *	last_job_update - time of last job table update
- * NOTE: the calling program must xfree the memory pointed to by node_list
+ * NOTE: If allocating nodes lx[0-7] to a job and those nodes have cpu counts of
+ *	4, 4, 4, 4, 8, 8, 4, 4 then num_cpu_groups=3, cpus_per_node={4,8,4} and
+ *	cpu_count_reps={4,2,2}
  */
 int
 immediate_job_launch (job_desc_msg_t * job_specs, uint32_t *new_job_id,
         char **node_list,
-        int immediate , int will_run )
+        uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps,
+        int immediate , int will_run )
 {
-    return job_allocate (job_specs, new_job_id, node_list, true , false );
+    return job_allocate (job_specs, new_job_id, node_list,
+        num_cpu_groups, cpus_per_node, cpu_count_reps,
+        true , false );
 }

 int
 will_job_run (job_desc_msg_t * job_specs, uint32_t *new_job_id,
         char **node_list,
-        int immediate , int will_run )
+        uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps,
+        int immediate , int will_run )
 {
-    return job_allocate (job_specs, new_job_id, node_list, false , true );
+    return job_allocate (job_specs, new_job_id, node_list,
+        num_cpu_groups, cpus_per_node, cpu_count_reps,
+        false , true );
 }

-int
-job_allocate (job_desc_msg_t * job_specs, uint32_t *new_job_id, char **node_list,
-        int immediate , int will_run )
+int
+job_allocate (job_desc_msg_t *job_specs, uint32_t *new_job_id, char **node_list,
+        uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps,
+        int immediate, int will_run)
 {
     int error_code;
     struct job_record *job_ptr;

+    *num_cpu_groups = 0;
     node_list[0] = NULL;
+    cpus_per_node[0] = cpu_count_reps[0] = NULL;
     error_code = job_create (job_specs, new_job_id, 1, will_run, &job_ptr);
     if (error_code || will_run)
@@ -434,8 +448,10 @@ job_allocate (job_desc_msg_t * job_specs, uint32_t *new_job_id, char **node_list
         job_ptr->end_time = 0;
     }

-    node_list[0] = xmalloc (strlen(job_ptr->nodes) + 1);
-    strcpy(node_list[0], job_ptr->nodes);
+    node_list[0] = job_ptr->nodes;
+    *num_cpu_groups = job_ptr->num_cpu_groups;
+    cpus_per_node[0] = job_ptr->cpus_per_node;
+    cpu_count_reps[0] = job_ptr->cpu_count_reps;
     return 0;
 }
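The cpus_per_node/cpu_count_reps pair documented above is a run-length encoding of per-node processor counts. As a minimal standalone sketch (not part of the patch; the values are taken from the lx[0-7] example in the comment), a caller can expand the encoding back into one cpu count per node:

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
	/* from the lx[0-7] example above: cpu counts 4,4,4,4,8,8,4,4 */
	uint16_t num_cpu_groups   = 3;
	uint32_t cpus_per_node[]  = { 4, 8, 4 };
	uint32_t cpu_count_reps[] = { 4, 2, 2 };
	int group, rep, node = 0;

	/* each group repeats its cpu count cpu_count_reps[group] times */
	for (group = 0; group < num_cpu_groups; group++)
		for (rep = 0; rep < cpu_count_reps[group]; rep++)
			printf ("node %d has %u cpus\n",
				node++, cpus_per_node[group]);
	return 0;
}

With the example values this prints eight lines, recovering the original 4, 4, 4, 4, 8, 8, 4, 4 distribution.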
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index c3fd4acb93b..7a686d3e09c 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -346,57 +346,6 @@ bitmap2node_name (bitstr_t *bitmap, char **node_list)
 }


-/*
- * build_node_list - build a node_list for a job including processor
- *	count on the node (e.g. "lx01[4],lx02[4],...")
- *	task distributions on the nodes
- * input: bitmap - bitmap of nodes to use
- *	node_list - place to store node list
- *	total_procs - place to store count of total processors allocated
- * output: node_list - comma separated list of nodes on which the tasks
- *	are to be initiated
- *	total_procs - count of total processors allocated
- * global: node_record_table_ptr - pointer to global node table
- * NOTE: the storage at node_list must be xfreed by the caller
- */
-void
-build_node_list (bitstr_t *bitmap, char **node_list, uint32_t *total_procs)
-{
-    int i, node_list_size;
-    int sum_procs;
-    char tmp_str[MAX_NAME_LEN+10];
-
-    *total_procs = 0;
-    node_list[0] = NULL;
-    node_list_size = 0;
-    if (bitmap == NULL)
-        fatal ("build_node_list: bitmap is NULL");
-
-    node_list[0] = xmalloc (BUF_SIZE);
-    strcpy (node_list[0], "");
-
-    sum_procs = 0;
-    for (i = 0; i < node_record_count; i++) {
-        if (bit_test (bitmap, i) != 1)
-            continue;
-        sprintf (tmp_str, "%s[%d]",
-            node_record_table_ptr[i].name,
-            node_record_table_ptr[i].cpus);
-        if (node_list_size <
-            (strlen (node_list[0]) + (MAX_NAME_LEN+10))) {
-            node_list_size += BUF_SIZE;
-            xrealloc (node_list[0], node_list_size);
-        }
-        if (sum_procs > 0)
-            strcat (node_list[0], ",");
-        strcat (node_list[0], node_record_table_ptr[i].name);
-        sum_procs += node_record_table_ptr[i].cpus;
-    }
-    *total_procs = sum_procs;
-    xrealloc (node_list[0], strlen (node_list[0]) + 1);
-}
-
-
 /*
 * create_config_record - create a config_record entry and set its values to the defaults.
 * output: returns pointer to the config_record
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 702e44e55cf..e821c2dc33a 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -31,6 +31,8 @@ struct node_set {	/* set of nodes with same configuration */
     bitstr_t *my_bitmap;
 };

+void build_node_details (bitstr_t *node_bitmap,
+    uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t **cpu_count_reps);
 int pick_best_quadrics (bitstr_t *bitmap, bitstr_t *req_bitmap, int req_nodes,
     int req_cpus, int consecutive);
 int pick_best_nodes (struct node_set *node_set_ptr, int node_set_size,
@@ -625,7 +627,7 @@ pick_best_nodes (struct node_set *node_set_ptr, int node_set_size,
                 contiguous);
         if ((pick_code == 0) && (max_nodes != INFINITE) &&
             (bit_set_count (avail_bitmap) > max_nodes)) {
-            info ("pick_best_nodes: too many nodes selected %u of %u",
+            info ("pick_best_nodes: too many nodes selected %u, partition maximum is %u",
                 bit_set_count (avail_bitmap), max_nodes);
             error_code = EINVAL;
             break;
@@ -851,9 +853,10 @@ select_nodes (struct job_record *job_ptr, int test_only)

     /* assign the nodes and stage_in the job */
     bitmap2node_name (req_bitmap, &(job_ptr->nodes));
-    build_node_list (req_bitmap,
-        &job_ptr->details->node_list,
-        &job_ptr->details->total_procs);
+    build_node_details (req_bitmap,
+        &(job_ptr->num_cpu_groups),
+        &(job_ptr->cpus_per_node),
+        &(job_ptr->cpu_count_reps));
     allocate_nodes (req_bitmap);
     job_ptr->node_bitmap = req_bitmap;
     req_bitmap = NULL;
@@ -883,6 +886,59 @@ select_nodes (struct job_record *job_ptr, int test_only)
 }


+/*
+ * build_node_details - given a bitmap, report the number of cpus per node and their distribution
+ * input: bitstr_t *node_bitmap - the map of nodes
+ * output: num_cpu_groups - element count in arrays cpus_per_node and cpu_count_reps
+ *	cpus_per_node - array of cpus per node allocated
+ *	cpu_count_reps - array of consecutive nodes with same cpu count
+ * NOTE: the arrays cpus_per_node and cpu_count_reps must be xfreed by the caller
+ */
+void
+build_node_details (bitstr_t *node_bitmap,
+    uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t **cpu_count_reps)
+{
+    int array_size, array_pos, i;
+    int first_bit, last_bit;
+
+    *num_cpu_groups = 0;
+    if (node_bitmap == NULL)
+        return;
+
+    first_bit = bit_ffs(node_bitmap);
+    last_bit  = bit_fls(node_bitmap);
+    array_pos = -1;
+
+    /* assume relatively homogeneous array for array allocations */
+    /* we can grow or shrink the arrays as needed */
+    array_size = (last_bit - first_bit) / 100 + 2;
+    cpus_per_node[0]  = xmalloc (sizeof(uint32_t *) * array_size);
+    cpu_count_reps[0] = xmalloc (sizeof(uint32_t *) * array_size);
+
+    for (i = first_bit; i <= last_bit; i++) {
+        if (bit_test (node_bitmap, i) != 1)
+            continue;
+        if ((array_pos == -1) ||
+            (cpus_per_node[0][array_pos] != node_record_table_ptr[i].cpus)) {
+            array_pos++;
+            if (array_pos >= array_size) {	/* grow arrays */
+                array_size *= 2;
+                xrealloc (cpus_per_node[0],  (sizeof(uint32_t *) * array_size));
+                xrealloc (cpu_count_reps[0], (sizeof(uint32_t *) * array_size));
+            }
+            cpus_per_node [0][array_pos] = node_record_table_ptr[i].cpus;
+            cpu_count_reps[0][array_pos] = 1;
+        }
+        else {
+            cpu_count_reps[0][array_pos]++;
+        }
+    }
+    array_size = array_pos + 1;
+    *num_cpu_groups = array_size;
+    xrealloc (cpus_per_node[0],  (sizeof(uint32_t *) * array_size));
+    xrealloc (cpu_count_reps[0], (sizeof(uint32_t *) * array_size));
+}
+
 /*
  * valid_features - determine if the requested features are satisfied
  *	by those available
  * input: requested - requested features (by a job)
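build_node_details above sizes its arrays for a roughly homogeneous cluster (one group per 100 nodes, plus slack) and doubles them whenever a new group is needed. The same grouping rule can be sketched outside slurmctld, with standard malloc/realloc standing in for SLURM's xmalloc/xrealloc; the function name encode_cpu_groups and the fixed input array are illustrative only, not part of the patch:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* run-length encode per-node cpu counts into cpus_per_node/cpu_count_reps;
 * returns the number of groups written (error checks omitted for brevity) */
static uint16_t
encode_cpu_groups (const uint32_t *node_cpus, int node_count,
		   uint32_t **cpus_per_node, uint32_t **cpu_count_reps)
{
	int array_size = 2, pos = -1, i;

	*cpus_per_node  = malloc (sizeof (uint32_t) * array_size);
	*cpu_count_reps = malloc (sizeof (uint32_t) * array_size);

	for (i = 0; i < node_count; i++) {
		if ((pos == -1) || ((*cpus_per_node)[pos] != node_cpus[i])) {
			if (++pos >= array_size) {	/* grow by doubling */
				array_size *= 2;
				*cpus_per_node  = realloc (*cpus_per_node,
						sizeof (uint32_t) * array_size);
				*cpu_count_reps = realloc (*cpu_count_reps,
						sizeof (uint32_t) * array_size);
			}
			(*cpus_per_node)[pos]  = node_cpus[i];	/* start new group */
			(*cpu_count_reps)[pos] = 1;
		} else
			(*cpu_count_reps)[pos]++;	/* extend current group */
	}
	return (uint16_t) (pos + 1);
}

int
main (void)
{
	uint32_t node_cpus[] = { 4, 4, 4, 4, 8, 8, 4, 4 };	/* lx[0-7] */
	uint32_t *cpus = NULL, *reps = NULL;
	uint16_t groups = encode_cpu_groups (node_cpus, 8, &cpus, &reps);
	int i;

	for (i = 0; i < groups; i++)
		printf ("%u(x%u)%s", cpus[i], reps[i],
			(i < groups - 1) ? ", " : "\n");	/* 4(x4), 8(x2), 4(x2) */
	free (cpus);
	free (reps);
	return 0;
}

Unlike build_node_details, this sketch scans a plain array rather than a node bitmap and skips the final shrink-to-fit reallocation; it is only meant to show the grouping rule.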
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 1b4aefecdf1..7f144afccb7 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -1,17 +1,28 @@
-/*
- * slurmctld.h - definitions for slurmcltd use
- *
- * NOTE: the job, node, and partition specifications are all of the
- * same basic format:
- * if the first character of a line is "#" then it is a comment.
- * place all information for a single node, partition, or job on a
- * single line.
- * space delimit collection of keywords and values and separate
- * the keyword from value with an equal sign (e.g. "cpus=3").
- * list entries should be comma separated (e.g. "nodes=lx01,lx02").
- *
- * see the slurm administrator guide for more details.
- */
+/*****************************************************************************\
+ * slurmctld.h - definitions of functions and structures for slurmctld use
+ *****************************************************************************
+ * Copyright (C) 2002 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Moe Jette <jette@llnl.gov> et al.
+ * UCRL-CODE-2002-040.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.llnl.gov/linux/slurm/>.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+\*****************************************************************************/

 #ifndef _HAVE_SLURM_H
 #define _HAVE_SLURM_H
@@ -129,6 +140,7 @@ extern time_t last_step_update;	/* time of last update to job steps */

 extern int job_count;			/* number of jobs in the system */

+/* job_details - specification of a job's constraints, not required after initiation */
 struct job_details {
     uint32_t magic;		/* magic cookie to test data integrity */
     uint32_t num_procs;		/* minimum number of processors */
@@ -145,7 +157,6 @@ struct job_details {
     char *job_script;		/* name of job script to execute */
     uint16_t procs_per_task;	/* processors required per task */
     uint32_t total_procs;	/* total number of allocated processors, for accounting */
-    char *node_list;		/* comma separated assigned node list (by task) */
     time_t submit_time;		/* time of submission */
 };

@@ -165,6 +176,9 @@ struct job_record {
     uint32_t priority;		/* relative priority of the job */
     struct job_details *details;	/* job details (set until job terminates) */
     uint16_t next_step_id;	/* next step id to be used */
+    uint16_t num_cpu_groups;	/* element count in arrays cpus_per_node and cpu_count_reps */
+    uint32_t *cpus_per_node;	/* array of cpus per node allocated */
+    uint32_t *cpu_count_reps;	/* array of consecutive nodes with same cpu count */
 };

 struct step_record {
@@ -216,21 +230,6 @@ extern void bitmap2node_name (bitstr_t *bitmap, char **node_list);
 */
 extern enum task_dist block_or_cycle (char *in_string);

-/*
- * build_node_list - build a node_list for a job including processor
- *	count on the node (e.g. "lx01[4],lx02[4],...")
- * input: bitmap - bitmap of nodes to use
- *	node_list - place to store node list
- *	total_procs - place to store count of total processors allocated
- * output: node_list - comma separated list of nodes on which the tasks
- *	are to be initiated
- *	total_procs - count of total processors allocated
- * global: node_record_table_ptr - pointer to global node table
- * NOTE: the storage at node_list must be xfreed by the caller
- */
-extern void build_node_list (bitstr_t *bitmap, char **node_list,
-    uint32_t *total_procs);
-
 /*
  * count_cpus - report how many cpus are associated with the identified nodes
  * input: bitmap - a node bitmap
@@ -421,23 +420,9 @@ extern int init_slurm_conf ();

 extern int is_key_valid (void * key);

-/*
- * job_allocate - parse the suppied job specification, create job_records for it,
- * and allocate nodes for it.  if the job can not be immediately allocated
- * nodes, EAGAIN will be returned
- * input: job_specs - job specifications
- *	new_job_id - location for storing new job's id
- *	node_list - location for storing new job's allocated nodes
- * output: new_job_id - the job's ID
- *	node_list - list of nodes allocated to the job
- * returns 0 on success, EINVAL if specification is invalid,
- *	EAGAIN if higher priority jobs exist
- * globals: job_list - pointer to global job list
- *	list_part - global list of partition info
- *	default_part_loc - pointer to default partition
- * NOTE: the calling program must xfree the memory pointed to by node_list
- */
-extern int job_allocate (job_desc_msg_t *job_specs, uint32_t *new_job_id, char **node_list, int immediate, int will_run);
+extern int job_allocate (job_desc_msg_t *job_specs, uint32_t *new_job_id, char **node_list,
+    uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps,
+    int immediate, int will_run);

 /*
  * job_cancel - cancel the specified job
diff --git a/testsuite/slurm_unit/api/manual/allocate-tst.c b/testsuite/slurm_unit/api/manual/allocate-tst.c
index 196ca989770..183957567c3 100644
--- a/testsuite/slurm_unit/api/manual/allocate-tst.c
+++ b/testsuite/slurm_unit/api/manual/allocate-tst.c
@@ -31,8 +31,10 @@
 #include <src/api/slurm.h>
 #include <testsuite/dejagnu.h>

+void report_results(resource_allocation_response_msg_t* resp_msg);
+
 /* main is used here for testing purposes only */
-	int
+int
 main (int argc, char *argv[])
 {
     int error_code, job_count, max_jobs;
@@ -60,18 +62,17 @@ main (int argc, char *argv[])
     job_mesg. time_limit = 200;
     job_mesg. num_procs = 1000;
     job_mesg. num_nodes = 400;
+job_mesg. num_nodes = 4096;
     job_mesg. user_id = 1500;

     error_code = slurm_allocate_resources ( &job_mesg , &resp_msg , false );
     if (error_code)
         printf ("allocate error %d\n", error_code);
-    else {
-        printf ("allocate nodes %s to job %u\n", resp_msg->node_list, resp_msg->job_id);
-    }
-    job_count = 1;
+    else
+        report_results(resp_msg);

-    for ( ; job_count <max_jobs; job_count++) {
+    for (job_count = 1 ; job_count <max_jobs; job_count++) {
         slurm_init_job_desc_msg( &job_mesg );
         job_mesg. contiguous = 1;
         job_mesg. groups = ("students,employee\0");
@@ -87,6 +88,8 @@ main (int argc, char *argv[])
         job_mesg. time_limit = 200;
         job_mesg. num_procs = 4000;
         job_mesg. user_id = 1500;
+/* job_mesg. num_nodes = 4096; */
+job_mesg. contiguous = 0;	/* the string also had Immediate */

         error_code = slurm_allocate_resources ( &job_mesg , &resp_msg , true );
@@ -94,10 +97,8 @@ main (int argc, char *argv[])
             printf ("allocate error %d\n", error_code);
             break;
         }
-        else {
-            printf ("allocate nodes %s to job %u\n",
-                resp_msg->node_list, resp_msg->job_id);
-        }
+        else
+            report_results(resp_msg);
     }

     for ( ; job_count <max_jobs; job_count++) {
@@ -111,10 +112,8 @@ main (int argc, char *argv[])
             printf ("allocate error %d\n", error_code);
             break;
         }
-        else {
-            printf ("allocate nodes %s to job %u\n",
-                resp_msg->node_list, resp_msg->job_id);
-        }
+        else
+            report_results(resp_msg);
     }

     for ( ; job_count <max_jobs; job_count++) {
@@ -128,11 +127,27 @@ main (int argc, char *argv[])
             printf ("allocate error %d\n", error_code);
             break;
         }
-        else {
-            printf ("allocate nodes %s to job %u\n",
-                resp_msg->node_list, resp_msg->job_id);
-        }
+        else
+            report_results(resp_msg);
     }
     return (0);
 }
+
+/* report results of successful job allocation */
+void
+report_results(resource_allocation_response_msg_t* resp_msg)
+{
+    int i;
+
+    printf ("allocate nodes %s to job %u\n", resp_msg->node_list, resp_msg->job_id);
+    if (resp_msg->num_cpu_groups > 0) {
+        printf ("processor counts: ");
+        for (i=0; i<resp_msg->num_cpu_groups; i++) {
+            if (i > 0)
+                printf(", ");
+            printf ("%u(x%u)", resp_msg->cpus_per_node[i], resp_msg->cpu_count_reps[i]);
+        }
+        printf ("\n");
+    }
+}
--
GitLab
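For completeness, the response fields added by this patch also let a client recover the job's total processor count, which build_node_list previously reported through its total_procs output. A hedged sketch follows; the struct merely mirrors the resource_allocation_response_msg_t fields used by report_results above and is not the real declaration from src/api/slurm.h:

#include <stdio.h>
#include <stdint.h>

/* illustrative mirror of the response fields used above; the real type
 * lives in src/api/slurm.h */
typedef struct {
	uint32_t job_id;
	char *node_list;
	uint16_t num_cpu_groups;
	uint32_t *cpus_per_node;
	uint32_t *cpu_count_reps;
} alloc_resp_t;

/* total processors = sum of cpus_per_node[i] * cpu_count_reps[i] over all groups */
static uint32_t
total_procs (const alloc_resp_t *resp)
{
	uint32_t sum = 0;
	int i;

	for (i = 0; i < resp->num_cpu_groups; i++)
		sum += resp->cpus_per_node[i] * resp->cpu_count_reps[i];
	return sum;
}

int
main (void)
{
	uint32_t cpus[] = { 4, 8, 4 };
	uint32_t reps[] = { 4, 2, 2 };
	alloc_resp_t resp = { 123, "lx[0-7]", 3, cpus, reps };

	/* with the lx[0-7] example: 4*4 + 8*2 + 4*2 = 40 processors */
	printf ("job %u on %s has %u cpus\n",
		resp.job_id, resp.node_list, total_procs (&resp));
	return 0;
}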