diff --git a/src/slurmctld/Makefile.am b/src/slurmctld/Makefile.am
index a0c24195b192fac1f8e838d0f3a5e3fed9e40d41..80955ae9e609050e0f5afa5fecb186caf6ce04ee 100644
--- a/src/slurmctld/Makefile.am
+++ b/src/slurmctld/Makefile.am
@@ -48,13 +48,13 @@ slurmctld_SOURCES = controller.c \
 #
 # Note: automake 1.5 will complain about these...
 #
-controller : controller_d.o node_mgr.o node_scheduler.o partition_mgr.o read_config.o
+controller : controller_d.o job_mgr.o node_mgr.o node_scheduler.o partition_mgr.o read_config.o
 job_mgr : job_mgr_d.o node_mgr.o node_scheduler.o partition_mgr.o
 job_scheduler : job_scheduler_d.o job_mgr.o
 node_mgr : node_mgr_d.o
 node_scheduler : node_scheduler_d.o job_mgr.o node_mgr.o partition_mgr.o read_config.o
 partition_mgr : partition_mgr_d.o node_mgr.o
-read_config : read_config_d.o node_mgr.o partition_mgr.o
+read_config : read_config_d.o job_mgr.o node_mgr.o node_scheduler.o partition_mgr.o
 
 % : %_d.o
 	$(LINK) $(LDFLAGS) $^ $(LDADD) $(LIBS)
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index aa7f66fc57397f654100a7a04b66c2bcce6009f0..85cc51233af75c8fed669b37dab129ba2b69b98c 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -222,7 +222,7 @@ slurmctld_req (int sockfd) {
 	int error_code, in_size, i;
 	char in_line[BUF_SIZE], node_name[MAX_NAME_LEN];
 	int cpus, real_memory, tmp_disk;
-	char *node_name_ptr, *part_name, *time_stamp;
+	char *job_id_ptr, *node_name_ptr, *part_name, *time_stamp;
 	time_t last_update;
 	clock_t start_time;
 	char *dump;
@@ -234,13 +234,15 @@ slurmctld_req (int sockfd) {
 	/* Allocate: allocate resources for a job */
 	if (strncmp ("Allocate", in_line, 8) == 0) {
 		node_name_ptr = NULL;
-		error_code = select_nodes (&in_line[8], &node_name_ptr);	/* skip "Allocate" */
+		error_code = job_allocate(&in_line[8],	/* skip "Allocate" */
+			&job_id_ptr, &node_name_ptr);
 		if (error_code)
 			info ("slurmctld_req: error %d allocating resources for %s, time=%ld",
			      error_code, &in_line[8], (long) (clock () - start_time));
 		else
-			info ("slurmctld_req: allocated nodes %s to job %s, time=%ld",
-			      node_name_ptr, &in_line[8], (long) (clock () - start_time));
+			info ("slurmctld_req: allocated nodes %s to %s, JobId=%s, time=%ld",
+			      node_name_ptr, &in_line[8], job_id_ptr,
+			      (long) (clock () - start_time));
 		if (error_code == 0)
 			send (sockfd, node_name_ptr, strlen (node_name_ptr) + 1, 0);
@@ -249,6 +251,8 @@
 		else
 			send (sockfd, "EINVAL", 7, 0);
 
+		if (job_id_ptr)
+			xfree (job_id_ptr);
 		if (node_name_ptr)
 			xfree (node_name_ptr);
 	}
@@ -367,19 +371,20 @@ slurmctld_req (int sockfd) {
 	/* JobSubmit - submit a job to the slurm queue */
 	else if (strncmp ("JobSubmit", in_line, 9) == 0) {
 		time_stamp = NULL;
-		error_code = EINVAL;
+		error_code = job_create(&in_line[9], &job_id_ptr);	/* skip "JobSubmit" */
 		if (error_code)
 			info ("slurmctld_req: job_submit error %d, time=%ld",
			      error_code, (long) (clock () - start_time));
 		else
-			info ("slurmctld_req: job_submit success for %s, time=%ld",
-			      &in_line[10], (long) (clock () - start_time));
-		fprintf (stderr, "job_submit time = %ld usec\n",
-			 (long) (clock () - start_time));
+			info ("slurmctld_req: job_submit success for %s, id=%s, time=%ld",
+			      &in_line[9], job_id_ptr,
+			      (long) (clock () - start_time));
 		if (error_code == 0)
-			send (sockfd, dump, dump_size, 0);
+			send (sockfd, job_id_ptr, strlen(job_id_ptr) + 1, 0);
 		else
 			send (sockfd, "EINVAL", 7, 0);
+		if (job_id_ptr)
+			xfree (job_id_ptr);
 	}
 
 	/* JobWillRun - determine if job with given configuration can be initiated now */
@@ -485,7 +490,7 @@ slurmctld_req (int sockfd) {
 		else
 			info ("slurmctld_req: updated partition %s, time=%ld",
			      part_name, (long) (clock () - start_time));
-	} 
+	}
 
 	sprintf (in_line, "%d", error_code);
 	send (sockfd, in_line, strlen (in_line) + 1, 0);
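Note (illustration only, not part of the patch): with the handler changes above, "JobSubmit" now answers with the new job's id string and "Allocate" with the allocated node names; both answer "EINVAL" on failure. A minimal sketch of a client for the new reply format, assuming a pre-connected socket and a hypothetical BUF_SIZE:

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

#define BUF_SIZE 1024	/* hypothetical size, for the sketch only */

/* submit a job specification and print the JobId slurmctld assigns */
static int submit_job (int sockfd, const char *spec)
{
	char request[BUF_SIZE], reply[BUF_SIZE];
	int n;

	snprintf (request, sizeof (request), "JobSubmit %s", spec);
	if (send (sockfd, request, strlen (request) + 1, 0) < 0)
		return -1;
	n = recv (sockfd, reply, sizeof (reply) - 1, 0);
	if (n <= 0)
		return -1;
	reply[n] = '\0';
	if (strcmp (reply, "EINVAL") == 0)
		return -1;	/* specification rejected */
	printf ("new JobId=%s\n", reply);	/* e.g. "batch.42" */
	return 0;
}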
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index ae47bf5d8e22d5dfaaf2ab38ff611fe989fb3865..9b2e1682b62ddce9253e4cf871012fc7509552da 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -423,29 +423,77 @@ init_job_conf ()
 }
 
 
+/*
+ * job_allocate - parse the supplied job specification, create job_records for it,
+ *	and allocate nodes for it. if the job cannot be immediately allocated
+ *	nodes, EAGAIN will be returned
+ * input: job_specs - job specifications
+ *	new_job_id - location for storing new job's id
+ *	node_list - location for storing new job's allocated nodes
+ * output: new_job_id - the job's ID
+ *	node_list - list of nodes allocated to the job
+ *	returns 0 on success, EINVAL if specification is invalid,
+ *	EAGAIN if higher priority jobs exist
+ * globals: job_list - pointer to global job list
+ *	list_part - global list of partition info
+ *	default_part_loc - pointer to default partition
+ * NOTE: the calling program must xfree the memory pointed to by new_job_id
+ *	and node_list
+ */
+int
+job_allocate (char *job_specs, char **new_job_id, char **node_list)
+{
+	int error_code, i;
+	struct job_record *job_ptr;
+
+	new_job_id[0] = node_list[0] = NULL;
+
+	error_code = job_create (job_specs, new_job_id);
+	if (error_code)
+		return error_code;
+	job_ptr = find_job_record(new_job_id[0]);
+	if (job_ptr == NULL)
+		fatal ("job_allocate: allocated job %s lacks record",
+		       new_job_id[0]);
+
+/*	if (top_priority(new_job_id[0]) != 0)
+		return EAGAIN; */
+	error_code = select_nodes(job_ptr);
+	if (error_code)
+		return error_code;
+	i = strlen(job_ptr->nodes) + 1;
+	node_list[0] = xmalloc(i);
+	strcpy(node_list[0], job_ptr->nodes);
+	return 0;
+}
+
+
 /*
  * job_create - parse the suppied job specification and create job_records for it
  * input: job_specs - job specifications
- * output: returns 0 on success, EINVAL if specification is invalid
+ *	new_job_id - location for storing new job's id
+ * output: new_job_id - the job's ID
+ *	returns 0 on success, EINVAL if specification is invalid
  * globals: job_list - pointer to global job list
  *	list_part - global list of partition info
  *	default_part_loc - pointer to default partition
+ * NOTE: the calling program must xfree the memory pointed to by new_job_id
  */
 int
-job_create (char *job_specs)
+job_create (char *job_specs, char **new_job_id)
 {
 	char *req_features, *req_node_list, *job_name, *req_group;
 	char *req_partition, *script, *out_line, *job_id;
 	int contiguous, req_cpus, req_nodes, min_cpus, min_memory;
 	int i, min_tmp_disk, time_limit, procs_per_task, user_id;
 	int error_code, cpu_tally, dist, node_tally, key, shared;
-	char *job_script;
 	struct part_record *part_ptr;
 	struct job_record *job_ptr;
 	struct job_details *detail_ptr;
 	float priority;
 	bitstr_t *req_bitmap;
 
+	new_job_id[0] = NULL;
 	req_features = req_node_list = job_name = req_group = NULL;
 	job_id = req_partition = script = NULL;
 	req_bitmap = NULL;
@@ -478,17 +526,22 @@ job_create (char *job_specs)
 		error_code = EINVAL;
 		goto cleanup;
 	}
+	if (job_id && (strlen(job_id) >= MAX_ID_LEN)) {
+		info ("job_create: JobId specified is too long");
+		error_code = EINVAL;
+		goto cleanup;
+	}
 	if (user_id == NO_VAL) {
-		info ("job_create: job failed to User");
+		info ("job_create: job failed to specify User");
 		error_code = EINVAL;
 		goto cleanup;
 	}
 	if (contiguous == NO_VAL)
 		contiguous = 0;	/* default not contiguous */
 	if (req_cpus == NO_VAL)
-		req_cpus = 0;	/* default no cpu count requirements */
+		req_cpus = 1;	/* default cpu count of 1 */
 	if (req_nodes == NO_VAL)
-		req_nodes = 0;	/* default no node count requirements */
+		req_nodes = 1;	/* default node count of 1 */
 	if (min_cpus == NO_VAL)
 		min_cpus = 1;	/* default is 1 processor per node */
 	if (min_memory == NO_VAL)
@@ -513,6 +566,7 @@ job_create (char *job_specs)
 			error_code = EINVAL;
 			goto cleanup;
 		}
+		xfree (req_partition);
 	}
 	else {
 		if (default_part_loc == NULL) {
@@ -567,6 +621,7 @@ job_create (char *job_specs)
 		if (i > req_nodes)
 			req_nodes = i;
 		bit_free (req_bitmap);
+		req_bitmap = NULL;
 	}
 	if (req_cpus > part_ptr->total_cpus) {
 		info ("select_nodes: too many cpus (%d) requested of partition %s(%d)",
@@ -594,6 +649,7 @@ job_create (char *job_specs)
 	if ((job_ptr == NULL) || error_code)
 		goto cleanup;
 
+	strncpy (job_ptr->partition, part_ptr->name, MAX_NAME_LEN);
 	if (job_id) {
 		strncpy (job_ptr->job_id, job_id, MAX_ID_LEN);
 		xfree (job_id);
@@ -605,7 +661,6 @@ job_create (char *job_specs)
 		strncpy (job_ptr->name, job_name, MAX_NAME_LEN);
 		xfree (job_name);
 	}
-	strncpy (job_ptr->partition, part_ptr->name, MAX_NAME_LEN);
 	job_ptr->user_id = (uid_t) user_id;
 	job_ptr->job_state = JOB_PENDING;
 	job_ptr->time_limit = time_limit;
@@ -617,7 +672,7 @@ job_create (char *job_specs)
 	detail_ptr = job_ptr->details;
 	detail_ptr->num_procs = req_cpus;
 	detail_ptr->num_nodes = req_nodes;
-	if (req_nodes)
+	if (req_node_list)
 		detail_ptr->nodes = req_node_list;
 	if (req_features)
 		detail_ptr->features = req_features;
@@ -634,6 +689,8 @@ job_create (char *job_specs)
 /*	job_ptr->end_time		*leave as NULL pointer for now */
 /*	detail_ptr->total_procs		*leave as NULL pointer for now */
 
+	new_job_id[0] = xmalloc(strlen(job_ptr->job_id) + 1);
+	strcpy(new_job_id[0], job_ptr->job_id);
 	return 0;
 
       cleanup:
@@ -883,10 +940,16 @@ parse_job_specs (char *job_specs, char **req_features, char **req_node_list,
 		xfree (req_group[0]);
 	if (req_partition[0])
 		xfree (req_partition[0]);
+	if (script[0])
+		xfree (script[0]);
+	if (contiguous_str)
+		xfree (contiguous_str);
+	if (dist_str)
+		xfree (dist_str);
 	if (shared_str)
 		xfree (shared_str);
 	req_features[0] = req_node_list[0] = req_group[0] = NULL;
-	req_partition[0] = job_name[0] = script[0] = NULL;
+	job_id[0] = req_partition[0] = job_name[0] = script[0] = NULL;
 }
@@ -921,6 +984,8 @@ set_job_id (struct job_record *job_ptr)
 
 	if ((job_ptr == NULL) || (job_ptr->magic != JOB_MAGIC))
 		fatal ("set_job_id: invalid job_ptr");
+	if ((job_ptr->partition == NULL) || (strlen(job_ptr->partition) == 0))
+		fatal ("set_job_id: partition not set");
 	while (1) {
 		if (job_ptr->partition)
 			sprintf(new_id, "%s.%d", job_ptr->partition, id_sequence++);
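Note (illustration only, not part of the patch): the comments above spell out the memory contract for the two new entry points; both hand back xmalloc'ed strings that the caller must xfree. A sketch of an in-process caller, assuming slurmctld's own info() and xfree() from this tree:

/* illustration only: exercise job_create/job_allocate and honor the
 * xfree contract documented in the NOTEs above */
static void example_submit_and_run (char *job_specs)
{
	char *job_id = NULL, *node_list = NULL;
	int error_code;

	/* queue the job without allocating nodes */
	error_code = job_create (job_specs, &job_id);
	if (error_code == 0) {
		info ("queued JobId=%s", job_id);
		xfree (job_id);
	}

	/* or queue and allocate in one step; EAGAIN means the job
	 * could not be allocated nodes immediately */
	error_code = job_allocate (job_specs, &job_id, &node_list);
	if (error_code == 0) {
		info ("JobId=%s allocated nodes %s", job_id, node_list);
		xfree (job_id);
		xfree (node_list);
	}
}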
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index d9078fcee34922c63900c682bbd4a929e0da3370..86a389f171d695a18e5090cdb50ba2808f58420c 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -42,7 +42,7 @@ main (int argc, char *argv[])
 {
 	int error_code, line_num, i;
 	FILE *command_file;
-	char in_line[BUF_SIZE], *node_list;
+	char in_line[BUF_SIZE], *job_id, *node_list;
 	log_options_t opts = LOG_OPTS_STDERR_ONLY;
 
 	log_init(argv[0], opts, SYSLOG_FACILITY_DAEMON, NULL);
@@ -101,23 +101,27 @@ main (int argc, char *argv[])
 	line_num = 0;
 	printf ("\n");
 	while (fgets (in_line, BUF_SIZE, command_file)) {
+		job_id = node_list = NULL;
 		if (in_line[strlen (in_line) - 1] == '\n')
 			in_line[strlen (in_line) - 1] = (char) NULL;
 		line_num++;
-		error_code = select_nodes (in_line, &node_list);
+		error_code = job_allocate(in_line, &job_id, &node_list);
 		if (error_code) {
 			if (strncmp (in_line, "JobName=FAIL", 12) != 0)
 				printf ("ERROR:");
 			printf ("for job: %s\n", in_line, node_list);
-			printf ("node_scheduler: error %d from select_nodes on line %d\n\n",
+			printf ("node_scheduler: error %d from job_allocate on line %d\n\n",
			        error_code, line_num);
 		}
 		else {
-			if (strncmp (in_line, "job_name=fail", 12) == 0)
+			if (strncmp (in_line, "JobName=FAIL", 12) == 0)
				printf ("ERROR: ");
 			printf ("for job: %s\n  nodes selected %s\n\n",
			        in_line, node_list);
-			xfree (node_list);
+			if (job_id)
+				xfree (job_id);
+			if (node_list)
+				xfree (node_list);
 		}
 	}
 }
@@ -612,187 +616,76 @@ pick_best_nodes (struct node_set *node_set_ptr, int node_set_size,
 
 
 /*
- * select_nodes - select and allocate nodes to a job with the given specifications
- * input: job_specs - job specifications
- *	node_list - pointer to node list returned
- * output: node_list - list of allocated nodes
- *	returns 0 on success, EINVAL if not possible to satisfy request,
+ * select_nodes - select and allocate nodes to a specific job
+ * input: job_ptr - pointer to the job record
+ * output: returns 0 on success, EINVAL if not possible to satisfy request,
  *	or EAGAIN if resources are presently busy
+ *	job_ptr->nodes is set to the node list (on success)
 * globals: list_part - global list of partition info
 *	default_part_loc - pointer to default partition
- * NOTE: the calling program must xfree the memory pointed to by node_list
+ *	config_list - global list of node configuration info
 */
 int
-select_nodes (char *job_specs, char **node_list)
+select_nodes (struct job_record *job_ptr)
 {
-	char *req_features, *req_node_list, *job_name, *req_group;
-	char *req_partition, *out_line, *script, *job_id;
-	int contiguous, req_cpus, req_nodes, min_cpus, min_memory;
-	int min_tmp_disk, time_limit, procs_per_task, dist;
-	int error_code, cpu_tally, node_tally, key, shared, user_id;
-	float priority;
-	struct part_record *part_ptr;
+	int error_code, i, node_set_index, node_set_size;
 	bitstr_t *req_bitmap, *scratch_bitmap;
 	ListIterator config_record_iterator;	/* for iterating through config_list */
-	struct config_record *config_record_point;	/* pointer to config_record */
-	int i;
+	struct config_record *config_record_point;
 	struct node_set *node_set_ptr;
-	int node_set_index, node_set_size;
+	struct part_record *part_ptr;
 
-	req_features = req_node_list = job_name = req_group = NULL;
 	req_bitmap = scratch_bitmap = NULL;
-	req_partition = script = job_id = NULL;
-	contiguous = req_cpus = req_nodes = min_cpus = NO_VAL;
-	min_memory = min_tmp_disk = NO_VAL;
-	key = shared = NO_VAL;
-	node_set_ptr = NULL;
-	config_record_iterator = NULL;
-	node_list[0] = NULL;
 	config_record_iterator = (ListIterator) NULL;
-	node_lock ();
-	part_lock ();
-
-	/* setup and basic parsing */
-	error_code =
-		parse_job_specs (job_specs, &req_features, &req_node_list,
-				 &job_name, &req_group, &req_partition,
-				 &contiguous, &req_cpus, &req_nodes,
-				 &min_cpus, &min_memory, &min_tmp_disk, &key,
-				 &shared, &dist, &script, &time_limit,
-				 &procs_per_task, &job_id, &priority, &user_id);
-	if (error_code != 0) {
-		error_code = EINVAL;	/* permanent error, invalid parsing */
-		error ("select_nodes: parsing failure on %s", job_specs);
-		goto cleanup;
-	}
-	if ((req_cpus == NO_VAL) && (req_nodes == NO_VAL) && (req_node_list == NULL)) {
-		info ("select_nodes: job failed to specify ReqNodes, TotalNodes or TotalProcs");
-		error_code = EINVAL;
-		goto cleanup;
-	}
-	if (contiguous == NO_VAL)
-		contiguous = 0;	/* default not contiguous */
-	if (req_cpus == NO_VAL)
-		req_cpus = 0;	/* default no cpu count requirements */
-	if (req_nodes == NO_VAL)
-		req_nodes = 0;	/* default no node count requirements */
-
-
-	/* find selected partition */
-	if (req_partition) {
-		part_ptr =
-			list_find_first (part_list, &list_find_part,
-					 req_partition);
-		if (part_ptr == NULL) {
-			info ("select_nodes: invalid partition specified: %s",
-			      req_partition);
-			error_code = EINVAL;
-			goto cleanup;
-		}
-	}
-	else {
-		if (default_part_loc == NULL) {
-			error ("select_nodes: default partition not set.");
-			error_code = EINVAL;
-			goto cleanup;
-		}
-		part_ptr = default_part_loc;
-	}
-
-
-	/* can this user access this partition */
-	if (part_ptr->key && (is_key_valid (key) == 0)) {
-		info ("select_nodes: job lacks key required of partition %s",
-		      part_ptr->name);
-		error_code = EINVAL;
-		goto cleanup;
-	}
-	if (match_group (part_ptr->allow_groups, req_group) == 0) {
-		info ("select_nodes: job lacks group required of partition %s",
-		      part_ptr->name);
-		error_code = EINVAL;
-		goto cleanup;
-	}
+	node_set_ptr = NULL;
+	part_ptr = NULL;
 
+	if (job_ptr == NULL)
+		fatal("select_nodes: NULL job pointer value");
+	if (job_ptr->magic != JOB_MAGIC)
+		fatal("select_nodes: bad job pointer value");
 
-	/* check if select partition has sufficient resources to satisfy request */
-	if (req_node_list) {	/* insure that selected nodes are in this partition */
-		error_code = node_name2bitmap (req_node_list, &req_bitmap);
+	/* pick up nodes from the weight ordered configuration list */
+	if (job_ptr->details->nodes) {	/* insure that selected nodes are in this partition */
+		error_code = node_name2bitmap (job_ptr->details->nodes, &req_bitmap);
 		if (error_code == EINVAL)
 			goto cleanup;
-		if (error_code != 0) {
-			error_code = EAGAIN;	/* no memory */
-			goto cleanup;
-		}
-		if (contiguous == 1)
-			bit_fill_gaps (req_bitmap);
-		if (bit_super_set (req_bitmap, part_ptr->node_bitmap) != 1) {
-			info ("select_nodes: requested nodes %s not in partition %s",
-			      req_node_list, part_ptr->name);
-			error_code = EINVAL;
-			goto cleanup;
-		}
-		i = count_cpus (req_bitmap);
-		if (i > req_cpus)
-			req_cpus = i;
-		i = bit_set_count (req_bitmap);
-		if (i > req_nodes)
-			req_nodes = i;
-	}
-	if (req_cpus > part_ptr->total_cpus) {
-		info ("select_nodes: too many cpus (%d) requested of partition %s(%d)",
-		      req_cpus, part_ptr->name, part_ptr->total_cpus);
-		error_code = EINVAL;
-		goto cleanup;
-	}
-	if ((req_nodes > part_ptr->total_nodes)
-	    || (req_nodes > part_ptr->max_nodes)) {
-		if (part_ptr->total_nodes > part_ptr->max_nodes)
-			i = part_ptr->max_nodes;
-		else
-			i = part_ptr->total_nodes;
-		info ("select_nodes: too many nodes (%d) requested of partition %s(%d)",
-		      req_nodes, part_ptr->name, i);
-		error_code = EINVAL;
-		goto cleanup;
-	}
-	if (part_ptr->shared == 2)	/* shared=force */
-		shared = 1;
-	else if ((shared != 1) || (part_ptr->shared == 0))	/* user or partition want no sharing */
-		shared = 0;
-
-
-	/* pick up nodes from the weight ordered configuration list */
+	}
+	part_ptr = find_part_record(job_ptr->partition);
+	if (part_ptr == NULL)
+		fatal("select_nodes: invalid partition name %s for job %s",
		      job_ptr->partition, job_ptr->job_id);
 	node_set_index = 0;
 	node_set_size = 0;
 	node_set_ptr = (struct node_set *) xmalloc (sizeof (struct node_set));
 	node_set_ptr[node_set_size++].my_bitmap = NULL;
 	config_record_iterator = list_iterator_create (config_list);
-	if (config_record_iterator == NULL) {
+	if (config_record_iterator == NULL)
 		fatal ("select_nodes: ListIterator_create unable to allocate memory");
-		error_code = EAGAIN;
-		goto cleanup;
-	}
 
 	while (config_record_point =
	       (struct config_record *) list_next (config_record_iterator)) {
 		int tmp_feature, check_node_config;
 
-		tmp_feature =
-			valid_features (req_features,
+		tmp_feature = valid_features (job_ptr->details->features,
				config_record_point->feature);
 		if (tmp_feature == 0)
 			continue;
 
-		/* since nodes can register with more resources than defined in the configuration, */
-		/* we want to use those higher values for scheduling, but only as needed */
-		if ((min_cpus > config_record_point->cpus) ||
-		    (min_memory > config_record_point->real_memory) ||
-		    (min_tmp_disk > config_record_point->tmp_disk))
+		/* since nodes can register with more resources than defined */
+		/* in the configuration, we want to use those higher values */
+		/* for scheduling, but only as needed */
+		if ((job_ptr->details->min_procs > config_record_point->cpus) ||
+		    (job_ptr->details->min_memory > config_record_point->real_memory) ||
+		    (job_ptr->details->min_tmp_disk > config_record_point->tmp_disk)) {
+			if (FAST_SCHEDULE)	/* don't bother checking each node */
+				continue;
 			check_node_config = 1;
+		}
 		else
 			check_node_config = 0;
+
 		node_set_ptr[node_set_index].my_bitmap =
			bit_copy (config_record_point->node_bitmap);
 		if (node_set_ptr[node_set_index].my_bitmap == NULL)
@@ -801,25 +694,24 @@ select_nodes (char *job_specs, char **node_list)
			 part_ptr->node_bitmap);
 		node_set_ptr[node_set_index].nodes =
			bit_set_count (node_set_ptr[node_set_index].my_bitmap);
-		/* check configuration of individual nodes only if the check of baseline */
-		/* values in the configuration file are too low. this will slow the scheduling */
-		/* for very large cluster. */
-		if ((FAST_SCHEDULE == 0) && check_node_config &&
-		    (node_set_ptr[node_set_index].nodes != 0)) {
+
+		/* check configuration of individual nodes only if the check */
+		/* of baseline values in the configuration file is too low. */
+		/* this will slow the scheduling for very large clusters. */
+		if (check_node_config && (node_set_ptr[node_set_index].nodes != 0)) {
 			for (i = 0; i < node_record_count; i++) {
 				if (bit_test (node_set_ptr[node_set_index].my_bitmap, i) == 0)
 					continue;
-				if ((min_cpus <=
-				     node_record_table_ptr[i].cpus)
-				    && (min_memory <=
+				if ((job_ptr->details->min_procs <=
+				     node_record_table_ptr[i].cpus)
+				    && (job_ptr->details->min_memory <=
 				     node_record_table_ptr[i].real_memory)
-				    && (min_tmp_disk <=
+				    && (job_ptr->details->min_tmp_disk <=
 				     node_record_table_ptr[i].tmp_disk))
					continue;
 				bit_clear (node_set_ptr[node_set_index].my_bitmap, i);
-				if ((--node_set_ptr[node_set_index].nodes) ==
-				    0)
+				if ((--node_set_ptr[node_set_index].nodes) == 0)
					break;
 			}
 		}
@@ -831,8 +723,7 @@ select_nodes (char *job_specs, char **node_list)
 		if (req_bitmap) {
 			if (scratch_bitmap)
				bit_or (scratch_bitmap,
-					node_set_ptr[node_set_index].
-					my_bitmap);
+					node_set_ptr[node_set_index].my_bitmap);
 			else {
 				scratch_bitmap =
					bit_copy (node_set_ptr[node_set_index].my_bitmap);
@@ -840,10 +731,8 @@ select_nodes (char *job_specs, char **node_list)
 				if (scratch_bitmap == NULL)
 					fatal ("bit_copy memory allocation failure");
 			}
 		}
-		node_set_ptr[node_set_index].cpus_per_node =
-			config_record_point->cpus;
-		node_set_ptr[node_set_index].weight =
-			config_record_point->weight;
+		node_set_ptr[node_set_index].cpus_per_node = config_record_point->cpus;
+		node_set_ptr[node_set_index].weight = config_record_point->weight;
 		node_set_ptr[node_set_index].feature = tmp_feature;
 #if DEBUG_MODULE > 1
 		info ("found %d usable nodes from configuration with %s",
@@ -856,10 +745,12 @@ select_nodes (char *job_specs, char **node_list)
 	}
 	if (node_set_index == 0) {
 		info ("select_nodes: no node configurations satisfy requirements %d:%d:%d:%s",
-		      min_cpus, min_memory, min_tmp_disk, req_features);
+		      job_ptr->details->min_procs, job_ptr->details->min_memory,
+		      job_ptr->details->min_tmp_disk, job_ptr->details->features);
 		error_code = EINVAL;
 		goto cleanup;
-	} 
+	}
+	/* eliminate last (incomplete) node_set record */
 	if (node_set_ptr[node_set_index].my_bitmap)
 		bit_free (node_set_ptr[node_set_index].my_bitmap);
 	node_set_ptr[node_set_index].my_bitmap = NULL;
@@ -869,7 +760,8 @@ select_nodes (char *job_specs, char **node_list)
 		if ((scratch_bitmap == NULL) ||
 		    (bit_super_set (req_bitmap, scratch_bitmap) != 1)) {
 			info ("select_nodes: requested nodes do not satisfy configurations requirements %d:%d:%d:%s",
-			      min_cpus, min_memory, min_tmp_disk, req_features);
+			      job_ptr->details->min_procs, job_ptr->details->min_memory,
+			      job_ptr->details->min_tmp_disk, job_ptr->details->features);
 			error_code = EINVAL;
 			goto cleanup;
 		}
@@ -878,8 +770,10 @@ select_nodes (char *job_specs, char **node_list)
 
 	/* pick the nodes providing a best-fit */
 	error_code = pick_best_nodes (node_set_ptr, node_set_size,
-				      &req_bitmap, req_cpus, req_nodes,
-				      contiguous, shared,
+				      &req_bitmap, job_ptr->details->num_procs,
+				      job_ptr->details->num_nodes,
+				      job_ptr->details->contiguous,
+				      job_ptr->details->shared,
				      part_ptr->max_nodes);
 	if (error_code == EAGAIN)
 		goto cleanup;
@@ -888,26 +782,19 @@ select_nodes (char *job_specs, char **node_list)
 		goto cleanup;
 	}
 
-	/* mark the selected nodes as STATE_STAGE_IN */
-	allocate_nodes (req_bitmap);
-	error_code = bitmap2node_name (req_bitmap, node_list);
-	if (error_code)
+	/* assign the nodes and stage_in the job */
+	error_code = bitmap2node_name (req_bitmap, &(job_ptr->nodes));
+	if (error_code) {
 		error ("bitmap2node_name error %d", error_code);
-
+		goto cleanup;
+	}
+	allocate_nodes (req_bitmap);
+	job_ptr->job_state = JOB_STAGE_IN;
+	job_ptr->start_time = time(NULL);
+	if (job_ptr->time_limit >= 0)
+		job_ptr->end_time = time(NULL) + (job_ptr->time_limit * 60);
 
       cleanup:
-	part_unlock ();
-	node_unlock ();
-	if (req_features)
-		xfree (req_features);
-	if (req_node_list)
-		xfree (req_node_list);
-	if (job_name)
-		xfree (job_name);
-	if (req_group)
-		xfree (req_group);
-	if (req_partition)
-		xfree (req_partition);
 	if (req_bitmap)
 		bit_free (req_bitmap);
 	if (scratch_bitmap)
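Note (illustration only, not part of the patch): the FAST_SCHEDULE branch added above implements a two-level resource check. The configured baseline for a node class is tested first; only when the baseline falls short does the code either skip the class outright (FAST_SCHEDULE set) or inspect each node's registered values, which may exceed the configured baseline. A standalone sketch of that decision with simplified, hypothetical types:

struct class_cfg { int cpus, real_memory; };	/* configured baseline      */
struct node_info { int cpus, real_memory; };	/* values nodes registered  */

/* return how many nodes of a class remain usable for the given minimums */
static int usable_nodes (struct class_cfg *cfg, struct node_info *nodes,
			 int node_cnt, int min_cpus, int min_memory, int fast)
{
	int i, usable = 0;

	if ((cfg->cpus >= min_cpus) && (cfg->real_memory >= min_memory))
		return node_cnt;	/* baseline suffices: keep whole class */
	if (fast)
		return 0;		/* FAST_SCHEDULE: don't check each node */
	for (i = 0; i < node_cnt; i++)	/* slow path: per-node registered values */
		if ((nodes[i].cpus >= min_cpus) &&
		    (nodes[i].real_memory >= min_memory))
			usable++;
	return usable;
}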
diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c
index 1ae3d6b241690bc440f01554e0292e02ed70a000..10e0dd996503a4c30197554969e0412565f6d899 100644
--- a/src/slurmctld/partition_mgr.c
+++ b/src/slurmctld/partition_mgr.c
@@ -106,7 +106,7 @@ main (int argc, char *argv[])
 	if (error_code)
 		printf ("ERROR: update_part error %d\n", error_code);
 
-	part_ptr = list_find_first (part_list, &list_find_part, "batch");
+	part_ptr = find_part_record ("batch");
 	if (part_ptr == NULL)
 		printf ("ERROR: list_find failure\n");
 	if (part_ptr->max_time != 34)
@@ -426,6 +426,18 @@ dump_part (struct part_record *part_record_point, char *out_line, int out_line_s
 }
 
 
+/*
+ * find_part_record - find a record for the partition with the specified name
+ * input: name - name of the desired partition
+ * output: return pointer to the partition record or NULL if not found
+ * global: part_list - global partition list
+ */
+struct part_record *
+find_part_record (char *name) {
+	return list_find_first (part_list, &list_find_part, name);
+}
+
+
 /*
  * init_part_conf - initialize the default partition configuration values and create
 *	a (global) partition list.
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index a90d2032817ab06f5c8fb89247d6c72abcc0f12e..f58fa28ddcc6da75ba69619479e13428c059c068 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -287,12 +287,13 @@ init_slurm_conf () {
 		backup_controller = NULL;
 	}
 
-	error_code = init_node_conf ();
-	if (error_code)
+	if (error_code = init_node_conf ())
 		return error_code;
 
-	error_code = init_part_conf ();
-	if (error_code)
+	if (error_code = init_part_conf ())
+		return error_code;
+
+	if (error_code = init_job_conf ())
 		return error_code;
 
 	return 0;
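Note (illustration only, not part of the patch): find_part_record, added in partition_mgr.c above, returns a pointer into the global part_list rather than a copy, so callers must not xfree the result; a NULL return means the partition does not exist. A hypothetical caller:

/* illustration only: look up a partition and report its time limit */
static int partition_max_time (char *name)
{
	struct part_record *part_ptr;

	part_ptr = find_part_record (name);
	if (part_ptr == NULL)
		return -1;	/* no such partition */
	return part_ptr->max_time;	/* do not xfree part_ptr */
}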