diff --git a/src/common/bitstring.c b/src/common/bitstring.c index 9d84aeaa78bb933521f33c4fd601d637469e972e..88791da7e16da25736c77a3e67eb0e4200fd7f72 100644 --- a/src/common/bitstring.c +++ b/src/common/bitstring.c @@ -1,12 +1,31 @@ -/* - * $Id$ - * $Source$ - * - * See comments about origin, limitations, and internal structure in - * bitstring.h. +/*****************************************************************************\ + * bitstring.c - bitmap manipulation functions + ***************************************************************************** + * See comments about origin, limitations, and internal structure in + * bitstring.h. * - * J. Garlick April 2002 - */ + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Jim Garlick <garlick@llnl.gov>, Moe Jette <jette1@llnl.gov> + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+\*****************************************************************************/ #include <assert.h> #include <stdlib.h> diff --git a/src/common/bitstring.h b/src/common/bitstring.h index 3d8a6fab84465d92a71efe058e6accc06dec3d6c..44183054df074870f52bcf5e8aa477bfad4b4a4d 100644 --- a/src/common/bitstring.h +++ b/src/common/bitstring.h @@ -1,11 +1,34 @@ -/* - * $Id$ - * $Source$ - * - * Reimplementation of the functionality of Paul Vixie's bitstring.h macros - * from his cron package and later contributed to 4.4BSD. Little remains, - * though interface semantics are preserved in functions noted below. +/*****************************************************************************\ + * bitstring.h - definitions for bitstring.c, bitmap manipulation functions + ***************************************************************************** + * Reimplementation of the functionality of Paul Vixie's bitstring.h macros + * from his cron package and later contributed to 4.4BSD. Little remains, + * though interface semantics are preserved in functions noted below. * + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Jim Garlick <garlick@llnl.gov>, Moe Jette <jette1@llnl.gov> + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +\*****************************************************************************/ + +/* * A bitstr_t is an array of configurable size words. The first two words * are for internal use. Word 0 is a magic cookie used to validate that the * bitstr_t is properly initialized. Word 1 is the number of valid bits in diff --git a/src/common/list.c b/src/common/list.c index 16f27638b26386ec4b94765ab1661cbfbc35a162..a0dbf0ffc28331701687cdab3e99d2df9b8047b6 100644 --- a/src/common/list.c +++ b/src/common/list.c @@ -285,7 +285,6 @@ int list_delete_all(List l, ListFindF f, void *key) assert(l != NULL); assert(f != NULL); - assert(key != NULL); list_mutex_lock(&l->mutex); assert(l->magic == LIST_MAGIC); pp = &l->head; diff --git a/src/common/parse_spec.c b/src/common/parse_spec.c index d9f7450280a2f805e7a281beed209bde28ce3280..3695b2b8fba9fb42d387e49952df36eaf1fa335f 100644 --- a/src/common/parse_spec.c +++ b/src/common/parse_spec.c @@ -1,8 +1,29 @@ /* $Id$ */ - -/* slurm_parser() functionality - split out from bits_bytes.c and - * moved into slurmctld/ srcdir - */ +/*****************************************************************************\ + * parse_spec.c - configuration file parser + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Moe Jette <jette1@llnl.gov> + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +\*****************************************************************************/ #include <stdarg.h> #include <stdio.h> @@ -83,7 +104,7 @@ slurm_parser (char *spec, ...) /* - * load_float - location into which result is stored + * load_float - parse a string for a keyword, value pair, and load the float value * keyword - string to search for * in_line - string to search for keyword * output: *destination - set to value, no change if value not found @@ -118,7 +139,7 @@ load_float (float *destination, char *keyword, char *in_line) /* - * load_integer - parse a string for a keyword, value pair + * load_integer - parse a string for a keyword, value pair, and load the integer value * input: *destination - location into which result is stored * keyword - string to search for * in_line - string to search for keyword @@ -171,7 +192,7 @@ load_integer (int *destination, char *keyword, char *in_line) /* - * load_long - parse a string for a keyword, value pair + * load_long - parse a string for a keyword, value pair, and load the long value * input: *destination - location into which result is stored * keyword - string to search for * in_line - string to search for keyword @@ -223,7 +244,7 @@ load_long (long *destination, char *keyword, char *in_line) /* - * load_string - parse a 
string for a keyword, value pair + * load_string - parse a string for a keyword, value pair, and load the char value * input: *destination - location into which result is stored * keyword - string to search for * in_line - string to search for keyword diff --git a/src/common/parse_spec.h b/src/common/parse_spec.h index 8b8dc3b064ac6f38c699d80a8e338f4da7ea1afa..c592996426dfe9e8696dd1c28ff0c34937d8f432 100644 --- a/src/common/parse_spec.h +++ b/src/common/parse_spec.h @@ -1,3 +1,29 @@ +/*****************************************************************************\ + * parse_spec.h - header for parser parse_spec.c + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Moe Jette <jette1@llnl.gov> + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+\*****************************************************************************/ + #ifndef _SLURM_PARSE_H_ #define _SLURM_PARSE_H_ @@ -18,19 +44,19 @@ * slurm_parser - parse the supplied specification into keyword/value pairs * only the keywords supplied will be searched for. the supplied specification * is altered, overwriting the keyword and value pairs with spaces. - * input: spec - pointer to the string of specifications - * sets of three values (as many sets as required): keyword, type, value - * keyword - string with the keyword to search for including equal sign - * (e.g. "name=") - * type - char with value 'd' for int, 'f' for float, 's' for string - * value - pointer to storage location for value (char **) for type 's' - * output: spec - everything read is overwritten by speces - * value - set to read value (unchanged if keyword not found) - * return - 0 if no error, otherwise errno code - * NOTE: terminate with a keyword value of "END" - * NOTE: values of type (char *) are xfreed if non-NULL. caller must xfree any - * returned value */ -extern int slurm_parser (char *spec, ...); +extern int slurm_parser (char *spec, ...) 
; + +/* load_float - parse a string for a keyword, value pair, and load the float value */ +extern int load_float (float *destination, char *keyword, char *in_line) ; + +/* load_integer - parse a string for a keyword, value pair, and load the integer value */ +extern int load_integer (int *destination, char *keyword, char *in_line) ; + +/* load_long - parse a string for a keyword, value pair, and load the long value */ +extern int load_long (long *destination, char *keyword, char *in_line) ; + +/* load_string - parse a string for a keyword, value pair, and load the char value */ +extern int load_string (char **destination, char *keyword, char *in_line) ; #endif diff --git a/src/scancel/scancel.c b/src/scancel/scancel.c index 26b6cc2d33772766c52607be6886617eccaf9060..a12ddb2c8664a15a37ab2e584fd6fd3baabaaa87 100644 --- a/src/scancel/scancel.c +++ b/src/scancel/scancel.c @@ -1,5 +1,5 @@ /*****************************************************************************\ - * scancel - cancel the specified job id or step id + * scancel - cancel specified job(s) and/or job step(s) ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -88,6 +88,7 @@ main (int argc, char *argv[]) exit (0); } +/* job_cancel - process request to cancel a specific job or job step */ void job_cancel (char *name, int interactive) { @@ -146,6 +147,7 @@ job_cancel (char *name, int interactive) } } +/* confirmation - Confirm job cancel request interactively */ int confirmation (uint32_t job_id, int has_step, uint32_t step_id) { @@ -166,10 +168,11 @@ confirmation (uint32_t job_id, int has_step, uint32_t step_id) } +/* usage - print message describing command line options for scancel */ void -usage(char *command) +usage (char *command) { - printf ("Usage: %s job_id[.step_id] [job_id[.step_id] ...]\n", command); + printf ("Usage: %s [-i] [-v] job_id[.step_id] [job_id[.step_id] ...]\n", command); } diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index f29dab8be2a7d0b2bf753c718849f62c19679492..760f456956d0bca8b90a28431a829ead9876894c 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -391,7 +391,7 @@ slurm_rpc_job_step_cancel ( slurm_msg_t * msg ) } else { - info ("slurm_rpc_job_step_cancel success for %u, time=%ld", + info ("slurm_rpc_job_step_cancel success for JobId=%u, time=%ld", job_step_id_msg->job_id, (long) (clock () - start_time)); slurm_send_rc_msg ( msg , SLURM_SUCCESS ); } @@ -584,7 +584,7 @@ void slurm_rpc_allocate_resources ( slurm_msg_t * msg , uint8_t immediate ) } else { - info ("aslurm_rpc_allocate_resources allocated nodes %s to JobId=%u, time=%ld", + info ("slurm_rpc_allocate_resources allocated nodes %s to JobId=%u, time=%ld", node_list_ptr , job_id , (long) (clock () - start_time)); @@ -705,7 +705,7 @@ slurm_rpc_job_step_create( slurm_msg_t* msg ) (long) (clock () - start_time)); job_step_resp.job_step_id = step_rec->step_id; - bitmap2node_name( step_rec->node_bitmap, &(job_step_resp.node_list) ); + job_step_resp.node_list = bitmap2node_name( step_rec->node_bitmap ); job_step_resp.credentials = &cred; #ifdef HAVE_LIBELAN3 diff --git 
a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 4f00ef00029ec515646915bbb77d87178e0a8323..e720eaf08f90860ab31366773962f2e9f0b7ee28 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1,5 +1,7 @@ /*****************************************************************************\ * job_mgr.c - manage the job information of slurm + * Note: there is a global job list (job_list), job_count, time stamp + * (last_job_update), and hash table (job_hash, job_hash_over, max_hash_over) ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -34,12 +36,12 @@ #include <stdlib.h> #include <string.h> -#include <src/slurmctld/slurmctld.h> #include <src/common/list.h> #include <src/common/macros.h> #include <src/common/pack.h> #include <src/common/slurm_protocol_errno.h> #include <src/common/xstring.h> +#include <src/slurmctld/slurmctld.h> #define BUF_SIZE 1024 #define MAX_STR_PACK 128 @@ -65,8 +67,8 @@ void list_delete_job (void *job_entry); int list_find_job_id (void *job_entry, void *key); int list_find_job_old (void *job_entry, void *key); int top_priority (struct job_record *job_ptr); -int copy_job_desc_to_job_record ( job_desc_msg_t * job_desc , struct job_record ** job_ptr , struct part_record *part_ptr, bitstr_t *req_bitmap) ; -int validate_job_desc ( job_desc_msg_t * job_desc_msg , int allocate ) ; +int copy_job_desc_to_job_record ( job_desc_msg_t * job_desc , struct job_record ** job_ptr , struct part_record *part_ptr, bitstr_t *req_bitmap) ; +int validate_job_desc ( job_desc_msg_t * job_desc_msg , int allocate ) ; #if DEBUG_MODULE /* main is used here for module testing purposes only */ @@ -173,6 +175,7 @@ create_job_record (int *error_code) struct job_record *job_record_point; struct job_details *job_details_point; + purge_old_job (); if (job_count >= MAX_JOB_COUNT) { error 
("create_job_record: job_count exceeds limit"); *error_code = EAGAIN; @@ -232,8 +235,8 @@ delete_job_details (struct job_record *job_entry) * find_job_record - return a pointer to the job record with the given job_id * input: job_id - requested job's id * output: pointer to the job's record, NULL on error - * global: job_list - global job list pointer * job_hash, job_hash_over, max_hash_over - hash table into job records + * global: job_list - global job list pointer */ struct job_record * find_job_record(uint32_t job_id) @@ -337,12 +340,12 @@ init_job_conf () * node_list - list of nodes allocated to the job * returns 0 on success, EINVAL if specification is invalid, * EAGAIN if higher priority jobs exist - * globals: job_list - pointer to global job list - * list_part - global list of partition info - * default_part_loc - pointer to default partition * NOTE: If allocating nodes lx[0-7] to a job and those nodes have cpu counts of * 4, 4, 4, 4, 8, 8, 4, 4 then num_cpu_groups=3, cpus_per_node={4,8,4} and * cpu_count_reps={4,2,2} + * globals: job_list - pointer to global job list + * list_part - global list of partition info + * default_part_loc - pointer to default partition */ int @@ -449,27 +452,28 @@ job_cancel (uint32_t job_id) job_ptr->job_state = JOB_FAILED; job_ptr->start_time = job_ptr->end_time = time(NULL); delete_job_details(job_ptr); - info ("job_cancel of pending job %u successful", job_id); + verbose ("job_cancel of pending job %u successful", job_id); return 0; } if (job_ptr->job_state == JOB_STAGE_IN) { last_job_update = time (NULL); job_ptr->job_state = JOB_FAILED; + job_ptr->end_time = time(NULL); deallocate_nodes (job_ptr->node_bitmap); delete_job_details(job_ptr); - info ("job_cancel of job %u successful", job_id); + verbose ("job_cancel of running job %u successful", job_id); return 0; } - info ("job_cancel: job %u can't be cancelled from state=%s", + verbose ("job_cancel: job %u can't be cancelled from state=%s", job_id, 
job_state_string(job_ptr->job_state)); return ESLURM_TRANSITION_STATE_NO_UPDATE; } /* - * job_create - create job_records with supplied jobs specifications. + * job_create - create a job table record for the supplied specifications. * this performs only basic tests for request validity (access to partition, * nodes count in partition, and sufficient processors in partition). * input: job_specs - job specifications @@ -860,9 +864,12 @@ list_find_job_old (void *job_entry, void *key) min_age = time(NULL) - MIN_JOB_AGE; - if (((struct job_record *) job_entry)->job_state != JOB_COMPLETE) + if (((struct job_record *) job_entry)->end_time > min_age) return 0; - if (((struct job_record *) job_entry)->end_time < min_age) + + if ((((struct job_record *) job_entry)->job_state != JOB_COMPLETE) && + (((struct job_record *) job_entry)->job_state != JOB_FAILED) && + (((struct job_record *) job_entry)->job_state != JOB_TIMEOUT)) return 0; return 1; @@ -1062,10 +1069,17 @@ void purge_old_job (void) { int i; + static time_t last_purge = (time_t) 0; + time_t now; + + now = time (NULL); + if (((now - last_purge) < MIN_JOB_AGE) || (job_list == NULL)) + return; + last_purge = now; i = list_delete_all (job_list, &list_find_job_old, NULL); if (i) { - info ("purge_old_job: purged %d old job records"); + info ("purge_old_job: purged %d old job records", i); last_job_update = time (NULL); } } diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index e10231427bf1e109574f6d3015d7e3f946a64030..b827fb82f5b2fe4419ac45cc09bf70aa3acf1d11 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -1,8 +1,29 @@ -/* +/*****************************************************************************\ * job_scheduler.c - manage the scheduling of pending jobs in priority order - * - * author: moe jette, jette@llnl.gov - */ + * Note there is a global job list (job_list) + ***************************************************************************** + * Copyright 
(C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Moe Jette <jette1@llnl.gov> + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+\*****************************************************************************/ #ifdef HAVE_CONFIG_H # include <config.h> @@ -13,8 +34,8 @@ #include <stdlib.h> #include <string.h> -#include "list.h" -#include "slurmctld.h" +#include <src/common/list.h> +#include <src/slurmctld/slurmctld.h> struct job_queue { int priority; @@ -24,16 +45,6 @@ struct job_queue { int build_job_queue (struct job_queue **job_queue); void sort_job_queue (struct job_queue *job_queue, int job_queue_size); -#if DEBUG_MODULE -/* main is used here for module testing purposes only */ -int -main (int argc, char *argv[]) -{ - printf("No test functions presently available\n"); - exit (0); -} -#endif - /* * build_job_queue - build (non-priority ordered) list of pending jobs * input: job_queue - storage location for job queue @@ -83,6 +94,10 @@ build_job_queue (struct job_queue **job_queue) * order until a request fails * global: job_list - global list of job records * last_job_update - time of last update to job table + * Note: We re-build the queue every time. Jobs can not only be added + * or removed from the queue, but have their priority or partition + * changed with the update_job RPC. In general jobs will be in priority + * order (by submit time), so the sorting should be pretty fast. 
*/ void schedule() @@ -133,7 +148,7 @@ schedule() /* - * sort_job_queue - sort a job queue in decending priority order + * sort_job_queue - sort job_queue in descending priority order * input: job_queue - pointer to un-sorted job queue * job_queue_size - count of elements in the job queue * output: job_queue - pointer to sorted job queue diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 19bbb5d089fd3fe2b401c102813785e42d6165ec..3c32ed58dce157dfb6dc869801d09a04081572cf 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -1,9 +1,31 @@ -/* +/*****************************************************************************\ * node_mgr.c - manage the node records of slurm - * see slurm.h for documentation on external functions and data structures - * - * author: moe jette, jette@llnl.gov - */ + * Note: there is a global node table (node_record_table_ptr), its + * hash table (hash_table), time stamp (last_node_update) and + * configuration list (config_list) + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Moe Jette <jette1@llnl.gov> et al. + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +\*****************************************************************************/ #ifdef HAVE_CONFIG_H # include <config.h> @@ -34,8 +56,7 @@ bitstr_t *idle_node_bitmap = NULL; /* bitmap of nodes are idle */ int delete_config_record (); void dump_hash (); int hash_index (char *name); -void split_node_name (char *name, char *prefix, char *suffix, int *index, - int *digits); +void split_node_name (char *name, char *prefix, char *suffix, int *index, int *digits); #if DEBUG_MODULE /* main is used here for testing purposes only */ @@ -148,11 +169,7 @@ main (int argc, char *argv[]) printf ("ERROR: node_name2bitmap error %d\n", error_code); error_count++; } - error_code = bitmap2node_name (map3, &out_line); - if (error_code) { - printf ("ERROR: bitmap2node_name error %d\n", error_code); - error_count++; - } + out_line = bitmap2node_name (map3); if (strcmp (out_line, "lx[01-02],lx04") != 0) printf ("ERROR: bitmap2node_name results bad %s vs %s\n", out_line, "lx[01-02],lx04"); @@ -237,35 +254,34 @@ main (int argc, char *argv[]) * bitmap2node_name - given a bitmap, build a list of comma separated node names. * names may include regular expressions (e.g. 
"lx[01-10]") * input: bitmap - bitmap pointer - * node_list - place to put node list - * output: node_list - set to node list or NULL on error + * output: returns pointer to node list or NULL on error * globals: node_record_table_ptr - pointer to node table * NOTE: the caller must xfree the memory at node_list when no longer required */ -void -bitmap2node_name (bitstr_t *bitmap, char **node_list) +char * +bitmap2node_name (bitstr_t *bitmap) { + char *node_list_ptr; int node_list_size, i; char prefix[MAX_NAME_LEN], suffix[MAX_NAME_LEN]; char format[MAX_NAME_LEN], temp[MAX_NAME_LEN]; char last_prefix[MAX_NAME_LEN], last_suffix[MAX_NAME_LEN]; int first_index = 0, last_index = 0, index, digits; - node_list[0] = NULL; node_list_size = 0; if (bitmap == NULL) fatal ("bitmap2node_name: bitmap is NULL"); - node_list[0] = xmalloc (BUF_SIZE); - strcpy (node_list[0], ""); + node_list_ptr = xmalloc (BUF_SIZE); + strcpy (node_list_ptr, ""); strcpy (last_prefix, ""); strcpy (last_suffix, ""); for (i = 0; i < node_record_count; i++) { if (node_list_size < - (strlen (node_list[0]) + MAX_NAME_LEN * 3)) { + (strlen (node_list_ptr) + MAX_NAME_LEN * 3)) { node_list_size += BUF_SIZE; - xrealloc (node_list[0], node_list_size); + xrealloc (node_list_ptr, node_list_size); } if (bit_test (bitmap, i) == 0) continue; @@ -279,30 +295,30 @@ bitmap2node_name (bitstr_t *bitmap, char **node_list) } if ((strlen (last_prefix) != 0) || /* end of a sequence */ (strlen (last_suffix) != 0)) { - if (strlen (node_list[0]) > 0) - strcat (node_list[0], ","); - strcat (node_list[0], last_prefix); + if (strlen (node_list_ptr) > 0) + strcat (node_list_ptr, ","); + strcat (node_list_ptr, last_prefix); if (first_index != last_index) - strcat (node_list[0], "["); + strcat (node_list_ptr, "["); strcpy (format, "%0"); sprintf (&format[2], "%dd", digits); sprintf (temp, format, first_index); - strcat (node_list[0], temp); + strcat (node_list_ptr, temp); if (first_index != last_index) { - strcat (node_list[0], "-"); 
+ strcat (node_list_ptr, "-"); strcpy (format, "%0"); sprintf (&format[2], "%dd]", digits); sprintf (temp, format, last_index); - strcat (node_list[0], temp); + strcat (node_list_ptr, temp); } - strcat (node_list[0], last_suffix); + strcat (node_list_ptr, last_suffix); strcpy (last_prefix, ""); strcpy (last_suffix, ""); } if (index == NO_VAL) { - if (strlen (node_list[0]) > 0) - strcat (node_list[0], ","); - strcat (node_list[0], node_record_table_ptr[i].name); + if (strlen (node_list_ptr) > 0) + strcat (node_list_ptr, ","); + strcat (node_list_ptr, node_record_table_ptr[i].name); } else { strcpy (last_prefix, prefix); @@ -313,30 +329,33 @@ bitmap2node_name (bitstr_t *bitmap, char **node_list) if ((strlen (last_prefix) != 0) || /* end of a sequence */ (strlen (last_suffix) != 0)) { - if (strlen (node_list[0]) > 0) - strcat (node_list[0], ","); - strcat (node_list[0], last_prefix); + if (strlen (node_list_ptr) > 0) + strcat (node_list_ptr, ","); + strcat (node_list_ptr, last_prefix); if (first_index != last_index) - strcat (node_list[0], "["); + strcat (node_list_ptr, "["); strcpy (format, "%0"); sprintf (&format[2], "%dd", digits); sprintf (temp, format, first_index); - strcat (node_list[0], temp); + strcat (node_list_ptr, temp); if (first_index != last_index) { - strcat (node_list[0], "-"); + strcat (node_list_ptr, "-"); strcpy (format, "%0"); sprintf (&format[2], "%dd]", digits); sprintf (temp, format, last_index); - strcat (node_list[0], temp); + strcat (node_list_ptr, temp); } - strcat (node_list[0], last_suffix); + strcat (node_list_ptr, last_suffix); } - xrealloc (node_list[0], strlen (node_list[0]) + 1); + xrealloc (node_list_ptr, strlen (node_list_ptr) + 1); + return node_list_ptr; } /* * create_config_record - create a config_record entry and set is values to the defaults. 
+ * each config record corresponds to a line in the slurm.conf file and typically + * describes the configuration of a large number of nodes * output: returns pointer to the config_record * global: default_config_record - default configuration values * NOTE: memory allocated will remain in existence until delete_config_record() is called @@ -447,7 +466,7 @@ delete_config_record () /* - * delete_node_record - delete record for node with specified name + * delete_node_record - delete the node record for a node with specified name * to avoid invalidating the bitmaps and hash table, we just clear the name * set its state to NODE_STATE_DOWN * input: name - name of the desired node @@ -500,7 +519,7 @@ dump_hash () /* - * find_node_record - find a record for node with specified name, + * find_node_record - find a record for node with specified name * input: name - name of the desired node * output: return pointer to node record or NULL if not found * global: node_record_table_ptr - pointer to global node table @@ -542,7 +561,8 @@ find_node_record (char *name) * this code is optimized for names containing a base-ten suffix (e.g. "lx04") * input: the node's name * output: return code is the hash table index - * global: hash_table - table of hash indecies + * global: hash_table - table of hash indexes + * slurmctld_conf.hash_base - numbering base for sequence numbers */ int hash_index (char *name) @@ -603,7 +623,7 @@ hash_index (char *name) /* * init_node_conf - initialize the node configuration tables and values. - * this should be called before creating any node or configuration entries. + * this should be called before creating any node or configuration entries. 
* output: return value - 0 if no error, otherwise an error code * global: node_record_table_ptr - pointer to global node table * default_node_record - default values for node records @@ -918,7 +938,7 @@ rehash () /* - * split_node_name - split a node name into prefix, suffix, and index value + * split_node_name - split a node name into prefix, suffix, index value, and digit count * input: name - the node name to parse * prefix, suffix, index, digits - location into which to store node name's constituents * output: prefix, suffix, index - the node name's constituents diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 9aed6a6e1813a224db6482d24a932f7e6ed79cb3..3ca7039b4514cc75a11378da9a6c06d301839d9d 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -1,12 +1,28 @@ -/* +/*****************************************************************************\ * node_scheduler.c - select and allocated nodes to jobs - * see slurm.h for documentation on external functions and data structures - * - * NOTE: DEBUG_MODULE mode test with execution line - * node_scheduler ../../etc/slurm.conf2 ../../etc/slurm.jobs - * - * author: moe jette, jette@llnl.gov - */ + * Note: there is a global node table (node_record_table_ptr) ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Moe Jette <jette1@llnl.gov> + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +\*****************************************************************************/ #ifdef HAVE_CONFIG_H # include <config.h> @@ -17,9 +33,9 @@ #include <stdlib.h> #include <string.h> #include <syslog.h> -#include <src/common/slurm_protocol_errno.h> -#include "slurmctld.h" +#include <src/common/slurm_protocol_errno.h> +#include <src/slurmctld/slurmctld.h> #define BUF_SIZE 1024 @@ -155,8 +171,7 @@ main (int argc, char *argv[]) #endif -/* allocate_nodes - for a given bitmap, change the state of specified nodes to stage_in - * this is a simple prototype for testing +/* allocate_nodes - for a given bitmap, change the state of specified nodes to NODE_STATE_ALLOCATED * globals: node_record_count - number of nodes in the system * node_record_table_ptr - pointer to global node table * last_node_update - last update time of node table @@ -200,8 +215,7 @@ count_cpus (unsigned *bitmap) } -/* deallocate_nodes - for a given bitmap, change the state of specified nodes to idle - * this is a simple prototype for testing +/* deallocate_nodes - for a given bitmap, change the state of specified nodes to NODE_STATE_IDLE * globals: node_record_count - number of nodes in the system * node_record_table_ptr - pointer to global node table */ @@ -223,7 +237,7 @@ deallocate_nodes (unsigned *bitmap) /* - * is_key_valid - determine if supplied key is valid + * is_key_valid - determine if supplied partition key is valid * input: key - a slurm key acquired by user root * output: returns 1 if key is valid, 0 otherwise * NOTE: this is only a placeholder 
for a future function @@ -239,7 +253,7 @@ is_key_valid (void * key) /* - * match_feature - determine if the desired feature (seek) is one of those available + * match_feature - determine if the desired feature is one of those available * input: seek - desired feature * available - comma separated list of features * output: returns 1 if found, 0 otherwise @@ -318,23 +332,18 @@ match_group (char *allow_groups, char *user_groups) /* - * pick_best_quadrics - identify the nodes which best fit the req_nodes and - * req_cpus counts for a system with Quadrics elan interconnect. + * pick_best_quadrics - Given a bitmap of nodes to select from (bitmap), a bitmap of + * nodes required by the job (req_bitmap), a count of required nodes (req_nodes), + * a count of required processors (req_cpus) and a flag indicating if consecutive nodes + * are required (0|1, consecutive), identify the nodes which "best" satisfy the request. * "best" is defined as either single set of consecutive nodes satisfying * the request and leaving the minimum number of unused nodes OR * the fewest number of consecutive node sets - * input: bitmap - the bit map to search - * req_bitmap - the bit map of nodes that must be selected, if not NULL these - * have already been confirmed to be in the input bitmap - * req_nodes - number of nodes required - * req_cpus - number of cpus required - * consecutive - nodes must be consecutive is 1, otherwise 0 - * output: bitmap - nodes not required to satisfy the request are cleared, - * other left set + * output: bitmap - nodes not required to satisfy the request are cleared, other left set * returns zero on success, EINVAL otherwise * globals: node_record_count - count of nodes configured * node_record_table_ptr - pointer to global node table - * NOTE: bitmap must be a superset of req_nodes at function call time + * NOTE: bitmap must be a superset of req_nodes at the time that pick_best_quadrics is called */ int pick_best_quadrics (bitstr_t *bitmap, bitstr_t 
*req_bitmap, int req_nodes, @@ -506,7 +515,7 @@ pick_best_quadrics (bitstr_t *bitmap, bitstr_t *req_bitmap, int req_nodes, /* - * pick_best_nodes - from nodes satisfying partition and configuration specifications, + * pick_best_nodes - from a weight-ordered table of all nodes satisfying a job's specifications, * select the "best" for use * input: node_set_ptr - pointer to node specification information * node_set_size - number of entries in records pointed to by node_set_ptr @@ -521,6 +530,15 @@ pick_best_quadrics (bitstr_t *bitmap, bitstr_t *req_bitmap, int req_nodes, * returns 0 on success, EAGAIN if request can not be satisfied now, * EINVAL if request can never be satisfied (insufficient contiguous nodes) * NOTE: the caller must xfree memory pointed to by req_bitmap + * Notes: The algorithm is + * 1) If required node list is specified, determine implicitly required processor and node count + * 2) Determine how many disjoint required "features" are represented (e.g. "FS1|FS2") + * 3) For each feature: find matching node table entries, identify nodes that are up and + * available (idle or shared) and add them to a bit map, call pick_best_quadrics() to + * select the "best" of those based upon topology + * 4) If request can't be satisfied now, execute pick_best_quadrics() against the list + * of nodes that exist in any state (perhaps down or busy) to determine if the + * request can ever be satisfied. 
*/ int pick_best_nodes (struct node_set *node_set_ptr, int node_set_size, @@ -585,8 +603,7 @@ pick_best_nodes (struct node_set *node_set_ptr, int node_set_size, continue; if (runable == 0) { if (total_set) - bit_or (total_bitmap, - node_set_ptr[i].my_bitmap); + bit_or (total_bitmap, node_set_ptr[i].my_bitmap); else { total_bitmap = bit_copy (node_set_ptr[i].my_bitmap); if (total_bitmap == NULL) @@ -594,19 +611,15 @@ pick_best_nodes (struct node_set *node_set_ptr, int node_set_size, total_set = 1; } total_nodes += node_set_ptr[i].nodes; - total_cpus += - (node_set_ptr[i].nodes * node_set_ptr[i].cpus_per_node); + total_cpus += (node_set_ptr[i].nodes * node_set_ptr[i].cpus_per_node); } - bit_and (node_set_ptr[i].my_bitmap, - up_node_bitmap); + bit_and (node_set_ptr[i].my_bitmap, up_node_bitmap); if (shared != 1) - bit_and (node_set_ptr[i].my_bitmap, - idle_node_bitmap); + bit_and (node_set_ptr[i].my_bitmap, idle_node_bitmap); node_set_ptr[i].nodes = bit_set_count (node_set_ptr[i].my_bitmap); if (avail_set) - bit_or (avail_bitmap, - node_set_ptr[i].my_bitmap); + bit_or (avail_bitmap, node_set_ptr[i].my_bitmap); else { avail_bitmap = bit_copy (node_set_ptr[i].my_bitmap); if (avail_bitmap == NULL) @@ -614,9 +627,7 @@ pick_best_nodes (struct node_set *node_set_ptr, int node_set_size, avail_set = 1; } avail_nodes += node_set_ptr[i].nodes; - avail_cpus += - (node_set_ptr[i].nodes * - node_set_ptr[i].cpus_per_node); + avail_cpus += (node_set_ptr[i].nodes * node_set_ptr[i].cpus_per_node); if ((req_bitmap[0]) && (bit_super_set (req_bitmap[0], avail_bitmap) == 0)) continue; @@ -624,10 +635,7 @@ pick_best_nodes (struct node_set *node_set_ptr, int node_set_size, continue; if (avail_cpus < req_cpus) continue; - pick_code = - pick_best_quadrics (avail_bitmap, req_bitmap[0], - req_nodes, req_cpus, - contiguous); + pick_code = pick_best_quadrics (avail_bitmap, req_bitmap[0], req_nodes, req_cpus, contiguous); if ((pick_code == 0) && (max_nodes != INFINITE) && (bit_set_count 
(avail_bitmap) > max_nodes)) { info ("pick_best_nodes: too many nodes selected %u partition maximum is %u", @@ -644,17 +652,13 @@ pick_best_nodes (struct node_set *node_set_ptr, int node_set_size, return 0; } } + + /* determine if job could possibly run (if configured nodes all available) */ if ((error_code == 0) && (runable == 0) && (total_nodes > req_nodes) && (total_cpus > req_cpus) && - ((req_bitmap[0] == NULL) - || (bit_super_set (req_bitmap[0], total_bitmap) == 1)) - && ((max_nodes == INFINITE) || (req_nodes <= max_nodes))) { - /* determine if job could possibly run */ - /* (if configured nodes all available) */ - pick_code = - pick_best_quadrics (total_bitmap, req_bitmap[0], - req_nodes, req_cpus, - contiguous); + ((req_bitmap[0] == NULL) || (bit_super_set (req_bitmap[0], total_bitmap) == 1)) && + ((max_nodes == INFINITE) || (req_nodes <= max_nodes))) { + pick_code = pick_best_quadrics (total_bitmap, req_bitmap[0], req_nodes, req_cpus, contiguous); if ((pick_code == 0) && (max_nodes != INFINITE) && (bit_set_count (total_bitmap) > max_nodes)) { error_code = EINVAL; @@ -691,6 +695,12 @@ pick_best_nodes (struct node_set *node_set_ptr, int node_set_size, * globals: list_part - global list of partition info * default_part_loc - pointer to default partition * config_list - global list of node configuration info + * Notes: The algorithm is + * 1) Build a table (node_set_ptr) of nodes with the requisite configuration + * Each table entry includes their weight, node_list, features, etc. + * 2) Call pick_best_nodes() to select those nodes best satisfying the request, + * (e.g. 
best-fit or other criterion) + * 3) Call allocate_nodes() to perform the actual allocation */ int select_nodes (struct job_record *job_ptr, int test_only) @@ -799,11 +809,10 @@ select_nodes (struct job_record *job_ptr, int test_only) node_set_ptr[node_set_index].cpus_per_node = config_record_point->cpus; node_set_ptr[node_set_index].weight = config_record_point->weight; node_set_ptr[node_set_index].feature = tmp_feature; -#if DEBUG_SYSTEM > 1 - info ("found %d usable nodes from configuration with %s", + debug ("found %d usable nodes from configuration with %s", node_set_ptr[node_set_index].nodes, config_record_point->nodes); -#endif + node_set_index++; xrealloc (node_set_ptr, sizeof (struct node_set) * (node_set_index + 1)); node_set_ptr[node_set_size++].my_bitmap = NULL; @@ -855,7 +864,7 @@ select_nodes (struct job_record *job_ptr, int test_only) } /* assign the nodes and stage_in the job */ - bitmap2node_name (req_bitmap, &(job_ptr->nodes)); + job_ptr->nodes = bitmap2node_name (req_bitmap); build_node_details (req_bitmap, &(job_ptr->num_cpu_groups), &(job_ptr->cpus_per_node), diff --git a/src/slurmctld/parse_spec.c b/src/slurmctld/parse_spec.c deleted file mode 100644 index 738a1b8c9032adc63c09ba333cc4e9858d8a9c30..0000000000000000000000000000000000000000 --- a/src/slurmctld/parse_spec.c +++ /dev/null @@ -1,271 +0,0 @@ -/* $Id$ */ - -/* slurm_parser() functionality - split out from bits_bytes.c and - * moved into slurmctld/ srcdir - */ - -#include <stdarg.h> -#include <stdlib.h> -#include <string.h> -#include <ctype.h> -#include <errno.h> - -#include "log.h" -#include "slurmctld.h" -#include "xmalloc.h" - -#define BUF_SIZE 1024 -#define SEPCHARS " \n\t" - -int -load_string (char **destination, char *keyword, char *in_line) ; -int -load_long (long *destination, char *keyword, char *in_line) ; -int -load_integer (int *destination, char *keyword, char *in_line) ; -int -load_float (float *destination, char *keyword, char *in_line) ; - -/* - * slurm_parser - parse the 
supplied specification into keyword/value pairs - * only the keywords supplied will be searched for. the supplied specification - * is altered, overwriting the keyword and value pairs with spaces. - * input: spec - pointer to the string of specifications - * sets of three values (as many sets as required): keyword, type, value - * keyword - string with the keyword to search for including equal sign - * (e.g. "name=") - * type - char with value 'd' for int, 'f' for float, 's' for string - * value - pointer to storage location for value (char **) for type 's' - * output: spec - everything read is overwritten by speces - * value - set to read value (unchanged if keyword not found) - * return - 0 if no error, otherwise errno code - * NOTE: terminate with a keyword value of "END" - * NOTE: values of type (char *) are xfreed if non-NULL. caller must xfree any - * returned value - */ -int -slurm_parser (char *spec, ...) -{ - va_list ap; - char *keyword, **str_ptr; - int error_code, *int_ptr, type; - long *long_ptr; - float *float_ptr; - - error_code = 0; - va_start(ap, spec); - while (error_code == 0) { - keyword = va_arg(ap, char *); - if (strcmp (keyword, "END") == 0) - break; - type = va_arg(ap, int); - switch (type) { - case 'd': - int_ptr = va_arg(ap, int *); - error_code = load_integer(int_ptr, keyword, spec); - break; - case 'f': - float_ptr = va_arg(ap, float *); - error_code = load_float(float_ptr, keyword, spec); - break; - case 'l': - long_ptr = va_arg(ap, long *); - error_code = load_long(long_ptr, keyword, spec); - break; - case 's': - str_ptr = va_arg(ap, char **); - error_code = load_string(str_ptr, keyword, spec); - break; - default: - fatal ("parse_spec: invalid type %c", type); - } - } - va_end(ap); - return error_code; -} - - -/* - * load_float - location into which result is stored - * keyword - string to search for - * in_line - string to search for keyword - * output: *destination - set to value, no change if value not found - * in_line - the keyword 
and value (if present) are overwritten by spaces - * return value - 0 if no error, otherwise an error code - * NOTE: in_line is overwritten, do not use a constant - */ -int -load_float (float *destination, char *keyword, char *in_line) -{ - char scratch[BUF_SIZE]; /* scratch area for parsing the input line */ - char *str_ptr1, *str_ptr2, *str_ptr3; - int i, str_len1, str_len2; - - str_ptr1 = (char *) strstr (in_line, keyword); - if (str_ptr1 != NULL) { - str_len1 = strlen (keyword); - strcpy (scratch, str_ptr1 + str_len1); - if ((scratch[0] < '0') && (scratch[0] > '9')) { - error ("load_float: bad value for keyword %s\n", keyword); - return EINVAL; - } - str_ptr2 = (char *) strtok_r (scratch, SEPCHARS, &str_ptr3); - str_len2 = strlen (str_ptr2); - *destination = (float) strtod (scratch, (char **) NULL); - for (i = 0; i < (str_len1 + str_len2); i++) { - str_ptr1[i] = ' '; - } - } - return 0; -} - - -/* - * load_integer - parse a string for a keyword, value pair - * input: *destination - location into which result is stored - * keyword - string to search for - * in_line - string to search for keyword - * output: *destination - set to value, no change if value not found, - * set to 1 if keyword found without value, - * set to -1 if keyword followed by "unlimited" - * in_line - the keyword and value (if present) are overwritten by spaces - * return value - 0 if no error, otherwise an error code - * NOTE: in_line is overwritten, do not use a constant - */ -int -load_integer (int *destination, char *keyword, char *in_line) -{ - char scratch[BUF_SIZE]; /* scratch area for parsing the input line */ - char *str_ptr1, *str_ptr2, *str_ptr3; - int i, str_len1, str_len2; - - str_ptr1 = (char *) strstr (in_line, keyword); - if (str_ptr1 != NULL) { - str_len1 = strlen (keyword); - strcpy (scratch, str_ptr1 + str_len1); - if ((scratch[0] == (char) NULL) || - (isspace ((int) scratch[0]))) { /* keyword with no value set */ - *destination = 1; - str_len2 = 0; - } - else { - str_ptr2 
= - (char *) strtok_r (scratch, SEPCHARS, &str_ptr3); - str_len2 = strlen (str_ptr2); - if (strcmp (str_ptr2, "UNLIMITED") == 0) - *destination = -1; - else if ((str_ptr2[0] >= '0') && (str_ptr2[0] <= '9')) { - *destination = - (int) strtol (scratch, (char **) NULL, 10); - } - else { - error ("load_integer: bad value for keyword %s\n", - keyword); - return EINVAL; - } - } - - for (i = 0; i < (str_len1 + str_len2); i++) { - str_ptr1[i] = ' '; - } - } - return 0; -} - - -/* - * load_long - parse a string for a keyword, value pair - * input: *destination - location into which result is stored - * keyword - string to search for - * in_line - string to search for keyword - * output: *destination - set to value, no change if value not found, - * set to 1 if keyword found without value, - * set to -1 if keyword followed by "unlimited" - * in_line - the keyword and value (if present) are overwritten by spaces - * return value - 0 if no error, otherwise an error code - * NOTE: in_line is overwritten, do not use a constant - */ -int -load_long (long *destination, char *keyword, char *in_line) -{ - char scratch[BUF_SIZE]; /* scratch area for parsing the input line */ - char *str_ptr1, *str_ptr2, *str_ptr3; - int i, str_len1, str_len2; - - str_ptr1 = (char *) strstr (in_line, keyword); - if (str_ptr1 != NULL) { - str_len1 = strlen (keyword); - strcpy (scratch, str_ptr1 + str_len1); - if ((scratch[0] == (char) NULL) || - (isspace ((int) scratch[0]))) { /* keyword with no value set */ - *destination = 1; - str_len2 = 0; - } - else { - str_ptr2 = - (char *) strtok_r (scratch, SEPCHARS, &str_ptr3); - str_len2 = strlen (str_ptr2); - if (strcmp (str_ptr2, "UNLIMITED") == 0) - *destination = -1L; - else if ((str_ptr2[0] >= '0') && (str_ptr2[0] <= '9')) { - *destination = strtol (scratch, (char **) NULL, 10); - } - else { - error ("load_long: bad value for keyword %s\n", - keyword); - return EINVAL; - } - } - - for (i = 0; i < (str_len1 + str_len2); i++) { - str_ptr1[i] = ' '; - } - } 
- return 0; -} - - -/* - * load_string - parse a string for a keyword, value pair - * input: *destination - location into which result is stored - * keyword - string to search for - * in_line - string to search for keyword - * output: *destination - set to value, no change if value not found, - * if *destination had previous value, that memory location is automatically freed - * in_line - the keyword and value (if present) are overwritten by spaces - * return value - 0 if no error, otherwise an error code - * NOTE: destination must be free when no longer required - * NOTE: if destination is non-NULL at function call time, it will be freed - * NOTE: in_line is overwritten, do not use a constant - */ -int -load_string (char **destination, char *keyword, char *in_line) -{ - char scratch[BUF_SIZE]; /* scratch area for parsing the input line */ - char *str_ptr1, *str_ptr2, *str_ptr3; - int i, str_len1, str_len2; - - str_ptr1 = (char *) strstr (in_line, keyword); - if (str_ptr1 != NULL) { - str_len1 = strlen (keyword); - strcpy (scratch, str_ptr1 + str_len1); - if ((scratch[0] == (char) NULL) || - (isspace ((int) scratch[0]))) { /* keyword with no value set */ - error ("load_string: keyword %s lacks value\n", - keyword); - return EINVAL; - } - str_ptr2 = (char *) strtok_r (scratch, SEPCHARS, &str_ptr3); - str_len2 = strlen (str_ptr2); - if (destination[0] != NULL) - xfree (destination[0]); - destination[0] = (char *) xmalloc (str_len2 + 1); - strcpy (destination[0], str_ptr2); - for (i = 0; i < (str_len1 + str_len2); i++) { - str_ptr1[i] = ' '; - } - } - return 0; -} - - diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c index 2820a6d9ef4de17f1d27cd78465d2632e9c9a4f0..beac1ffb74c9293139d1089b8b34be1edebbf9b6 100644 --- a/src/slurmctld/partition_mgr.c +++ b/src/slurmctld/partition_mgr.c @@ -1,9 +1,30 @@ -/* +/*****************************************************************************\ * partition_mgr.c - manage the partition information of slurm 
- * see slurm.h for documentation on external functions and data structures - * - * author: moe jette, jette@llnl.gov - */ + * Note: there is a global partition list (part_list) and + * time stamp (last_part_update) + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Moe Jette <jette@llnl.gov> et. al. + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +\*****************************************************************************/ #ifdef HAVE_CONFIG_H # include <config.h> @@ -173,7 +194,8 @@ main (int argc, char *argv[]) * NOTE: this does not report nodes defined in more than one partition. 
this is checked only * upon reading the configuration file, not on an update */ -int build_part_bitmap (struct part_record *part_record_point) +int +build_part_bitmap (struct part_record *part_record_point) { int i, update_nodes; char *this_node_name ; @@ -254,7 +276,8 @@ int build_part_bitmap (struct part_record *part_record_point) * NOTE: the record's values are initialized to those of default_part * NOTE: allocates memory that should be xfreed with delete_part_record */ -struct part_record * create_part_record (void) +struct part_record * +create_part_record (void) { struct part_record *part_record_point; @@ -304,7 +327,8 @@ struct part_record * create_part_record (void) * output: return 0 on success, errno otherwise * global: part_list - global partition list */ -int delete_part_record (char *name) +int +delete_part_record (char *name) { int i; @@ -317,14 +341,13 @@ int delete_part_record (char *name) if ((name == NULL) || (i != 0)) return 0; - error ("delete_part_record: attempt to delete non-existent partition %s", - name); + error ("delete_part_record: attempt to delete non-existent partition %s", name); return ENOENT; } /* - * find_part_record - find a record for partition with specified name, + * find_part_record - find a record for partition with specified name * input: name - name of the desired partition * output: return pointer to node partition or null if not found * global: part_list - global partition list @@ -343,7 +366,8 @@ find_part_record (char *name){ * global: default_part - default partition values * part_list - global partition list */ -int init_part_conf () +int +init_part_conf () { last_part_update = time (NULL); diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index aa55b31796e6c85ff1b7c5c698ceb617b60a5131..0c4fca0095ab9b8af769ceb7c8c0a36feb78c93d 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -39,6 +39,7 @@ #include <src/common/hostlist.h> #include <src/common/list.h> #include 
<src/common/macros.h> +#include <src/common/parse_spec.h> #include <src/slurmctld/slurmctld.h> #define BUF_SIZE 1024 @@ -410,21 +411,17 @@ parse_config_spec (char *in_line) slurmctld_conf.epilog = epilog; } - if ( fast_schedule ) { + if ( fast_schedule ) slurmctld_conf.fast_schedule = fast_schedule; - } - if ( first_job_id ) { + if ( first_job_id ) slurmctld_conf.first_job_id = first_job_id; - } - if ( hash_base ) { + if ( hash_base ) slurmctld_conf.hash_base = hash_base; - } - if ( kill_wait ) { + if ( kill_wait ) slurmctld_conf.kill_wait = kill_wait; - } if ( prioritize ) { if ( slurmctld_conf.prioritize ) @@ -447,9 +444,8 @@ parse_config_spec (char *in_line) endservent (); } - if ( slurmctld_timeout ) { + if ( slurmctld_timeout ) slurmctld_conf.slurmctld_timeout = slurmctld_timeout; - } if ( slurmd_port ) { servent = getservbyname (slurmd_port, NULL); @@ -460,9 +456,8 @@ parse_config_spec (char *in_line) endservent (); } - if ( slurmd_timeout ) { + if ( slurmd_timeout ) slurmctld_conf.slurmd_timeout = slurmd_timeout; - } if ( state_save_location ) { if ( slurmctld_conf.state_save_location ) @@ -910,7 +905,7 @@ read_slurm_conf ( ) { list_sort (config_list, &list_compare_config); slurmctld_conf.last_update = time (NULL) ; - info ("read_slurm_conf: finished loading configuration, time =%ld", + info ("read_slurm_conf: finished loading configuration, time=%ld", (long) (clock () - start_time)); return SLURM_SUCCESS; diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 0dc336523d4d99856f8d356a9d899e364f7b0eda..5c0e32800f2ca88f56f5aae0762f43111a6232aa 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -177,35 +177,21 @@ extern List job_list; /* list of job_record entries */ /* allocate_nodes - for a given bitmap, change the state of specified nodes to stage_in * this is a simple prototype for testing - * globals: node_record_count - number of nodes in the system - * node_record_table_ptr - pointer to global node table */ extern 
void allocate_nodes (unsigned *bitmap); /* - * bitmap2node_name - given a bitmap, build a node name list representation using - * regular expressions - * input: bitmap - bitmap pointer - * node_list - place to put node list - * output: node_list - set to node list or null on error - * NOTE: consider returning the node list as a regular expression if helpful - * NOTE: the caller must free memory at node_list when no longer required + * bitmap2node_name - given a bitmap, build a list of comma separated node names. + * names may include regular expressions (e.g. "lx[01-10]") + * NOTE: the caller must xfree the memory at node_list when no longer required */ -extern void bitmap2node_name (bitstr_t *bitmap, char **node_list); +extern char * bitmap2node_name (bitstr_t *bitmap) ; -/* - * count_cpus - report how many cpus are associated with the identified nodes - * input: bitmap - a node bitmap - * output: returns a cpu count - * globals: node_record_count - number of nodes configured - * node_record_table_ptr - pointer to global node table - */ +/* count_cpus - report how many cpus are associated with the identified nodes */ extern int count_cpus (unsigned *bitmap); /* * create_config_record - create a config_record entry and set is values to the defaults. - * output: returns pointer to the config_record - * global: default_config_record - default configuration values * NOTE: memory allocated will remain in existence until delete_config_record() is called * to deletet all configuration records */ @@ -214,23 +200,12 @@ extern struct config_record *create_config_record (void); /* * create_job_record - create an empty job_record including job_details. 
* load its values with defaults (zeros, nulls, and magic cookie) - * input: error_code - location to store error value in - * output: error_code - set to zero if no error, errno otherwise - * returns a pointer to the record or NULL if error - * global: job_list - global job list - * job_count - number of jobs in the system * NOTE: allocates memory that should be xfreed with list_delete_job */ extern struct job_record * create_job_record (int *error_code); /* * create_node_record - create a node record - * input: error_code - location to store error value in - * config_point - pointer to node's configuration information - * node_name - name of the node - * output: returns a pointer to the record or null if error - * note the record's values are initialized to those of default_node_record, node_name and - * config_point's cpus, real_memory, and tmp_disk values * NOTE: allocates memory that should be freed with delete_part_record */ extern struct node_record *create_node_record (struct config_record @@ -239,32 +214,23 @@ extern struct node_record *create_node_record (struct config_record /* * create_part_record - create a partition record - * output: returns a pointer to the record or NULL if error - * global: default_part - default partition parameters - * part_list - global partition list - * NOTE: the record's values are initialized to those of default_part * NOTE: allocates memory that should be xfreed with delete_part_record */ extern struct part_record *create_part_record (void); /* * create_step_record - create an empty step_record for the specified job. 
- * input: job_ptr - pointer to job table entry to have step record added - * output: returns a pointer to the record or NULL if error * NOTE: allocates memory that should be xfreed with delete_step_record */ extern struct step_record * create_step_record (struct job_record *job_ptr); /* deallocate_nodes - for a given bitmap, change the state of specified nodes to idle * this is a simple prototype for testing - * globals: node_record_count - number of nodes in the system - * node_record_table_ptr - pointer to global node table */ extern void deallocate_nodes (unsigned *bitmap); /* * delete_job_details - delete a job's detail record and clear it's pointer - * input: job_entry - pointer to job_record to clear the record of */ extern void delete_job_details (struct job_record *job_entry); @@ -272,130 +238,70 @@ extern void delete_job_details (struct job_record *job_entry); * delete_node_record - delete record for node with specified name * to avoid invalidating the bitmaps and hash table, we just clear the name * set its state to STATE_DOWN - * input: name - name of the desired node - * output: return 0 on success, errno otherwise */ extern int delete_node_record (char *name); -/* - * delete_part_record - delete record for partition with specified name - * input: name - name of the desired node - * output: return 0 on success, errno otherwise - */ +/* delete_part_record - delete record for partition with specified name */ extern int delete_part_record (char *name); -/* - * delete_step_record - delete record for job step for specified job_ptr and step_id - * input: job_ptr - pointer to job table entry to have step record added - * step_id - id of the desired job step - * output: return 0 on success, errno otherwise - */ +/* delete_step_record - delete record for job step for specified job_ptr and step_id */ extern int delete_step_record (struct job_record *job_ptr, uint32_t step_id); /* dump_job_desc - dump the incoming job submit request message */ void 
dump_job_desc(job_desc_msg_t * job_specs); -/* - * find_job_record - return a pointer to the job record with the given job_id - * input: job_id - requested job's id - * output: pointer to the job's record, NULL on error - * global: job_list - global job list pointer - */ +/* find_job_record - return a pointer to the job record with the given job_id */ extern struct job_record *find_job_record (uint32_t job_id); -/* - * find_node_record - find a record for node with specified name, - * input: name - name of the desired node - * output: return pointer to node record or null if not found - */ +/* find_node_record - find a record for node with specified name */ extern struct node_record *find_node_record (char *name); -/* - * find_part_record - find a record for partition with specified name, - * input: name - name of the desired partition - * output: return pointer to node partition or null if not found - * global: part_list - global partition list - */ +/* find_part_record - find a record for partition with specified name */ extern struct part_record *find_part_record (char *name); -/* - * find_step_record - return a pointer to the step record with the given job_id and step_id - * input: job_ptr - pointer to job table entry to have step record added - * step_id - id of the desired job step - * output: pointer to the job step's record, NULL on error - */ +/* find_step_record - return a pointer to the step record with the given job_id and step_id */ extern struct step_record * find_step_record(struct job_record *job_ptr, uint16_t step_id); /* * init_job_conf - initialize the job configuration tables and values. * this should be called after creating node information, but * before creating any job entries. - * output: return value - 0 if no error, otherwise an error code - * global: last_job_update - time of last job table update - * job_list - pointer to global job list */ extern int init_job_conf (); /* * init_node_conf - initialize the node configuration values. 
* this should be called before creating any node or configuration entries. - * output: return value - 0 if no error, otherwise an error code */ extern int init_node_conf (); /* * init_part_conf - initialize the partition configuration values. * this should be called before creating any partition entries. - * output: return value - 0 if no error, otherwise an error code */ extern int init_part_conf (); /* * init_slurm_conf - initialize or re-initialize the slurm configuration * values. this should be called before calling read_slurm_conf. - * output: return value - 0 if no error, otherwise an error code */ extern int init_slurm_conf (); +/* is_key_valid report if the supplied partition key is valid */ extern int is_key_valid (void * key); +/* job_allocate - allocate resource for the supplied job specifications */ extern int job_allocate (job_desc_msg_t *job_specs, uint32_t *new_job_id, char **node_list, uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps, int immediate, int will_run); -/* - * job_cancel - cancel the specified job - * input: job_id - id of the job to be cancelled - * output: returns 0 on success, otherwise ESLURM error code - * global: job_list - pointer global job list - * last_job_update - time of last job table update - */ +/* job_cancel - cancel the specified job */ extern int job_cancel (uint32_t job_id); -/* - * job_step_cancel - cancel the specified job step - * input: job_id, step_id - id of the job to be cancelled - * output: returns 0 on success, otherwise ESLURM error code - * global: job_list - pointer global job list - * last_job_update - time of last job table update - */ +/* job_step_cancel - cancel the specified job step */ extern int job_step_cancel (uint32_t job_id, uint32_t job_step_id); -/* - * job_create - parse the suppied job specification and create job_records for it - * input: job_specs - job specifications - * new_job_id - location for storing new job's id - * job_rec_ptr - place to park pointer 
to the job (or NULL) - * output: new_job_id - the job's ID - * returns 0 on success, EINVAL if specification is invalid - * allocate - if set, job allocation only (no script required) - * will_run - if set then test only, don't create a job entry - * job_rec_ptr - pointer to the job (if not passed a NULL) - * globals: job_list - pointer to global job list - * list_part - global list of partition info - * default_part_loc - pointer to default partition - * job_hash, job_hash_over, max_hash_over - hash table into job records - */ +/* job_create - create a job table record for the supplied specifications */ extern int job_create (job_desc_msg_t * job_specs, uint32_t *new_job_id, int allocate, int will_run, struct job_record **job_rec_ptr); @@ -405,85 +311,26 @@ extern void job_lock (); /* job_unlock - unlock the job information */ extern void job_unlock (); -/* list_compare_config - compare two entry from the config list based upon weight, - * see list.h for documentation */ +/* list_compare_config - compare two entries from the config list based upon weight */ extern int list_compare_config (void *config_entry1, void *config_entry2); -/* list_delete_config - delete an entry from the configuration list, - *see list.h for documentation */ +/* list_delete_config - delete an entry from the configuration list */ extern void list_delete_config (void *config_entry); -/* list_find_config - find an entry in the configuration list, - * see list.h for documentation - * key is partition name or "universal_key" for all configuration */ +/* list_find_config - find an entry in the configuration list */ extern int list_find_config (void *config_entry, void *key); /* list_delete_part - delete an entry from the partition list, * see list.h for documentation */ extern void list_delete_part (void *part_entry); -/* list_find_part - find an entry in the partition list, - * see list.h for documentation - * key is partition name or "universal_key" for all partitions */ +/* list_find_part - 
find an entry in the partition list */ extern int list_find_part (void *part_entry, void *key); -/* - * load_float - location into which result is stored - * keyword - string to search for - * in_line - string to search for keyword - * output: *destination - set to value, no change if value not found - * in_line - the keyword and value (if present) are overwritten by spaces - * return value - 0 if no error, otherwise an error code - * NOTE: in_line is overwritten, do not use a constant - */ -extern int load_float (float *destination, char *keyword, char *in_line); - -/* - * load_integer - parse a string for a keyword, value pair - * input: *destination - location into which result is stored - * keyword - string to search for - * in_line - string to search for keyword - * output: *destination - set to value, no change if value not found, - * set to 1 if keyword found without value, - * set to -1 if keyword followed by "unlimited" - * in_line - the keyword and value (if present) are overwritten by spaces - * return value - 0 if no error, otherwise an error code - * NOTE: in_line is overwritten, do not use a constant - */ -extern int load_integer (int *destination, char *keyword, char *in_line); - -extern int load_long (long *destination, char *keyword, char *in_line); - -/* - * load_string - parse a string for a keyword, value pair - * input: *destination - location into which result is stored - * keyword - string to search for - * in_line - string to search for keyword - * output: *destination - set to value, no change if value not found, - * if *destination had previous value, that memory location is automatically freed - * in_line - the keyword and value (if present) are overwritten by spaces - * return value - 0 if no error, otherwise an error code - * NOTE: destination must be free when no longer required - * NOTE: if destination is non-null at function call time, it will be freed - * NOTE: in_line is overwritten, do not use a constant - */ -extern int 
load_string (char **destination, char *keyword, char *in_line); - -/* - * match_feature - determine if the desired feature (seek) is one of those available - * input: seek - desired feature - * available - comma separated list of features - * output: returns 1 if found, 0 otherwise - */ +/* match_feature - determine if the desired feature (seek) is one of those available */ extern int match_feature (char *seek, char *available); -/* - * match_group - determine if the user is a member of any groups permitted to use this partition - * input: allow_groups - comma delimited list of groups permitted to use the partition, - * NULL is for all groups - * user_groups - comma delimited list of groups the user belongs to - * output: returns 1 if user is member, 0 otherwise - */ +/* match_group - determine if the user is a member of any groups permitted to use this partition */ extern int match_group (char *allow_groups, char *user_groups); /* node_lock - lock the node and configuration information */ @@ -492,19 +339,14 @@ extern void node_lock (); /* node_unlock - unlock the node and configuration information */ extern void node_unlock (); -/* - * node_name2bitmap - given a node name regular expression, build a bitmap representation - * input: node_names - list of nodes - * bitmap - place to put bitmap pointer - * output: bitmap - set to bitmap or null on error - * returns 0 if no error, otherwise EINVAL or ENOMEM - * NOTE: the caller must free memory at bitmap when no longer required - */ +/* node_name2bitmap - given a node name regular expression, build a bitmap representation */ extern int node_name2bitmap (char *node_names, bitstr_t **bitmap); /* * pack_all_jobs - dump all job information for all jobs in - * machine independent form (for network transmission) */ + * machine independent form (for network transmission) + * NOTE: the caller must xfree the buffer at *buffer_ptr when no longer required + */ extern void pack_all_jobs (char **buffer_ptr, int *buffer_size, time_t 
* update_time); @@ -516,31 +358,39 @@ extern void pack_all_node (char **buffer_ptr, int *buffer_size, time_t * update_ /* * pack_all_part - dump all partition information for all partitions in - * machine independent form (for network transmission) */ + * machine independent form (for network transmission) + * NOTE: the caller must xfree the buffer at *buffer_ptr when no longer required + */ extern void pack_all_part (char **buffer_ptr, int *buffer_size, time_t * update_time); /* * pack_all_step - dump all job step information for all steps in - * machine independent form (for network transmission) */ + * machine independent form (for network transmission) + * NOTE: the caller must xfree the buffer at *buffer_ptr when no longer required + */ extern void pack_all_step (char **buffer_ptr, int *buffer_size, time_t * update_time); /* * pack_job - dump all configuration information about a specific job in - * machine independent form (for network transmission) */ + * machine independent form (for network transmission) + */ extern void pack_job (struct job_record *dump_job_ptr, void **buf_ptr, int *buf_len); /* pack_node - dump all configuration information about a specific node in - * machine independent form (for network transmission) */ + * machine independent form (for network transmission) + */ extern void pack_node (struct node_record *dump_node_ptr, void **buf_ptr, int *buf_len); /* * pack_part - dump all configuration information about a specific partition in - * machine independent form (for network transmission) */ + * machine independent form (for network transmission) + */ extern void pack_part (struct part_record *part_record_point, void **buf_ptr, int *buf_len); /* * pack_step - dump state information about a specific job step in - * machine independent form (for network transmission) */ + * machine independent form (for network transmission) + */ extern void pack_step (struct step_record *dump_step_ptr, void **buf_ptr, int *buf_len); /* part_lock - lock 
the partition information */ @@ -555,66 +405,19 @@ extern void part_unlock (); */ void purge_old_job (void); -/* - * read_slurm_conf - load the slurm configuration from the configured file. - * read_slurm_conf can be called more than once if so desired. - * output: return - 0 if no error, otherwise an error code - * NOTE: call init_slurm_conf before ever calling read_slurm_conf. - */ +/* read_slurm_conf - load the slurm configuration from the configured file */ extern int read_slurm_conf ( ); -/* - * rehash - build a hash table of the node_record entries. this is a large hash table - * to permit the immediate finding of a record based only upon its name without regards - * to the number. there should be no need for a search. the algorithm is optimized for - * node names with a base-ten sequence number suffix. if you have a large cluster and - * use a different naming convention, this function and/or the hash_index function - * should be re-written. - * global: node_record_table_ptr - pointer to global node table - * hash_table - table of hash indecies - * NOTE: allocates memory for hash_table - */ +/* rehash - build a hash table of the node_record entries */ extern void rehash (); -/* - * report_leftover - report any un-parsed (non-whitespace) characters on the - * configuration input line. - * input: in_line - what is left of the configuration input line. - * line_num - line number of the configuration file. - * output: none - */ -/* extern void report_leftover (char *in_line, int line_num); */ - - -/* - * reset_job_bitmaps - reestablish bitmaps for existing jobs. - * this should be called after rebuilding node information, - * but before using any job entries. 
- * global: last_job_update - time of last job table update - * job_list - pointer to global job list - */ +/* reset_job_bitmaps - reestablish bitmaps for existing jobs */ extern void reset_job_bitmaps (); -/* - * schedule - attempt to schedule all pending jobs - * pending jobs for each partition will be scheduled in priority - * order until a request fails - * global: job_list - global list of job records - * last_job_update - time of last update to job table - */ +/* schedule - attempt to schedule all pending jobs */ void schedule(); -/* - * select_nodes - select and allocate nodes to a specific job - * input: job_ptr - pointer to the job record - * test_only - do not allocate nodes, just confirm they could be allocated now - * output: returns 0 on success, EINVAL if not possible to satisfy request, - * or EAGAIN if resources are presently busy - * job_ptr->nodes is set to the node list (on success) - * globals: list_part - global list of partition info - * default_part_loc - pointer to default partition - * config_list - global list of node configuration info - */ +/* select_nodes - select and allocate nodes to a specific job */ extern int select_nodes (struct job_record *job_ptr, int test_only); /* set_job_id - set a default job_id, insure that it is unique */ @@ -623,76 +426,26 @@ extern void set_job_id (struct job_record *job_ptr); /* set_job_prio - set a default job priority */ extern void set_job_prio (struct job_record *job_ptr); -/* - * slurm_parser - parse the supplied specification into keyword/value pairs - * only the keywords supplied will be searched for. the supplied specification - * is altered, overwriting the keyword and value pairs with spaces. - * input: spec - pointer to the string of specifications - * sets of three values (as many sets as required): keyword, type, value - * keyword - string with the keyword to search for including equal sign - * (e.g. 
"name=") - * type - char with value 'd' for int, 'f' for float, 's' for string - * value - pointer to storage location for value (char **) for type 's' - * output: spec - everything read is overwritten by speces - * value - set to read value (unchanged if keyword not found) - * return - 0 if no error, otherwise errno code - * NOTE: terminate with a keyword value of "END" - * NOTE: values of type (char *) are xfreed if non-NULL. caller must xfree any - * returned value - */ -extern int slurm_parser (char *spec, ...); - -/* - * step_create - parse the suppied job step specification and create step_records for it - * input: step_specs - job step specifications - * output: returns 0 on success, EINVAL if specification is invalid - * NOTE: the calling program must xfree the memory pointed to by new_job_id - */ +/* step_create - parse the supplied job step specification and create step_records for it */ extern int step_create ( step_specs *step_specs, struct step_record** ); -/* step_lock - lock the step information - * global: step_mutex - semaphore for the step table - */ +/* step_lock - lock the step information */ extern void step_lock (); -/* step_unlock - unlock the step information - * global: step_mutex - semaphore for the step table - */ +/* step_unlock - unlock the step information */ extern void step_unlock (); -/* - * update_job - update a job's parameters per the supplied specifications - * output: returns 0 on success, otherwise an error code from common/slurm_protocol_errno.h - * global: job_list - global list of job entries - * last_job_update - time of last job table update - */ +/* update_job - update a job's parameters per the supplied specification */ extern int update_job (job_desc_msg_t * job_specs); -/* - * update_node - update the configuration data for one or more nodes - * input: node_names - node names, may contain regular expression - * spec - the updates to the node's specification - * output: return - 0 if no error, otherwise an error code -
*/ +/* update_node - update the configuration data for one or more nodes per the supplied specification */ extern int update_node ( update_node_msg_t * update_node_msg ) ; -/* - * update_part - update a partition's configuration data - * global: part_list - list of partition entries - * last_part_update - update time of partition records - */ +/* update_part - update a partition's configuration data per the supplied specification */ extern int update_part (update_part_msg_t * part_desc ); -/* - * validate_node_specs - validate the node's specifications as valid, - * if not set state to down, in any case update last_response - * input: node_name - name of the node - * cpus - number of cpus measured - * real_memory - mega_bytes of real_memory measured - * tmp_disk - mega_bytes of tmp_disk measured - * output: returns 0 if no error, enoent if no such node, einval if values too low - */ +/* validate_node_specs - validate the node's specifications as valid */ extern int validate_node_specs (char *node_name, uint32_t cpus, uint32_t real_memory, uint32_t tmp_disk); diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index 5e0a147f2d6cb1d6da95e7c8cb7f6a4df215fb1c..f317ac34e099182de78284e11a776d7dff991bc2 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -3,7 +3,7 @@ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by moe jette <jette1@llnl.gov>. + * Written by moe jette <jette1@llnl.gov>, Joseph Ekstrom (ekstrom1@llnl.gov) * UCRL-CODE-2002-040. * * This file is part of SLURM, a resource management program.