diff --git a/src/api/allocate.c b/src/api/allocate.c index 6b98d4d885db3bcce93636c79ad11f89f8741636..ef61fcf527a4cd1838b09fcf76bc36ba831aafd8 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -13,6 +13,7 @@ #include <errno.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include <syslog.h> #include <sys/socket.h> @@ -20,7 +21,6 @@ #include <arpa/inet.h> #include <unistd.h> -#include "slurm.h" #include "slurmlib.h" #if DEBUG_MODULE @@ -34,10 +34,8 @@ main (int argc, char *argv[]) error_code = slurm_allocate ("User=1500 Script=/bin/hostname JobName=job01 TotalNodes=400 TotalProcs=1000 ReqNodes=lx[3000-3003] Partition=batch MinRealMemory=1024 MinTmpDisk=2034 Groups=students,employee MinProcs=4 Contiguous=YES Key=1234", &node_list, &job_id); - if (error_code) { + if (error_code) printf ("allocate error %d\n", error_code); - exit (error_code); - } else { printf ("allocate nodes %s to job %s\n", node_list, job_id); free (node_list); diff --git a/src/api/job_info.c b/src/api/job_info.c index 5687383e35644d6334cdd1224419778586a8dc4f..be33a197764eccf5b6793203fb1e50ce15b528f2 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -13,6 +13,7 @@ #include <errno.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include <syslog.h> #include <sys/socket.h> @@ -20,12 +21,9 @@ #include <arpa/inet.h> #include <unistd.h> -#include "slurm.h" +#include "pack.h" #include "slurmlib.h" -char *job_api_buffer = NULL; -int job_api_buffer_size = 0; - #if DEBUG_MODULE /* main is used here for testing purposes only */ int @@ -33,84 +31,95 @@ main (int argc, char *argv[]) { static time_t last_update_time = (time_t) NULL; int error_code, i; - char req_name[MAX_ID_LEN]; /* name of the job_id */ - char next_name[MAX_ID_LEN]; /* name of the next job_id */ - char job_name[MAX_NAME_LEN], partition[MAX_NAME_LEN]; - char job_state[MAX_NAME_LEN], node_list[FEATURE_SIZE]; - int time_limit, user_id; - time_t start_time, end_time; - int priority; + struct 
job_buffer *job_buffer_ptr = NULL; + struct job_table *job_ptr; - error_code = load_job (&last_update_time); + error_code = slurm_load_job (last_update_time, &job_buffer_ptr); if (error_code) { - printf ("load_job error %d\n", error_code); + printf ("slurm_load_job error %d\n", error_code); exit (error_code); } - strcpy (req_name, ""); /* start at beginning of job list */ - for (i = 1;; i++) { - error_code = - load_job_config (req_name, next_name, job_name, - partition, &user_id, job_state, node_list, - &time_limit, &start_time, &end_time, &priority); - - if (error_code != 0) { - printf ("load_job_config error %d on %s\n", - error_code, req_name); - break; - } - if ((i < 10) || (i % 10 == 0)) { - printf ("found JobId=%s JobName=%s Partition=%s ", - req_name, job_name, partition); - printf ("user_id=%d job_state=%s node_list=%s ", - user_id, job_state, node_list); - printf ("time_limit=%d priority=%d ", - time_limit, priority); - printf ("start_time=%lx end_time=%lx\n", - (long)start_time, (long)end_time); - } - else if ((i==10) || (i % 10 == 1)) - printf ("skipping...\n"); + printf("Updated at %lx, record count %d\n", + job_buffer_ptr->last_update, job_buffer_ptr->job_count); + job_ptr = job_buffer_ptr->job_table_ptr; + + for (i = 0; i < job_buffer_ptr->job_count; i++) { + printf ("JobId=%s UserId=%u ", + job_ptr[i].job_id, job_ptr[i].user_id); + printf ("JobState=%u TimeLimit=%u ", + job_ptr[i].job_state, job_ptr[i].time_limit); + printf ("Priority=%u Partition=%s\n", + job_ptr[i].priority, job_ptr[i].partition); + + printf (" Name=%s Nodes=%s ", + job_ptr[i].name, job_ptr[i].nodes); + printf ("StartTime=%x EndTime=%x\n", + (uint32_t) job_ptr[i].start_time, + (uint32_t) job_ptr[i].end_time); + + printf (" ReqProcs=%u ReqNodes=%u ", + job_ptr[i].num_procs, job_ptr[i].num_nodes); + printf ("Shared=%u Contiguous=%u\n", + job_ptr[i].shared, job_ptr[i].contiguous); + + printf (" MinProcs=%u MinMemory=%u ", + job_ptr[i].min_procs, job_ptr[i].min_memory); + printf 
("MinTmpDisk=%u TotalProcs=%u\n", + job_ptr[i].min_tmp_disk, job_ptr[i].total_procs); + + printf (" ReqNodes=%s Features=%s ", + job_ptr[i].req_nodes, job_ptr[i].features); + printf ("JobScript=%s\n\n", + job_ptr[i].job_script); - if (strlen (next_name) == 0) - break; - strcpy (req_name, next_name); } - free_job_info (); - exit (error_code); + slurm_free_job_info (job_buffer_ptr); + exit (0); } #endif /* - * free_job_info - free the job information buffer (if allocated) - * NOTE: buffer is loaded by load_job and used by load_job_name. + * slurm_free_job_info - free the job information buffer (if allocated) + * NOTE: buffer is loaded by load_job. */ void -free_job_info (void) +slurm_free_job_info (struct job_buffer *job_buffer_ptr) { - if (job_api_buffer) - free (job_api_buffer); + if (job_buffer_ptr == NULL) + return; + if (job_buffer_ptr->raw_buffer_ptr) + free (job_buffer_ptr->raw_buffer_ptr); + if (job_buffer_ptr->job_table_ptr) + free (job_buffer_ptr->job_table_ptr); } /* - * load_job - load the supplied job information buffer for use by info gathering - * APIs if job records have changed since the time specified. - * input: buffer - pointer to job information buffer - * buffer_size - size of buffer - * output: returns 0 if no error, EINVAL if the buffer is invalid, - * ENOMEM if malloc failure - * NOTE: buffer is used by load_job_config and freed by free_job_info. + * slurm_load_job - load the supplied job information buffer for use by info + * gathering APIs if job records have changed since the time specified. + * input: update_time - time of last update + * job_buffer_ptr - place to park job_buffer pointer + * output: job_buffer_ptr - pointer to allocated job_buffer + * returns -1 if no update since update_time, + * 0 if update with no error, + * EINVAL if the buffer (version or otherwise) is invalid, + * ENOMEM if malloc failure + * NOTE: the allocated memory at job_buffer_ptr freed by slurm_free_job_info. 
*/ int -load_job (time_t * last_update_time) { - int buffer_offset, buffer_size, error_code, in_size, version; - char request_msg[64], *buffer, *my_line; - int sockfd; +slurm_load_job (time_t update_time, struct job_buffer **job_buffer_ptr) +{ + int buffer_offset, buffer_size, in_size, i, sockfd; + char request_msg[64], *buffer; + void *buf_ptr; struct sockaddr_in serv_addr; - unsigned long my_time; + uint16_t uint16_tmp; + uint32_t uint32_tmp, uint32_time; + struct job_table *job; + *job_buffer_ptr = NULL; if ((sockfd = socket (AF_INET, SOCK_STREAM, 0)) < 0) return EINVAL; serv_addr.sin_family = PF_INET; @@ -123,7 +132,7 @@ load_job (time_t * last_update_time) { return EINVAL; } sprintf (request_msg, "DumpJob LastUpdate=%lu", - (long) (*last_update_time)); + (long) (update_time)); if (send (sockfd, request_msg, strlen (request_msg) + 1, 0) < strlen (request_msg)) { close (sockfd); @@ -155,143 +164,72 @@ load_job (time_t * last_update_time) { return ENOMEM; if (strcmp (buffer, "nochange") == 0) { free (buffer); - return 0; - } + return -1; + } /* load buffer's header (data structure version and time) */ - buffer_offset = 0; - error_code = - read_buffer (buffer, &buffer_offset, buffer_size, &my_line); - if ((error_code) || (strlen (my_line) < strlen (HEAD_FORMAT))) { -#if DEBUG_SYSTEM - fprintf (stderr, - "load_job: job buffer lacks valid header\n"); -#else - syslog (LOG_ERR, - "load_job: job buffer lacks valid header\n"); -#endif + buf_ptr = buffer; + unpack32 (&uint32_tmp, &buf_ptr, &buffer_size); + if (uint32_tmp != JOB_STRUCT_VERSION) { free (buffer); return EINVAL; - } - sscanf (my_line, HEAD_FORMAT, &my_time, &version); + } + unpack32 (&uint32_time, &buf_ptr, &buffer_size); + + /* load individual job info */ + job = NULL; + for (i = 0; buffer_size > 0; i++) { + job = realloc (job, sizeof(struct job_table) * (i+1)); + if (job == NULL) { + free (buffer); + return ENOMEM; + } + unpackstr_ptr (&job[i].job_id, &uint16_tmp, + &buf_ptr, &buffer_size); + unpack32 
(&job[i].user_id, &buf_ptr, &buffer_size); + unpack16 (&job[i].job_state, &buf_ptr, &buffer_size); + unpack32 (&job[i].time_limit, &buf_ptr, &buffer_size); + + unpack32 (&uint32_tmp, &buf_ptr, &buffer_size); + job[i].start_time = (time_t) uint32_tmp; + unpack32 (&uint32_tmp, &buf_ptr, &buffer_size); + job[i].end_time = (time_t) uint32_tmp; + unpack32 (&job[i].priority, &buf_ptr, &buffer_size); + + unpackstr_ptr (&job[i].nodes, &uint16_tmp, + &buf_ptr, &buffer_size); + unpackstr_ptr (&job[i].partition, &uint16_tmp, + &buf_ptr, &buffer_size); + unpackstr_ptr (&job[i].name, &uint16_tmp, + &buf_ptr, &buffer_size); + + unpack32 (&job[i].num_procs, &buf_ptr, &buffer_size); + unpack32 (&job[i].num_nodes, &buf_ptr, &buffer_size); + unpack16 (&job[i].shared, &buf_ptr, &buffer_size); + unpack16 (&job[i].contiguous, &buf_ptr, &buffer_size); + + unpack32 (&job[i].min_procs, &buf_ptr, &buffer_size); + unpack32 (&job[i].min_memory, &buf_ptr, &buffer_size); + unpack32 (&job[i].min_tmp_disk, &buf_ptr, &buffer_size); + unpack32 (&job[i].total_procs, &buf_ptr, &buffer_size); + + unpackstr_ptr (&job[i].req_nodes, &uint16_tmp, + &buf_ptr, &buffer_size); + unpackstr_ptr (&job[i].features, &uint16_tmp, + &buf_ptr, &buffer_size); + unpackstr_ptr (&job[i].job_script, &uint16_tmp, + &buf_ptr, &buffer_size); + } - if (version != JOB_STRUCT_VERSION) { -#if DEBUG_SYSTEM - fprintf (stderr, "load_part: expect version %d, read %d\n", - NODE_STRUCT_VERSION, version); -#else - syslog (LOG_ERR, "load_part: expect version %d, read %d\n", - NODE_STRUCT_VERSION, version); -#endif + *job_buffer_ptr = malloc (sizeof (struct job_buffer)); + if (*job_buffer_ptr == NULL) { free (buffer); - return EINVAL; - } - - *last_update_time = (time_t) my_time; - job_api_buffer = buffer; - job_api_buffer_size = buffer_size; + free (job); + return ENOMEM; + } + (*job_buffer_ptr)->last_update = (time_t) uint32_time; + (*job_buffer_ptr)->job_count = i; + (*job_buffer_ptr)->raw_buffer_ptr = buffer; + 
(*job_buffer_ptr)->job_table_ptr = job; return 0; } - - -/* - * load_job_config - load the state information about the named job - * input: req_name - job_id of the job for which information is requested - * if "", then get info for the first job in list - * next_name - location into which the name of the next job_id is - * stored, "" if no more - * job_name, etc. - pointers into which the information is to be stored - * output: next_name - job_id of the next job in the list - * job_name, etc. - the job's state information - * returns 0 on success, ENOENT if not found, or EINVAL if buffer is bad - * NOTE: req_name and next_name must be declared by the caller and - * have length MAX_ID_LEN or larger - * NOTE: job_name, partition, and job_state must be declared by the caller and - * have length MAX_NAME_LEN or larger - * NOTE: node_list must be declared by the caller and - * have length FEATURE_SIZE or larger (NOT SUFFICIENT, TEMPORARY USE ONLY) - * NOTE: buffer is loaded by load_job and freed by free_job_info. 
- */ -int -load_job_config (char *req_name, char *next_name, char *job_name, - char *partition, int *user_id, char *job_state, - char *node_list, int *time_limit, time_t *start_time, - time_t *end_time, int *priority) - -{ - int error_code, version, buffer_offset, my_user_id; - static time_t last_update_time, update_time; - struct job_record my_job; - static char next_job_id_value[MAX_ID_LEN]; - static int last_buffer_offset; - char my_job_id[MAX_ID_LEN], *my_line; - unsigned long my_time; - long my_start_time, my_end_time; - - /* load buffer's header (data structure version and time) */ - buffer_offset = 0; - error_code = - read_buffer (job_api_buffer, &buffer_offset, - job_api_buffer_size, &my_line); - if (error_code) - return error_code; - sscanf (my_line, HEAD_FORMAT, &my_time, &version); - update_time = (time_t) my_time; - - if ((update_time == last_update_time) - && (strcmp (req_name, next_job_id_value) == 0) - && (strlen (req_name) != 0)) - buffer_offset = last_buffer_offset; - last_update_time = update_time; - - while (1) { - /* load all information for next job */ - error_code = - read_buffer (job_api_buffer, &buffer_offset, - job_api_buffer_size, &my_line); - if (error_code == EFAULT) - break; /* end of buffer */ - if (error_code) - return error_code; - sscanf (my_line, JOB_STRUCT_FORMAT1, - my_job_id, - partition, - job_name, - &my_user_id, - node_list, - job_state, - &my_job.time_limit, - &my_start_time, - &my_end_time, - &my_job.priority); - if (strlen (req_name) == 0) - strncpy (req_name, my_job_id, MAX_ID_LEN); - - /* check if this is requested job */ - if (strcmp (req_name, my_job_id) != 0) - continue; - - /*load values to be returned */ - *user_id = my_user_id; - *time_limit = my_job.time_limit; - *start_time = (time_t) my_start_time; - *end_time = (time_t) my_end_time; - *priority = my_job.priority; - - last_buffer_offset = buffer_offset; - error_code = - read_buffer (job_api_buffer, &buffer_offset, - job_api_buffer_size, &my_line); - if 
(error_code) { /* no more records */ - strcpy (next_job_id_value, ""); - strcpy (next_name, ""); - } - else { - sscanf (my_line, "JobId=%s", my_job_id); - strncpy (next_job_id_value, my_job_id, MAX_ID_LEN); - strncpy (next_name, my_job_id, MAX_ID_LEN); - } - return 0; - } - return ENOENT; -} diff --git a/src/api/node_info.c b/src/api/node_info.c index 60bb0e6bc16a6d2dcf196e496a80ce3f381d456a..17031743859bc126f148860a99170fed7727e0f1 100644 --- a/src/api/node_info.c +++ b/src/api/node_info.c @@ -40,11 +40,13 @@ main (int argc, char *argv[]) exit (error_code); } - printf("Updated at %lx, record count %d\n", + printf("Updated at %lx, record count %u\n", node_buffer_ptr->last_update, node_buffer_ptr->node_count); node_ptr = node_buffer_ptr->node_table_ptr; for (i = 0; i < node_buffer_ptr->node_count; i++) { + /* to limit output we print only the first 10 entries, + * last 1 entry, and every 200th entry */ if ((i < 10) || (i % 200 == 0) || ((i + 1) == node_buffer_ptr->node_count)) { printf ("NodeName=%s CPUs=%u ", diff --git a/src/common/pack.h b/src/common/pack.h index 94f1cd25eb4a05d49945aa0f2c8d555bb0ce0734..24d86d1b3f2df81aa6a5d07690a762b5c54d8a27 100644 --- a/src/common/pack.h +++ b/src/common/pack.h @@ -86,7 +86,7 @@ void _unpackmem_xmalloc(char **valp, uint16_t *size_valp, void **bufp, int *lenp assert(sizeof(size_valp) == sizeof(uint16_t *));\ assert((bufp) != NULL && *(bufp) != NULL); \ assert((lenp) != NULL); \ - assert(*(lenp) >= sizeof(uint32_t)); \ + assert(*(lenp) >= sizeof(uint16_t)); \ _unpackmem_ptr(valp,(uint16_t *)size_valp,bufp,lenp);\ } while (0) @@ -97,7 +97,7 @@ void _unpackmem_xmalloc(char **valp, uint16_t *size_valp, void **bufp, int *lenp assert(sizeof(size_valp) == sizeof(uint16_t *));\ assert((bufp) != NULL && *(bufp) != NULL); \ assert((lenp) != NULL); \ - assert(*(lenp) >= sizeof(uint32_t)); \ + assert(*(lenp) >= sizeof(uint16_t)); \ _unpackmem_xmalloc(valp,(uint16_t *)size_valp,bufp,lenp);\ } while (0) diff --git a/src/common/slurm.h 
b/src/common/slurm.h index 53559f372ee8bc284cc094fe6006ade833b37484..46f00c3d41b713b15f5d04ae70c28ba29d9f2269 100644 --- a/src/common/slurm.h +++ b/src/common/slurm.h @@ -53,7 +53,7 @@ extern char *backup_controller; /* name of computer acting as slurm backup contr #define CONFIG_MAGIC 0xc065eded #define NODE_MAGIC 0x0de575ed -#define NO_VAL (-9812) +#define NO_VAL 0x7f7f7f7f struct config_record { uint32_t magic; /* magic cookie to test data integrity */ uint32_t cpus; /* count of cpus running on the node */ @@ -72,7 +72,7 @@ extern char *node_state_string[]; extern time_t last_bitmap_update; /* time of last node creation or deletion */ extern time_t last_node_update; /* time of last update to node records */ struct node_record { - unsigned magic; /* magic cookie to test data integrity */ + uint32_t magic; /* magic cookie to test data integrity */ char name[MAX_NAME_LEN]; /* name of the node. a null name indicates defunct node */ int node_state; /* enum node_states, negative if down */ time_t last_response; /* last response from the node */ @@ -113,24 +113,8 @@ extern char default_part_name[MAX_NAME_LEN]; /* name of default partition */ extern struct part_record *default_part_loc; /* location of default partition */ /* NOTE: change JOB_STRUCT_VERSION value whenever the contents of JOB_STRUCT_FORMAT change */ -#define JOB_STRUCT_VERSION 1 -#define JOB_STRUCT_FORMAT1 "JobId=%s Partition=%s JobName=%s UID=%d Nodes=%s State=%s TimeLimit=%d StartTime=%lx EndTime=%lx Priority=%d\n" -#define JOB_STRUCT_FORMAT2 "JobId=%s Partition=%s JobName=%s UID=%d Nodes=%s State=%s TimeLimit=%d StartTime=%lx EndTime=%lx Priority=%d TotalProcs=%d TotalNodes=%d ReqNodes=%s Features=%s Shared=%d Contiguous=%d MinProcs=%d MinMemory=%d MinTmpDisk=%d Distribution=%d Script=%s ProcsPerTask=%d TotalProcs=%d\n" extern time_t last_job_update; /* time of last update to part records */ -enum job_states { - JOB_PENDING, /* queued waiting for initiation */ - JOB_STAGE_IN, /* allocated resources, 
not yet running */ - JOB_RUNNING, /* allocated resources and executing */ - JOB_STAGE_OUT, /* completed execution, nodes not yet released */ - JOB_COMPLETE, /* completed execution successfully, nodes released */ - JOB_FAILED, /* completed execution unsuccessfully, nodes released */ - JOB_TIMEOUT, /* terminated on reaching time limit, nodes released */ - JOB_END /* last entry in table */ -}; -enum task_dist { - DIST_BLOCK, /* fill each node in turn */ - DIST_CYCLE /* one task each node, round-robin through nodes */ -}; + /* last entry must be "end", keep in sync with node_state */ extern char *job_state_string[]; @@ -149,14 +133,14 @@ struct job_details { uint32_t num_nodes; /* minimum number of nodes */ char *nodes; /* required nodes */ char *features; /* required features */ - unsigned shared:2; /* 1 if more than one job can execute on a node */ - unsigned contiguous:1; /* requires contiguous nodes, 1=true, 0=false */ + uint16_t shared; /* 1 if more than one job can execute on a node */ + uint16_t contiguous; /* requires contiguous nodes, 1=true, 0=false */ uint32_t min_procs; /* minimum processors per node, MB */ uint32_t min_memory; /* minimum memory per node, MB */ uint32_t min_tmp_disk; /* minimum temporary disk per node, MB */ enum task_dist dist; /* distribution of tasks, 0=fill, 0=cyclic */ char *job_script; /* name of job script to execute */ - uint32_t procs_per_task; /* processors required per task */ + uint16_t procs_per_task; /* processors required per task */ uint32_t total_procs; /* total number of allocated processors, for accounting */ char *node_list; /* comma separated assigned node list (by task) */ time_t submit_time; /* time of submission */ @@ -307,25 +291,6 @@ extern int delete_node_record (char *name); */ extern int delete_part_record (char *name); -/* - * dump_all_job - dump all partition information to a buffer - * input: buffer_ptr - location into which a pointer to the data is to be stored. 
- * the data buffer is actually allocated by dump_part and the - * calling function must xfree the storage. - * buffer_size - location into which the size of the created buffer is in bytes - * update_time - dump new data only if job records updated since time - * specified, otherwise return empty buffer - * detail - report job_detail only if set - * output: buffer_ptr - the pointer is set to the allocated buffer. - * buffer_size - set to size of the buffer in bytes - * update_time - set to time partition records last updated - * returns 0 if no error, errno otherwise - * global: job_list - global list of job records - * NOTE: the buffer at *buffer_ptr must be xfreed by the caller - */ -extern int dump_all_job (char **buffer_ptr, int *buffer_size, - time_t * update_time, int detail); - /* * find_job_record - return a pointer to the job record with the given job_id * input: job_id - requested job's id @@ -545,6 +510,26 @@ extern int node_name2bitmap (char *node_names, bitstr_t **bitmap); */ extern int node_name2list (char *node_names, char **node_list, int *node_count); +/* + * pack_all_jobs - dump all job information for all jobs in + * machine independent form (for network transmission) + * input: buffer_ptr - location into which a pointer to the data is to be stored. + * the calling function must xfree the storage. + * buffer_size - location into which the size of the created buffer is in bytes + * update_time - dump new data only if job records updated since time + * specified, otherwise return empty buffer + * output: buffer_ptr - the pointer is set to the allocated buffer. 
+ * buffer_size - set to size of the buffer in bytes + * update_time - set to time job records last updated + * returns 0 if no error, errno otherwise + * global: job_list - global list of job records + * NOTE: the buffer at *buffer_ptr must be xfreed by the caller + * NOTE: change JOB_STRUCT_VERSION in common/slurmlib.h whenever the format changes + * NOTE: change slurm_load_job() in api/job_info.c whenever the data format changes + */ +extern int pack_all_jobs (char **buffer_ptr, int *buffer_size, + time_t * update_time); + /* * pack_all_node - dump all configuration and node information for all nodes in * machine independent form (for network transmission) @@ -582,6 +567,20 @@ extern int pack_all_node (char **buffer_ptr, int *buffer_size, time_t * update_t */ extern int pack_all_part (char **buffer_ptr, int *buffer_size, time_t * update_time); +/* + * pack_job - dump all configuration information about a specific job in + * machine independent form (for network transmission) + * input: dump_job_ptr - pointer to job for which information is requested + * buf_ptr - buffer for job information + * buf_len - byte size of buffer + * output: buf_ptr - advanced to end of data written + * buf_len - byte size remaining in buffer + * return 0 if no error, 1 if buffer too small + * NOTE: change JOB_STRUCT_VERSION in common/slurmlib.h whenever the format changes + * NOTE: change slurm_load_job() in api/job_info.c whenever the data format changes + */ +extern int pack_job (struct job_record *dump_job_ptr, void **buf_ptr, int *buf_len); + /* * pack_node - dump all configuration information about a specific node in * machine independent form (for network transmission) diff --git a/src/common/slurmlib.h b/src/common/slurmlib.h index 0b96d550500b0db0e5cf2d15997ea8848983ea81..94d942f5d274c8716cacd38a41d52c4333d22ce6 100644 --- a/src/common/slurmlib.h +++ b/src/common/slurmlib.h @@ -8,6 +8,7 @@ #define BUILD_SIZE 128 #define BUILD_STRUCT_VERSION 1 #define FEATURE_SIZE 1024 
+#define JOB_STRUCT_VERSION 1 #define MAX_ID_LEN 32 #define MAX_NAME_LEN 16 #define NODE_STRUCT_VERSION 1 @@ -19,6 +20,23 @@ /* eg. the maximum count of nodes any job may use in some partition */ #define INFINITE (0xffffffff) +/* last entry must be JOB_END */ +enum job_states { + JOB_PENDING, /* queued waiting for initiation */ + JOB_STAGE_IN, /* allocated resources, not yet running */ + JOB_RUNNING, /* allocated resources and executing */ + JOB_STAGE_OUT, /* completed execution, nodes not yet released */ + JOB_COMPLETE, /* completed execution successfully, nodes released */ + JOB_FAILED, /* completed execution unsuccessfully, nodes released */ + JOB_TIMEOUT, /* terminated on reaching time limit, nodes released */ + JOB_END /* last entry in table */ +}; + +enum task_dist { + DIST_BLOCK, /* fill each node in turn */ + DIST_CYCLE /* one task each node, round-robin through nodes */ +}; + /* last entry must be STATE_END, keep in sync with node_state_string */ /* any value less than or equal to zero is down. if a node was in state */ /* STATE_BUSY and stops responding, its state becomes -(STATE_BUSY), etc. 
*/ @@ -26,7 +44,6 @@ enum node_states { STATE_DOWN, /* node is not responding */ STATE_UNKNOWN, /* node's initial state, unknown */ STATE_IDLE, /* node idle and available for use */ - STATE_STAGE_IN, /* node has been allocated, job not yet running */ STATE_BUSY, /* node has been allocated, job currently */ STATE_DRAINED, /* node idle and not to be allocated future work */ STATE_DRAINING, /* node in use, but not to be allocated future work */ @@ -62,6 +79,37 @@ struct build_buffer { struct build_table *build_table_ptr; }; +struct job_table { + char *job_id; /* job ID */ + char *name; /* name of the job */ + uint32_t user_id; /* user the job runs as */ + uint16_t job_state; /* state of the job, see enum job_states */ + uint32_t time_limit; /* maximum run time in minutes or INFINITE */ + time_t start_time; /* time execution begins, actual or expected */ + time_t end_time; /* time of termination, actual or expected */ + uint32_t priority; /* relative priority of the job */ + char *nodes; /* comma delimited list of nodes allocated to job */ + char *partition; /* name of assigned partition */ + uint32_t num_procs; /* number of processors required by job */ + uint32_t num_nodes; /* number of nodes required by job */ + uint16_t shared; /* 1 if job can share nodes with other jobs */ + uint16_t contiguous; /* 1 if job requires contiguous nodes */ + uint32_t min_procs; /* minimum processors required per node */ + uint32_t min_memory; /* minimum real memory required per node */ + uint32_t min_tmp_disk; /* minimum temporary disk required per node */ + uint32_t total_procs; /* number of allocated processors */ + char *req_nodes; /* comma separated list of required nodes */ + char *features; /* comma separated list of required features */ + char *job_script; /* pathname of required script */ +}; + +struct job_buffer { + time_t last_update; /* time of last buffer update */ + uint32_t job_count; /* count of entries in job_table */ + void *raw_buffer_ptr; /* raw network buffer info 
*/ + struct job_table *job_table_ptr; +}; + struct node_table { char *name; /* name of the node. a null name indicates defunct node */ uint32_t node_state; /* state of the node, see node_states */ @@ -69,13 +117,23 @@ struct node_table { uint32_t real_memory; /* megabytes of real memory on the node */ uint32_t tmp_disk; /* megabytes of total disk in TMP_FS */ uint32_t weight; /* desirability of use */ - char *features; /* comma delimited feature list */ char *partition; /* partition name */ + uint32_t num_procs; /* required number of processors */ + uint32_t num_nodes; /* required number of nodes */ + uint16_t shared; /* 1 if job willing to share nodes */ + uint16_t contiguous; /* 1 if job requires contiguous nodes */ + uint32_t min_procs; /* minimum processors per node */ + uint32_t min_memory; /* minimum real memory per node */ + uint32_t min_tmp_disk; /* minimum temporary disk per node */ + uint32_t total_procs; /* total processor count allocated to job */ + char *req_nodes; /* list of nodes required by the job */ + char *features; /* list of features required by the job */ + char *job_script; /* pathname of script to execute for the job */ }; struct node_buffer { time_t last_update; /* time of last buffer update */ - int node_count; /* count of entries in node_table */ + uint32_t node_count; /* count of entries in node_table */ void *raw_buffer_ptr; /* raw network buffer info */ struct node_table *node_table_ptr; }; @@ -134,6 +192,12 @@ extern int slurm_cancel (char *job_id); */ extern void slurm_free_build_info (struct build_buffer *build_buffer_ptr); +/* + * slurm_free_job_info - free the job information buffer (if allocated) + * NOTE: buffer is loaded by slurm_load_job. + */ +extern void slurm_free_job_info (struct job_buffer *job_buffer_ptr); + /* * slurm_free_node_info - free the node information buffer (if allocated) * NOTE: buffer is loaded by slurm_load_node. 
@@ -162,6 +226,20 @@ extern int slurm_load_build (time_t update_time, struct build_buffer **build_buffer_ptr); +/* + * slurm_load_job - load the supplied job information buffer for use by info + * gathering APIs if job records have changed since the time specified. + * input: update_time - time of last update + * job_buffer_ptr - place to park job_buffer pointer + * output: job_buffer_ptr - pointer to allocated job_buffer + * returns -1 if no update since update_time, + * 0 if update with no error, + * EINVAL if the buffer (version or otherwise) is invalid, + * ENOMEM if malloc failure + * NOTE: the allocated memory at job_buffer_ptr freed by slurm_free_job_info. + */ +extern int slurm_load_job (time_t update_time, struct job_buffer **job_buffer_ptr); + /* * slurm_load_node - load the supplied node information buffer for use by info * gathering APIs if node records have changed since the time specified. @@ -208,17 +286,6 @@ extern int slurm_load_part (time_t update_time, struct part_buffer **part_buffer */ extern int slurm_submit (char *spec, char **job_id); -/* - * load_job - load the supplied job information buffer for use by info gathering - * APIs if job records have changed since the time specified. - * input: buffer - pointer to job information buffer - * buffer_size - size of buffer - * output: returns 0 if no error, EINVAL if the buffer is invalid, - * ENOMEM if malloc failure - * NOTE: buffer is used by load_job_config and freed by free_job_info. - */ -extern int load_job (time_t * last_update_time); - /* * parse_node_name - parse the node name for regular expressions and return a sprintf format * generate multiple node names as needed. 
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index ba00d41a869ec9d5d30af44d49ba95017efc2836..090fe03bffd7f6048d9d0dc3e7342aaa873fc1c9 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -22,6 +22,7 @@ #include <unistd.h> #include "slurm.h" +#include "pack.h" #define BUF_SIZE 1024 @@ -75,6 +76,7 @@ main (int argc, char *argv[]) { } if (error_code < 0) fatal ("slurmctld: error %d from bind\n", errno); + info ("slurmctld ready for service\n"); listen (sockfd, 5); while (1) { @@ -113,31 +115,13 @@ dump_build (char **buffer_ptr, int *buffer_size, time_t last_update) int buf_len, buffer_allocated; char *buffer; void *buf_ptr; - uint16_t backup_len, control_len, epilog_len, init_len, prio_len; - uint16_t prolog_len, server_len, conf_len, tmpfs_len; - uint16_t primary_len, secondary_len; buffer_ptr[0] = NULL; *buffer_size = 0; if (init_time <= last_update) return 0; - backup_len = strlen (BACKUP_LOCATION); - secondary_len = strlen (backup_controller); - control_len = strlen (CONTROL_DAEMON); - primary_len = strlen (control_machine); - epilog_len = strlen (EPILOG); - init_len = strlen (INIT_PROGRAM); - prio_len = strlen (PRIORITIZE); - prolog_len = strlen (PROLOG); - server_len = strlen (SERVER_DAEMON); - conf_len = strlen (SLURM_CONF); - tmpfs_len = strlen (TMP_FS); - - buffer_allocated = (BUF_SIZE + backup_len + control_len + - epilog_len + init_len + prio_len + - prolog_len + server_len + conf_len + - tmpfs_len + primary_len + secondary_len); + buffer_allocated = (BUF_SIZE); buffer = xmalloc(buffer_allocated); buf_ptr = buffer; buf_len = buffer_allocated; @@ -148,23 +132,23 @@ dump_build (char **buffer_ptr, int *buffer_size, time_t last_update) /* write data values */ pack16 ((uint16_t) BACKUP_INTERVAL, &buf_ptr, &buf_len); - packstr (BACKUP_LOCATION, backup_len, &buf_ptr, &buf_len); - packstr (backup_controller, secondary_len, &buf_ptr, &buf_len); - packstr (CONTROL_DAEMON, control_len, &buf_ptr, &buf_len); - packstr 
(control_machine, primary_len, &buf_ptr, &buf_len); + packstr (BACKUP_LOCATION, &buf_ptr, &buf_len); + packstr (backup_controller, &buf_ptr, &buf_len); + packstr (CONTROL_DAEMON, &buf_ptr, &buf_len); + packstr (control_machine, &buf_ptr, &buf_len); pack16 ((uint16_t) CONTROLLER_TIMEOUT, &buf_ptr, &buf_len); - packstr (EPILOG, epilog_len, &buf_ptr, &buf_len); + packstr (EPILOG, &buf_ptr, &buf_len); pack16 ((uint16_t) FAST_SCHEDULE, &buf_ptr, &buf_len); pack16 ((uint16_t) HASH_BASE, &buf_ptr, &buf_len); pack16 ((uint16_t) HEARTBEAT_INTERVAL, &buf_ptr, &buf_len); - packstr (INIT_PROGRAM, init_len, &buf_ptr, &buf_len); + packstr (INIT_PROGRAM, &buf_ptr, &buf_len); pack16 ((uint16_t) KILL_WAIT, &buf_ptr, &buf_len); - packstr (PRIORITIZE, prio_len, &buf_ptr, &buf_len); - packstr (PROLOG, prolog_len, &buf_ptr, &buf_len); - packstr (SERVER_DAEMON, server_len, &buf_ptr, &buf_len); + packstr (PRIORITIZE, &buf_ptr, &buf_len); + packstr (PROLOG, &buf_ptr, &buf_len); + packstr (SERVER_DAEMON, &buf_ptr, &buf_len); pack16 ((uint16_t) SERVER_TIMEOUT, &buf_ptr, &buf_len); - packstr (SLURM_CONF, conf_len, &buf_ptr, &buf_len); - packstr (TMP_FS, tmpfs_len, &buf_ptr, &buf_len); + packstr (SLURM_CONF, &buf_ptr, &buf_len); + packstr (TMP_FS, &buf_ptr, &buf_len); *buffer_size = (char *)buf_ptr - buffer; xrealloc (buffer, *buffer_size); @@ -179,7 +163,7 @@ dump_build (char **buffer_ptr, int *buffer_size, time_t last_update) */ void slurmctld_req (int sockfd) { - int error_code, detail, in_size, i; + int error_code, in_size, i; char in_line[BUF_SIZE], node_name[MAX_NAME_LEN]; int cpus, real_memory, tmp_disk; char *job_id_ptr, *node_name_ptr, *part_name, *time_stamp; @@ -268,17 +252,13 @@ slurmctld_req (int sockfd) { } else last_update = (time_t) 0; - if (in_line[7] == 'L') - detail = 1; - else - detail = 0; - error_code = dump_all_job (&dump, &dump_size, - &last_update, detail); + + error_code = pack_all_jobs (&dump, &dump_size, &last_update); if (error_code) - info ("slurmctld_req: 
dump_all_job error %d, time=%ld", + info ("slurmctld_req: pack_all_jobs error %d, time=%ld", error_code, (long) (clock () - start_time)); else - info ("slurmctld_req: dump_all_job returning %d bytes, time=%ld", + info ("slurmctld_req: pack_all_jobs returning %d bytes, time=%ld", dump_size, (long) (clock () - start_time)); if (dump_size == 0) send (sockfd, "nochange", 9, 0); diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 225583cd07c6ed63d4ae98afba1149131ef92453..6b384a3457ed4e8cb4a5483bbd13a20599a03e07 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -16,9 +16,11 @@ #include <string.h> #include "list.h" +#include "pack.h" #include "slurm.h" #define BUF_SIZE 1024 +#define MAX_STR_PACK 128 int job_count; /* job's in the system */ List job_list = NULL; /* job_record list */ @@ -27,8 +29,6 @@ static pthread_mutex_t job_mutex = PTHREAD_MUTEX_INITIALIZER; /* lock for job in char *job_state_string[] = { "PENDING", "STAGE_IN", "RUNNING", "STAGE_OUT", "COMPLETED", "FAILED", "TIME_OUT", "END" }; -int dump_job (struct job_record *dump_job_ptr, char *out_line, int out_line_size, - int detail); void list_delete_job (void *job_entry); int list_find_job_id (void *job_entry, void *key); int list_find_job_old (void *job_entry, void *key); @@ -37,9 +37,10 @@ void set_job_prio (struct job_record *job_ptr); #if DEBUG_MODULE /* main is used here for module testing purposes only */ +int main (int argc, char *argv[]) { - int dump_size, error_code, i; + int dump_size, error_code, error_count = 0, i; time_t update_time = (time_t) NULL; struct job_record * job_rec; log_options_t opts = LOG_OPTS_STDERR_ONLY; @@ -49,75 +50,80 @@ main (int argc, char *argv[]) printf("initialize the database and create a few jobs\n"); log_init(argv[0], opts, SYSLOG_FACILITY_DAEMON, NULL); error_code = init_job_conf (); - if (error_code) + if (error_code) { printf ("ERROR: init_job_conf error %d\n", error_code); - + error_count++; + } job_rec = 
create_job_record(&error_code); if ((job_rec == NULL) || error_code) { - printf("ERROR:create_job_record failure %d\n",error_code); - exit(1); + printf ("ERROR:create_job_record failure %d\n", error_code); + error_count++; + exit(error_count); } - strcpy(job_rec->name, "Name1"); - strcpy(job_rec->partition, "batch"); + strcpy (job_rec->name, "Name1"); + strcpy (job_rec->partition, "batch"); job_rec->details->job_script = xmalloc(20); - strcpy(job_rec->details->job_script, "/bin/hostname"); + strcpy (job_rec->details->job_script, "/bin/hostname"); job_rec->details->num_nodes = 1; job_rec->details->num_procs = 1; set_job_id(job_rec); set_job_prio(job_rec); - strcpy(tmp_id, job_rec->job_id); + strcpy (tmp_id, job_rec->job_id); for (i=1; i<=4; i++) { - job_rec = create_job_record(&error_code); + job_rec = create_job_record (&error_code); if ((job_rec == NULL) || error_code) { - printf("ERROR:create_job_record failure %d\n",error_code); - exit(1); + printf ("ERROR:create_job_record failure %d\n",error_code); + error_count++; + exit (error_count); } - strcpy(job_rec->name, "Name2"); - strcpy(job_rec->partition, "debug"); + strcpy (job_rec->name, "Name2"); + strcpy (job_rec->partition, "debug"); job_rec->details->job_script = xmalloc(20); - strcpy(job_rec->details->job_script, "/bin/hostname"); + strcpy (job_rec->details->job_script, "/bin/hostname"); job_rec->details->num_nodes = i; job_rec->details->num_procs = i; - set_job_id(job_rec); - set_job_prio(job_rec); + set_job_id (job_rec); + set_job_prio (job_rec); } - printf("\nupdate a job record\n"); + printf ("\nupdate a job record\n"); error_code = update_job (tmp_id, update_spec); - if (error_code) + if (error_code) { printf ("ERROR: update_job error %d\n", error_code); + error_count++; + } - error_code = dump_all_job (&dump, &dump_size, &update_time, 1); - if (error_code) + error_code = pack_all_jobs (&dump, &dump_size, &update_time); + if (error_code) { printf ("ERROR: dump_all_job error %d\n", error_code); - else { 
- printf("\ndump of job info:\n"); - for (i=0; i<dump_size; ) { - printf("%s", &dump[i]); - i += strlen(&dump[i]) + 1; - } - printf("\n"); + error_count++; } if (dump) xfree(dump); job_rec = find_job_record (tmp_id); - if (job_rec == NULL) + if (job_rec == NULL) { printf("find_job_record error 1\n"); + error_count++; + } else - printf("found job %s, script=%s\n", + printf ("found job %s, script=%s\n", job_rec->job_id, job_rec->details->job_script); - error_code = delete_job_record(tmp_id); - if (error_code) + error_code = delete_job_record (tmp_id); + if (error_code) { printf ("ERROR: delete_job_record error %d\n", error_code); + error_count++; + } job_rec = find_job_record (tmp_id); - if (job_rec != NULL) - printf("find_job_record error 2\n"); + if (job_rec != NULL) { + printf ("find_job_record error 2\n"); + error_count++; + } - exit (0); + exit (error_count); } #endif @@ -154,11 +160,9 @@ create_job_record (int *error_code) job_details_point = (struct job_details *) xmalloc (sizeof (struct job_details)); - memset (job_record_point, 0, sizeof (struct job_record)); job_record_point->magic = JOB_MAGIC; job_record_point->details = job_details_point; - memset (job_details_point, 0, sizeof (struct job_details)); job_details_point->magic = DETAILS_MAGIC; job_details_point->submit_time = time (NULL); job_details_point->procs_per_task = 1; @@ -195,199 +199,6 @@ delete_job_record (char *job_id) } -/* - * dump_all_job - dump all partition information to a buffer - * input: buffer_ptr - location into which a pointer to the data is to be stored. - * the data buffer is actually allocated by dump_part and the - * calling function must xfree the storage. - * buffer_size - location into which the size of the created buffer is in bytes - * update_time - dump new data only if job records updated since time - * specified, otherwise return empty buffer - * detail - report job_detail only if set - * output: buffer_ptr - the pointer is set to the allocated buffer. 
- * buffer_size - set to size of the buffer in bytes - * update_time - set to time partition records last updated - * returns 0 if no error, errno otherwise - * global: job_list - global list of job records - * NOTE: the buffer at *buffer_ptr must be xfreed by the caller - */ -int -dump_all_job (char **buffer_ptr, int *buffer_size, time_t * update_time, - int detail) -{ - ListIterator job_record_iterator; - struct job_record *job_record_point; - char *buffer; - int buffer_offset, buffer_allocated, error_code; - char out_line[BUF_SIZE]; - - buffer_ptr[0] = NULL; - *buffer_size = 0; - buffer = NULL; - buffer_offset = 0; - buffer_allocated = 0; - if (*update_time == last_job_update) - return 0; - - job_record_iterator = list_iterator_create (job_list); - - /* write header, version and time */ - sprintf (out_line, HEAD_FORMAT, (unsigned long) last_job_update, - JOB_STRUCT_VERSION); - if (write_buffer - (&buffer, &buffer_offset, &buffer_allocated, out_line)) - goto cleanup; - - /* write individual job records */ - while ((job_record_point = - (struct job_record *) list_next (job_record_iterator))) { - if (job_record_point->magic != JOB_MAGIC) - fatal ("dump_all_job: data integrity is bad"); - - error_code = dump_job(job_record_point, out_line, BUF_SIZE, detail); - if (error_code != 0) continue; - - if (write_buffer - (&buffer, &buffer_offset, &buffer_allocated, out_line)) - goto cleanup; - } - - list_iterator_destroy (job_record_iterator); - xrealloc (buffer, buffer_offset); - - buffer_ptr[0] = buffer; - *buffer_size = buffer_offset; - *update_time = last_job_update; - return 0; - - cleanup: - list_iterator_destroy (job_record_iterator); - if (buffer) - xfree (buffer); - return EINVAL; -} - - -/* - * dump_job - dump all configuration information about a specific job to a buffer - * input: dump_job_ptr - pointer to job for which information is requested - * out_line - buffer for partition information - * out_line_size - byte size of out_line - * detail - report 
job_detail only if set - * output: out_line - set to partition information values - * return 0 if no error, 1 if out_line buffer too small - * NOTE: if you make any changes here be sure to increment the value of - * JOB_STRUCT_VERSION and make the corresponding changes to load_part_config - * in api/partition_info.c - */ -int -dump_job (struct job_record *dump_job_ptr, char *out_line, int out_line_size, - int detail) -{ - char *job_id, *name, *partition, *nodes, *req_nodes, *features; - char *job_script; - struct job_details *detail_ptr; - - if (dump_job_ptr->job_id) - job_id = dump_job_ptr->job_id; - else - job_id = "NONE"; - - if (dump_job_ptr->name) - name = dump_job_ptr->name; - else - name = "NONE"; - - if (dump_job_ptr->partition) - partition = dump_job_ptr->partition; - else - partition = "NONE"; - - if (dump_job_ptr->nodes) - nodes = dump_job_ptr->nodes; - else - nodes = "NONE"; - - if (detail == 0 || (dump_job_ptr->details == NULL)) { - if ((strlen(JOB_STRUCT_FORMAT1) + strlen(job_id) + - strlen(partition) + strlen(name) + strlen(nodes) + - strlen(job_state_string[dump_job_ptr->job_state]) + 20) > - out_line_size) { - error ("dump_job: buffer too small for job %s", job_id); - return 1; - } - - sprintf (out_line, JOB_STRUCT_FORMAT1, - job_id, - partition, - name, - (int) dump_job_ptr->user_id, - nodes, - job_state_string[dump_job_ptr->job_state], - dump_job_ptr->time_limit, - (long) dump_job_ptr->start_time, - (long) dump_job_ptr->end_time, - dump_job_ptr->priority); - } - else { - detail_ptr = dump_job_ptr->details; - if (detail_ptr->magic != DETAILS_MAGIC) - fatal ("dump_job: bad detail pointer for job_id %s", job_id); - - if (detail_ptr->nodes) - req_nodes = detail_ptr->nodes; - else - req_nodes = "NONE"; - - if (detail_ptr->features) - features = detail_ptr->features; - else - features = "NONE"; - - if (detail_ptr->job_script) - job_script = detail_ptr->job_script; - else - job_script = "NONE"; - - if ((strlen(JOB_STRUCT_FORMAT1) + strlen(job_id) + - 
strlen(partition) + strlen(name) + strlen(nodes) + - strlen(job_state_string[dump_job_ptr->job_state]) + - strlen(req_nodes) + strlen(features) + - strlen(job_script) + 20) > out_line_size) { - error ("dump_job: buffer too small for job %s", job_id); - return 1; - } - - sprintf (out_line, JOB_STRUCT_FORMAT2, - job_id, - partition, - name, - (int) dump_job_ptr->user_id, - nodes, - job_state_string[dump_job_ptr->job_state], - dump_job_ptr->time_limit, - (long) dump_job_ptr->start_time, - (long) dump_job_ptr->end_time, - dump_job_ptr->priority, - detail_ptr->num_procs, - detail_ptr->num_nodes, - req_nodes, - features, - detail_ptr->shared, - detail_ptr->contiguous, - detail_ptr->min_procs, - detail_ptr->min_memory, - detail_ptr->min_tmp_disk, - (int) detail_ptr->dist, - job_script, - detail_ptr->procs_per_task, - detail_ptr->total_procs); - } - - return 0; -} - - /* * find_job_record - return a pointer to the job record with the given job_id * input: job_id - requested job's id @@ -880,6 +691,180 @@ list_find_job_old (void *job_entry, void *key) } +/* + * pack_all_jobs - dump all job information for all jobs in + * machine independent form (for network transmission) + * input: buffer_ptr - location into which a pointer to the data is to be stored. + * the calling function must xfree the storage. + * buffer_size - location into which the size of the created buffer is in bytes + * update_time - dump new data only if job records updated since time + * specified, otherwise return empty buffer + * output: buffer_ptr - the pointer is set to the allocated buffer. 
+ * buffer_size - set to size of the buffer in bytes + * update_time - set to time job records last updated + * returns 0 if no error, errno otherwise + * global: job_list - global list of job records + * NOTE: the buffer at *buffer_ptr must be xfreed by the caller + * NOTE: change JOB_STRUCT_VERSION in common/slurmlib.h whenever the format changes + * NOTE: change slurm_load_job() in api/job_info.c whenever the data format changes + */ +int +pack_all_jobs (char **buffer_ptr, int *buffer_size, time_t * update_time) +{ + ListIterator job_record_iterator; + struct job_record *job_record_point; + int buf_len, buffer_allocated, buffer_offset = 0, error_code; + char *buffer; + void *buf_ptr; + + buffer_ptr[0] = NULL; + *buffer_size = 0; + if (*update_time == last_job_update) + return 0; + + buffer_allocated = (BUF_SIZE*16); + buffer = xmalloc(buffer_allocated); + buf_ptr = buffer; + buf_len = buffer_allocated; + + job_record_iterator = list_iterator_create (job_list); + + /* write header: version and time */ + pack32 ((uint32_t) JOB_STRUCT_VERSION, &buf_ptr, &buf_len); + pack32 ((uint32_t) last_job_update, &buf_ptr, &buf_len); + + /* write individual job records */ + while ((job_record_point = + (struct job_record *) list_next (job_record_iterator))) { + if (job_record_point->magic != JOB_MAGIC) + fatal ("dump_all_job: job integrity is bad"); + + error_code = pack_job(job_record_point, &buf_ptr, &buf_len); + if (error_code != 0) continue; + if (buf_len > BUF_SIZE) + continue; + buffer_allocated += (BUF_SIZE*16); + buf_len += (BUF_SIZE*16); + buffer_offset = (char *)buf_ptr - buffer; + xrealloc(buffer, buffer_allocated); + buf_ptr = buffer + buffer_offset; + } + + list_iterator_destroy (job_record_iterator); + buffer_offset = (char *)buf_ptr - buffer; + xrealloc (buffer, buffer_offset); + + buffer_ptr[0] = buffer; + *buffer_size = buffer_offset; + *update_time = last_job_update; + return 0; +} + + +/* + * pack_job - dump all configuration information about a 
specific job in + * machine independent form (for network transmission) + * input: dump_job_ptr - pointer to job for which information is requested + * buf_ptr - buffer for job information + * buf_len - byte size of buffer + * output: buf_ptr - advanced to end of data written + * buf_len - byte size remaining in buffer + * return 0 if no error, 1 if buffer too small + * NOTE: change JOB_STRUCT_VERSION in common/slurmlib.h whenever the format changes + * NOTE: change slurm_load_job() in api/job_info.c whenever the data format changes + */ +int +pack_job (struct job_record *dump_job_ptr, void **buf_ptr, int *buf_len) +{ + char tmp_str[MAX_STR_PACK]; + struct job_details *detail_ptr; + + if (dump_job_ptr->job_id == NULL || + strlen (dump_job_ptr->job_id) < MAX_STR_PACK) + packstr (dump_job_ptr->job_id, buf_ptr, buf_len); + else { + strncpy(tmp_str, dump_job_ptr->job_id, MAX_STR_PACK); + tmp_str[MAX_STR_PACK-1] = (char) NULL; + packstr (tmp_str, buf_ptr, buf_len); + } + pack32 (dump_job_ptr->user_id, buf_ptr, buf_len); + pack16 ((uint16_t) dump_job_ptr->job_state, buf_ptr, buf_len); + pack32 (dump_job_ptr->time_limit, buf_ptr, buf_len); + + pack32 ((uint32_t) dump_job_ptr->start_time, buf_ptr, buf_len); + pack32 ((uint32_t) dump_job_ptr->end_time, buf_ptr, buf_len); + pack32 (dump_job_ptr->priority, buf_ptr, buf_len); + + packstr (dump_job_ptr->nodes, buf_ptr, buf_len); + packstr (dump_job_ptr->partition, buf_ptr, buf_len); + if (dump_job_ptr->name == NULL || + strlen (dump_job_ptr->name) < MAX_STR_PACK) + packstr (dump_job_ptr->name, buf_ptr, buf_len); + else { + strncpy(tmp_str, dump_job_ptr->name, MAX_STR_PACK); + tmp_str[MAX_STR_PACK-1] = (char) NULL; + packstr (tmp_str, buf_ptr, buf_len); + } + + detail_ptr = dump_job_ptr->details; + if (detail_ptr) { + if (detail_ptr->magic != DETAILS_MAGIC) + fatal ("dump_all_job: job detail integrity is bad"); + pack32 ((uint32_t) detail_ptr->num_procs, buf_ptr, buf_len); + pack32 ((uint32_t) detail_ptr->num_nodes, buf_ptr, 
buf_len); + pack16 ((uint16_t) detail_ptr->shared, buf_ptr, buf_len); + pack16 ((uint16_t) detail_ptr->contiguous, buf_ptr, buf_len); + + pack32 ((uint32_t) detail_ptr->min_procs, buf_ptr, buf_len); + pack32 ((uint32_t) detail_ptr->min_memory, buf_ptr, buf_len); + pack32 ((uint32_t) detail_ptr->min_tmp_disk, buf_ptr, buf_len); + pack32 ((uint32_t) detail_ptr->total_procs, buf_ptr, buf_len); + + if (detail_ptr->nodes == NULL || + strlen (detail_ptr->nodes) < MAX_STR_PACK) + packstr (detail_ptr->nodes, buf_ptr, buf_len); + else { + strncpy(tmp_str, detail_ptr->nodes, MAX_STR_PACK); + tmp_str[MAX_STR_PACK-1] = (char) NULL; + packstr (tmp_str, buf_ptr, buf_len); + } + if (detail_ptr->features == NULL || + strlen (detail_ptr->features) < MAX_STR_PACK) + packstr (detail_ptr->features, buf_ptr, buf_len); + else { + strncpy(tmp_str, detail_ptr->features, MAX_STR_PACK); + tmp_str[MAX_STR_PACK-1] = (char) NULL; + packstr (tmp_str, buf_ptr, buf_len); + } + if (detail_ptr->job_script == NULL || + strlen (detail_ptr->job_script) < MAX_STR_PACK) + packstr (detail_ptr->job_script, buf_ptr, buf_len); + else { + strncpy(tmp_str, detail_ptr->job_script, MAX_STR_PACK); + tmp_str[MAX_STR_PACK-1] = (char) NULL; + packstr (tmp_str, buf_ptr, buf_len); + } + } + else { + pack32 ((uint32_t) 0, buf_ptr, buf_len); + pack32 ((uint32_t) 0, buf_ptr, buf_len); + pack16 ((uint16_t) 0, buf_ptr, buf_len); + pack16 ((uint16_t) 0, buf_ptr, buf_len); + + pack32 ((uint32_t) 0, buf_ptr, buf_len); + pack32 ((uint32_t) 0, buf_ptr, buf_len); + pack32 ((uint32_t) 0, buf_ptr, buf_len); + pack32 ((uint32_t) 0, buf_ptr, buf_len); + + packstr (NULL, buf_ptr, buf_len); + packstr (NULL, buf_ptr, buf_len); + packstr (NULL, buf_ptr, buf_len); + } + + return 0; +} + + /* * parse_job_specs - pick the appropriate fields out of a job request specification * input: job_specs - string containing the specification diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 
3edc12b739788a2ea656551d3811b6ec766a9c53..68f5bf42ced1dce37e474e117ea50a4ab7cf7554 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -44,7 +44,7 @@ void split_node_name (char *name, char *prefix, char *suffix, int *index, int main (int argc, char *argv[]) { - int error_code, node_count, i; + int error_code, error_count, node_count, i; uint32_t total_procs; char *out_line; bitstr_t *map1, *map2, *map3; @@ -69,11 +69,14 @@ main (int argc, char *argv[]) map2 = bit_copy (map1); bit_set (map2, 11); node_record_count = 0; + error_count = 0; /* now check out configuration and node structure functions */ error_code = init_node_conf (); - if (error_code) + if (error_code) { printf ("ERROR: init_node_conf error %d\n", error_code); + error_count++; + } default_config_record.cpus = 12; default_config_record.real_memory = 345; default_config_record.tmp_disk = 67; @@ -81,14 +84,22 @@ main (int argc, char *argv[]) default_node_record.last_response = (time_t) 678; config_ptr = create_config_record (); - if (config_ptr->cpus != 12) + if (config_ptr->cpus != 12) { printf ("ERROR: config default cpus not set\n"); - if (config_ptr->real_memory != 345) + error_count++; + } + if (config_ptr->real_memory != 345) { printf ("ERROR: config default real_memory not set\n"); - if (config_ptr->tmp_disk != 67) + error_count++; + } + if (config_ptr->tmp_disk != 67) { printf ("ERROR: config default tmp_disk not set\n"); - if (config_ptr->weight != 89) + error_count++; + } + if (config_ptr->weight != 89) { printf ("ERROR: config default weight not set\n"); + error_count++; + } config_ptr->feature = "for_lx01,lx02"; config_ptr->nodes = "lx[01-02]"; config_ptr->node_bitmap = map1; @@ -100,11 +111,15 @@ main (int argc, char *argv[]) printf("NOTE: We are setting lx[01-02] to state draining\n"); error_code = update_node ("lx[01-02]", update_spec); - if (error_code) + if (error_code) { printf ("ERROR: update_node error1 %d\n", error_code); - if (node_ptr->node_state != 
STATE_DRAINING) + error_count++; + } + if (node_ptr->node_state != STATE_DRAINING) { printf ("ERROR: update_node error2 node_state=%d\n", node_ptr->node_state); + error_count++; + } config_ptr = create_config_record (); config_ptr->cpus = 54; @@ -112,19 +127,29 @@ main (int argc, char *argv[]) config_ptr->feature = "for_lx03,lx04"; config_ptr->node_bitmap = map2; node_ptr = create_node_record (config_ptr, "lx03"); - if (node_ptr->last_response != (time_t) 678) + if (node_ptr->last_response != (time_t) 678) { printf ("ERROR: node default last_response not set\n"); - if (node_ptr->cpus != 54) + error_count++; + } + if (node_ptr->cpus != 54) { printf ("ERROR: node default cpus not set\n"); - if (node_ptr->real_memory != 345) + error_count++; + } + if (node_ptr->real_memory != 345) { printf ("ERROR: node default real_memory not set\n"); - if (node_ptr->tmp_disk != 67) + error_count++; + } + if (node_ptr->tmp_disk != 67) { printf ("ERROR: node default tmp_disk not set\n"); + error_count++; + } node_ptr = create_node_record (config_ptr, "lx04"); error_code = node_name2list (node_names, &node_list, &node_count); - if (error_code) + if (error_code) { printf ("ERROR: node_name2list error %d\n", error_code); + error_count++; + } printf("node_name2list for %s generates\n ", node_names); for (i = 0; i < node_count; i++) printf("%s ", &node_list[i*MAX_NAME_LEN]); @@ -132,11 +157,15 @@ main (int argc, char *argv[]) xfree(node_list); error_code = node_name2bitmap ("lx[01-02],lx04", &map3); - if (error_code) + if (error_code) { printf ("ERROR: node_name2bitmap error %d\n", error_code); + error_count++; + } error_code = bitmap2node_name (map3, &out_line); - if (error_code) + if (error_code) { printf ("ERROR: bitmap2node_name error %d\n", error_code); + error_count++; + } if (strcmp (out_line, "lx[01-02],lx04") != 0) printf ("ERROR: bitmap2node_name results bad %s vs %s\n", out_line, "lx[01-02],lx04"); @@ -148,59 +177,71 @@ main (int argc, char *argv[]) xfree (node_list); error_code 
= validate_node_specs ("lx01", 12, 345, 67); - if (error_code) + if (error_code) { printf ("ERROR: validate_node_specs error1\n"); + error_count++; + } printf("dumping node info\n"); update_time = (time_t) 0; error_code = pack_all_node (&dump, &dump_size, &update_time); - if (error_code) + if (error_code) { printf ("ERROR: pack_all_node error %d\n", error_code); + error_count++; + } if (dump) xfree(dump); update_time = (time_t) 0; - error_code = dump_all_node (&dump, &dump_size, &update_time); - if (error_code) - printf ("ERROR: dump_all_node error %d\n", error_code); - else { - printf("\ndump of node info:\n"); - for (i=0; i<dump_size; ) { - printf("%s", &dump[i]); - i += strlen(&dump[i]) + 1; - } - printf("\n"); + error_code = pack_all_node (&dump, &dump_size, &update_time); + if (error_code) { + printf ("ERROR: pack_all_node error %d\n", error_code); + error_count++; } if (dump) xfree(dump); printf ("NOTE: we expect validate_node_specs to report bad cpu, real_memory and tmp_disk on lx01\n"); error_code = validate_node_specs ("lx01", 1, 2, 3); - if (error_code != EINVAL) + if (error_code != EINVAL) { printf ("ERROR: validate_node_specs error2\n"); + error_count++; + } rehash (); dump_hash (); node_ptr = find_node_record ("lx02"); - if (node_ptr == 0) + if (node_ptr == 0) { printf ("ERROR: find_node_record failure 1\n"); - else if (strcmp (node_ptr->name, "lx02") != 0) + error_count++; + } + else if (strcmp (node_ptr->name, "lx02") != 0) { printf ("ERROR: find_node_record failure 2\n"); - else if (node_ptr->last_response != (time_t) 678) + error_count++; + } + else if (node_ptr->last_response != (time_t) 678) { printf ("ERROR: node default last_response not set\n"); + error_count++; + } printf ("NOTE: we expect delete_node_record to report not finding a record for lx10\n"); error_code = delete_node_record ("lx10"); - if (error_code != ENOENT) + if (error_code != ENOENT) { printf ("ERROR: delete_node_record failure 1\n"); + error_count++; + } error_code = 
delete_node_record ("lx02"); - if (error_code != 0) + if (error_code != 0) { printf ("ERROR: delete_node_record failure 2\n"); + error_count++; + } printf ("NOTE: we expect find_node_record to report not finding a record for lx02\n"); node_ptr = find_node_record ("lx02"); - if (node_ptr != 0) + if (node_ptr != 0) { printf ("ERROR: find_node_record failure 3\n"); + error_count++; + } - exit (0); + exit (error_count); } #endif @@ -963,39 +1004,22 @@ pack_node (struct node_record *dump_node_ptr, void **buf_ptr, int *buf_len) { int state; char *partition = NULL; - uint16_t feature_size, name_size, partition_size; state = dump_node_ptr->node_state; if (state < 0) state = STATE_DOWN; - name_size = strlen(dump_node_ptr->name) + 1; - - if (dump_node_ptr->config_ptr->feature) - feature_size = strlen(dump_node_ptr->config_ptr->feature) + 1; - else - feature_size = 0; - - if (dump_node_ptr->partition_ptr) { - partition = dump_node_ptr->partition_ptr->name; - partition_size = strlen(dump_node_ptr->partition_ptr->name) + 1; - } - else - partition_size = 0; - - if (name_size + feature_size + partition_size + 40 > *buf_len) { - error ("pack_node: buffer too small for node %s", dump_node_ptr->name); - return 1; - } - packstr (dump_node_ptr->name, name_size, buf_ptr, buf_len); + /* partition stays NULL (packed as an empty string) when the node has none */ + if (dump_node_ptr->partition_ptr) + partition = dump_node_ptr->partition_ptr->name; + packstr (dump_node_ptr->name, buf_ptr, buf_len); pack32 (state, buf_ptr, buf_len); pack32 (dump_node_ptr->cpus, buf_ptr, buf_len); pack32 (dump_node_ptr->real_memory, buf_ptr, buf_len); pack32 (dump_node_ptr->tmp_disk, buf_ptr, buf_len); pack32 (dump_node_ptr->config_ptr->weight, buf_ptr, buf_len); - packstr (dump_node_ptr->config_ptr->feature, feature_size, - buf_ptr, buf_len); - packstr (partition, partition_size, buf_ptr, buf_len); + packstr (dump_node_ptr->config_ptr->feature, buf_ptr, buf_len); + packstr (partition, buf_ptr, buf_len); return 0; } @@ -1152,15 +1176,18 @@ update_node (char *node_names, char *spec) if (state_val != NO_VAL) { if ((state_val == STATE_DOWN) && (node_record_point->node_state 
!= STATE_UNKNOWN)) - node_record_point->node_state = -(node_record_point->node_state); + node_record_point->node_state = + -(node_record_point->node_state); else node_record_point->node_state = state_val; if (state_val != STATE_IDLE) bit_clear (idle_node_bitmap, - (int) (node_record_point - node_record_table_ptr)); + (int) (node_record_point - + node_record_table_ptr)); if (state_val == STATE_DOWN) bit_clear (up_node_bitmap, - (int) (node_record_point - node_record_table_ptr)); + (int) (node_record_point - + node_record_table_ptr)); info ("update_node: node %s state set to %s", &node_list[i*MAX_NAME_LEN], node_state_string[state_val]); } diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 33e612f6ed11190a54d74ff3b00902b46ac58369..be8eea25c51eadcd13fa985d592943aec891481f 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -22,7 +22,7 @@ #define BUF_SIZE 1024 -struct node_set { /* set of nodes with same configuration that could be allocated */ +struct node_set { /* set of nodes with same configuration */ uint32_t cpus_per_node; uint32_t nodes; uint32_t weight; @@ -42,7 +42,7 @@ int valid_features (char *requested, char *available); int main (int argc, char *argv[]) { - int error_code, line_num, i; + int error_code, error_count = 0, line_num, i; FILE *command_file; char in_line[BUF_SIZE], *job_id, *node_list; log_options_t opts = LOG_OPTS_STDERR_ONLY; @@ -60,14 +60,16 @@ main (int argc, char *argv[]) if (error_code) { printf ("controller: error %d from init_slurm_conf\n", error_code); - exit (error_code); + error_count++; + exit (error_count); } error_code = read_slurm_conf (argv[1]); if (error_code) { printf ("controller: error %d from read_slurm_conf\n", error_code); - exit (error_code); + error_count++; + exit (error_count); } /* mark everything up and idle for testing */ @@ -79,27 +81,40 @@ main (int argc, char *argv[]) fprintf (stderr, "node_scheduler: error %d opening command file %s\n", errno, 
argv[2]); - exit (1); + error_count++; + exit (error_count); } i = valid_features ("fs1&fs2", "fs1"); - if (i != 0) + if (i != 0) { printf ("valid_features error 1\n"); + error_count++; + } i = valid_features ("fs1|fs2", "fs1"); - if (i != 1) + if (i != 1) { printf ("valid_features error 2\n"); + error_count++; + } i = valid_features ("fs1|fs2&fs3", "fs1,fs3"); - if (i != 1) + if (i != 1) { printf ("valid_features error 3\n"); + error_count++; + } i = valid_features ("[fs1|fs2]&fs3", "fs2,fs3"); - if (i != 2) + if (i != 2) { printf ("valid_features error 4\n"); + error_count++; + } i = valid_features ("fs0&[fs1|fs2]&fs3", "fs2,fs3"); - if (i != 0) + if (i != 0) { printf ("valid_features error 5\n"); + error_count++; + } i = valid_features ("fs3&[fs1|fs2]&fs3", "fs2,fs3"); - if (i != 2) + if (i != 2) { printf ("valid_features error 6\n"); + error_count++; + } line_num = 0; printf ("\n"); @@ -111,15 +126,19 @@ main (int argc, char *argv[]) line_num++; error_code = job_allocate(in_line, &job_id, &node_list); if (error_code) { - if (strncmp (in_line, "JobName=FAIL", 12) != 0) + if (strncmp (in_line, "JobName=FAIL", 12) != 0) { printf ("ERROR:"); + error_count++; + } printf ("for job: %s\n", in_line); printf ("node_scheduler: error %d from job_allocate on line %d\n", error_code, line_num); } else { - if (strncmp (in_line, "JobName=FAIL", 12) == 0) + if (strncmp (in_line, "JobName=FAIL", 12) == 0) { printf ("ERROR: "); + error_count++; + } printf ("for job: %s\n nodes selected %s\n", in_line, node_list); if (job_id) @@ -129,7 +148,7 @@ main (int argc, char *argv[]) } printf("time = %ld usec\n\n", (long) (clock() - start_time)); } - exit (0); + exit (error_count); } #endif @@ -835,8 +854,11 @@ select_nodes (struct job_record *job_ptr) allocate_nodes (req_bitmap); job_ptr->job_state = JOB_STAGE_IN; job_ptr->start_time = time(NULL); - if (job_ptr->time_limit >= 0) - job_ptr->end_time = time(NULL) + (job_ptr->time_limit * 60); + if (job_ptr->time_limit == INFINITE) + 
job_ptr->end_time = INFINITE; + else + job_ptr->end_time = + job_ptr->start_time + (job_ptr->time_limit * 60); cleanup: if (req_bitmap) diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c index 16dc0450deabb72076d6bc59e56d40ff8cd25ef3..89a0b2d9d058fe02fef3f92331723582f0c3a6ff 100644 --- a/src/slurmctld/partition_mgr.c +++ b/src/slurmctld/partition_mgr.c @@ -37,7 +37,7 @@ int list_find_part (void *part_entry, void *key); int main (int argc, char *argv[]) { - int error_code, i; + int error_code, error_count; time_t update_time; struct part_record *part_ptr; char *dump; @@ -46,13 +46,18 @@ main (int argc, char *argv[]) "MaxTime=34 MaxNodes=56 Key=NO State=DOWN Shared=FORCE"; log_options_t opts = LOG_OPTS_STDERR_ONLY; + error_count = 0; log_init(argv[0], opts, SYSLOG_FACILITY_DAEMON, NULL); error_code = init_node_conf (); - if (error_code) + if (error_code) { printf ("init_node_conf error %d\n", error_code); + error_count++; + } error_code = init_part_conf (); - if (error_code) + if (error_code) { printf ("init_part_conf error %d\n", error_code); + error_count++; + } default_part.max_time = 223344; default_part.max_nodes = 556677; default_part.total_nodes = 4; @@ -62,20 +67,34 @@ main (int argc, char *argv[]) printf ("create some partitions and test defaults\n"); part_ptr = create_part_record (); - if (part_ptr->max_time != 223344) + if (part_ptr->max_time != 223344) { printf ("ERROR: partition default max_time not set\n"); - if (part_ptr->max_nodes != 556677) + error_count++; + } + if (part_ptr->max_nodes != 556677) { printf ("ERROR: partition default max_nodes not set\n"); - if (part_ptr->total_nodes != 4) + error_count++; + } + if (part_ptr->total_nodes != 4) { printf ("ERROR: partition default total_nodes not set\n"); - if (part_ptr->total_cpus != 16) + error_count++; + } + if (part_ptr->total_cpus != 16) { printf ("ERROR: partition default max_nodes not set\n"); - if (part_ptr->key != 1) + error_count++; + } + if (part_ptr->key != 1) { 
printf ("ERROR: partition default key not set\n"); - if (part_ptr->state_up != 1) + error_count++; + } + if (part_ptr->state_up != 1) { printf ("ERROR: partition default state_up not set\n"); - if (part_ptr->shared != 0) + error_count++; + } + if (part_ptr->shared != 0) { printf ("ERROR: partition default shared not set\n"); + error_count++; + } strcpy (part_ptr->name, "interactive"); part_ptr->nodes = "lx[01-04]"; part_ptr->allow_groups = "students"; @@ -88,46 +107,58 @@ main (int argc, char *argv[]) strcpy (part_ptr->name, "class"); update_time = (time_t) 0; - error_code = dump_all_part (&dump, &dump_size, &update_time); - if (error_code) - printf ("ERROR: dump_part error %d\n", error_code); - else { - printf("\ndump of partition info:\n"); - for (i=0; i<dump_size; ) { - printf("%s", &dump[i]); - i += strlen(&dump[i]) + 1; - } - printf("\n"); + error_code = pack_all_part (&dump, &dump_size, &update_time); + if (error_code) { + printf ("ERROR: pack_part error %d\n", error_code); + error_count++; } error_code = update_part ("batch", update_spec); - if (error_code) + if (error_code) { printf ("ERROR: update_part error %d\n", error_code); + error_count++; + } part_ptr = find_part_record ("batch"); - if (part_ptr == NULL) + if (part_ptr == NULL) { printf ("ERROR: list_find failure\n"); - if (part_ptr->max_time != 34) + error_count++; + } + if (part_ptr->max_time != 34) { printf ("ERROR: update_part max_time not reset\n"); - if (part_ptr->max_nodes != 56) + error_count++; + } + if (part_ptr->max_nodes != 56) { printf ("ERROR: update_part max_nodes not reset\n"); - if (part_ptr->key != 0) + error_count++; + } + if (part_ptr->key != 0) { printf ("ERROR: update_part key not reset\n"); - if (part_ptr->state_up != 0) + error_count++; + } + if (part_ptr->state_up != 0) { printf ("ERROR: update_part state_up not set\n"); - if (part_ptr->shared != 2) + error_count++; + } + if (part_ptr->shared != 2) { printf ("ERROR: update_part shared not set\n"); + error_count++; + } 
node_record_count = 0; /* delete_part_record dies if node count is bad */ error_code = delete_part_record ("batch"); - if (error_code != 0) + if (error_code != 0) { printf ("delete_part_record error1 %d\n", error_code); + error_count++; + } printf ("NOTE: we expect delete_part_record to report not finding a record for batch\n"); error_code = delete_part_record ("batch"); - if (error_code != ENOENT) + if (error_code != ENOENT) { printf ("ERROR: delete_part_record error2 %d\n", error_code); + error_count++; + } - exit (0); + exit (error_count); } #endif @@ -477,29 +508,13 @@ int pack_part (struct part_record *part_record_point, void **buf_ptr, int *buf_len) { uint16_t default_part_flag; - uint16_t group_size, name_size, node_size; - - if (part_record_point->name) - name_size = strlen(part_record_point->name) + 1; - else - name_size = 0; - - if (part_record_point->allow_groups) - group_size = strlen(part_record_point->allow_groups) + 1; - else - group_size = 0; - - if (part_record_point->nodes) - node_size = strlen(part_record_point->nodes) + 1; - else - node_size = 0; if (default_part_loc == part_record_point) default_part_flag = 1; else default_part_flag = 0; - packstr (part_record_point->name, name_size, buf_ptr, buf_len); + packstr (part_record_point->name, buf_ptr, buf_len); pack32 (part_record_point->max_time, buf_ptr, buf_len); pack32 (part_record_point->max_nodes, buf_ptr, buf_len); pack32 (part_record_point->total_nodes, buf_ptr, buf_len); @@ -508,8 +523,8 @@ pack_part (struct part_record *part_record_point, void **buf_ptr, int *buf_len) pack16 ((uint16_t)part_record_point->key, buf_ptr, buf_len); pack16 ((uint16_t)part_record_point->shared, buf_ptr, buf_len); pack16 ((uint16_t)part_record_point->state_up, buf_ptr, buf_len); - packstr (part_record_point->allow_groups, group_size, buf_ptr, buf_len); - packstr (part_record_point->nodes, node_size, buf_ptr, buf_len); + packstr (part_record_point->allow_groups, buf_ptr, buf_len); + packstr 
(part_record_point->nodes, buf_ptr, buf_len); return 0; } diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index fbb4c0de700d838f9137f694c8053a6d11e98711..8eeba6917d9649b24feae0bb70128a8020a4011b 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -59,7 +59,7 @@ main (int argc, char *argv[]) { if (error_code) { printf ("error %d from read_slurm_conf\n", error_code); - exit (1); + exit (error_code); } printf ("ControlMachine=%s\n", control_machine);