From b147ae04de813129ec8b2bcc57038dac4c4a86f4 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 22 Nov 2002 17:53:30 +0000
Subject: [PATCH] General code clean-up and documentation. No changes to logic.

---
 src/slurmctld/Makefile.am      |   3 +-
 src/slurmctld/agent.c          | 667 ++++++++++++-----------
 src/slurmctld/agent.h          |  40 +-
 src/slurmctld/controller.c     | 551 ++++++++++---------
 src/slurmctld/job_mgr.c        | 829 +++++++++++++---------------
 src/slurmctld/job_scheduler.c  | 211 ++++----
 src/slurmctld/locks.c          | 187 ++++---
 src/slurmctld/locks.h          |  15 +
 src/slurmctld/node_mgr.c       | 537 ++++++++++---------
 src/slurmctld/node_scheduler.c | 951 +++++++++++++++++----------------
 src/slurmctld/pack.c           | 143 -----
 src/slurmctld/partition_mgr.c  | 837 +++++++++++++++--------------
 src/slurmctld/read_config.c    | 772 ++++++++++++++------------
 src/slurmctld/slurmctld.h      | 763 ++++++++++++++++++--------
 src/slurmctld/step_mgr.c       | 278 ++++++++--
 15 files changed, 3685 insertions(+), 3099 deletions(-)
 delete mode 100644 src/slurmctld/pack.c

diff --git a/src/slurmctld/Makefile.am b/src/slurmctld/Makefile.am
index 7911a314473..ffb21d52ed8 100644
--- a/src/slurmctld/Makefile.am
+++ b/src/slurmctld/Makefile.am
@@ -27,8 +27,7 @@ slurmctld_SOURCES = \
 	node_scheduler.c\
 	partition_mgr.c	\
 	read_config.c	\
-	step_mgr.c	\
-	pack.c
+	step_mgr.c
 
 % : %_d.o
 	$(LINK) $(LDFLAGS) $^ $(LDADD) $(LIBS)

diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index 7b9e18c60bd..8c1a0db272b 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -29,17 +29,18 @@
 *
 * The functions below permit slurm to initiate parallel tasks as a
 * detached thread and let the functions below make sure the work happens.
- * For example, when a job step completes slurmctld needs to revoke credentials
- * for that job step on every node to which it was allocated. We don't want to
- * hang slurmctld's primary function (the job complete RPC) to perform this
- * work, so it just initiates an agent to perform the work. The agent is passed
- * all details required to perform the work, so it will be possible to execute
- * the agent as an pthread, process, or even a daemon on some other computer.
+ * For example, when a job step completes slurmctld needs to revoke
+ * credentials for that job step on every node to which it was allocated.
+ * We don't want to hang slurmctld's primary function (the job complete RPC)
+ * to perform this work, so it just initiates an agent to perform the work.
+ * The agent is passed all details required to perform the work, so it will
+ * be possible to execute the agent as a pthread, process, or even a daemon
+ * on some other computer.
 *
 * The main agent thread creates a separate thread for each node to be
- * communicated with up to AGENT_THREAD_COUNT. A special watchdog thread sends
- * SIGLARM to any threads that have been active (in DSH_ACTIVE state) for more
- * than COMMAND_TIMEOUT seconds.
+ * communicated with up to AGENT_THREAD_COUNT. A special watchdog thread
+ * sends SIGALRM to any threads that have been active (in DSH_ACTIVE state)
+ * for more than COMMAND_TIMEOUT seconds.
 * The agent responds to slurmctld via an RPC as required.
 * For example, informing slurmctld that some node is not responding.
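/*
 * A sketch for illustration, not part of this commit: how a caller hands
 * work to agent(). The agent_arg_t fields, MAX_NAME_LEN, REQUEST_PING,
 * xmalloc() and error() are as used in this patch; example_ping_node()
 * itself is hypothetical, and the pthread_create() handling mirrors
 * _spawn_retry_agent() further below.
 */
static void example_ping_node(slurm_addr *addr, char *name)
{
	pthread_t thread_agent;
	agent_arg_t *arg = xmalloc(sizeof(agent_arg_t));

	arg->node_count = 1;
	arg->retry = 1;		/* requeue via retry_list on failure */
	arg->slurm_addr = xmalloc(sizeof(slurm_addr));
	arg->slurm_addr[0] = *addr;
	arg->node_names = xmalloc(MAX_NAME_LEN);
	strncpy(arg->node_names, name, MAX_NAME_LEN);
	arg->msg_type = REQUEST_PING;	/* one of the four types agent() accepts */
	arg->msg_args = NULL;

	/* agent() xfree's arg and its fields when AGENT_IS_THREAD is set */
	if (pthread_create(&thread_agent, NULL, agent, (void *) arg))
		error("pthread_create error %m");
}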
* @@ -48,7 +49,7 @@ \*****************************************************************************/ #ifdef HAVE_CONFIG_H -# include <config.h> +# include "config.h" #endif #include <errno.h> @@ -57,75 +58,75 @@ #include <string.h> #include <unistd.h> -#include <src/common/list.h> -#include <src/common/log.h> -#include <src/common/slurm_protocol_defs.h> -#include <src/common/xmalloc.h> -#include <src/common/xstring.h> -#include <src/slurmctld/agent.h> -#include <src/slurmctld/locks.h> +#include "src/common/list.h" +#include "src/common/log.h" +#include "src/common/macros.h" +#include "src/common/slurm_protocol_defs.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" +#include "src/slurmctld/agent.h" +#include "src/slurmctld/locks.h" #if COMMAND_TIMEOUT == 1 -#define WDOG_POLL 1 /* secs */ +# define WDOG_POLL 1 /* secs */ #else -#define WDOG_POLL 2 /* secs */ +# define WDOG_POLL 2 /* secs */ #endif -typedef enum {DSH_NEW, DSH_ACTIVE, DSH_DONE, DSH_FAILED} state_t; +typedef enum { DSH_NEW, DSH_ACTIVE, DSH_DONE, DSH_FAILED } state_t; typedef struct thd { - pthread_t thread; /* thread ID */ - pthread_attr_t attr; /* thread attributes */ - state_t state; /* thread state */ - time_t time; /* start time or delta time - * at termination */ - struct sockaddr_in slurm_addr; /* network address */ - char node_name[MAX_NAME_LEN];/* node's name */ + pthread_t thread; /* thread ID */ + pthread_attr_t attr; /* thread attributes */ + state_t state; /* thread state */ + time_t time; /* start time or delta time + * at termination */ + struct sockaddr_in slurm_addr; /* network address */ + char node_name[MAX_NAME_LEN]; /* node's name */ } thd_t; typedef struct agent_info { - pthread_mutex_t thread_mutex; /* agent specific mutex */ - pthread_cond_t thread_cond; /* agent specific condition */ - uint32_t thread_count; /* number of threads records */ - uint32_t threads_active; /* currently active threads */ - uint16_t retry; /* if set, keep trying */ - thd_t *thread_struct; /* thread structures */ - slurm_msg_type_t msg_type; /* RPC to be issued */ - void **msg_args_pptr; /* RPC data to be used */ + pthread_mutex_t thread_mutex; /* agent specific mutex */ + pthread_cond_t thread_cond; /* agent specific condition */ + uint32_t thread_count; /* number of threads records */ + uint32_t threads_active; /* currently active threads */ + uint16_t retry; /* if set, keep trying */ + thd_t *thread_struct; /* thread structures */ + slurm_msg_type_t msg_type; /* RPC to be issued */ + void **msg_args_pptr; /* RPC data to be used */ } agent_info_t; typedef struct task_info { - pthread_mutex_t *thread_mutex_ptr; /* pointer to agent specific - * mutex */ - pthread_cond_t *thread_cond_ptr; /* pointer to agent specific - * condition */ - uint32_t *threads_active_ptr; /* currently active thread ptr */ - thd_t *thread_struct_ptr; /* thread structures ptr */ - slurm_msg_type_t msg_type; /* RPC to be issued */ - void *msg_args_ptr; /* ptr to RPC data to be used */ + pthread_mutex_t *thread_mutex_ptr; /* pointer to agent specific + * mutex */ + pthread_cond_t *thread_cond_ptr;/* pointer to agent specific + * condition */ + uint32_t *threads_active_ptr; /* currently active thread ptr */ + thd_t *thread_struct_ptr; /* thread structures ptr */ + slurm_msg_type_t msg_type; /* RPC to be issued */ + void *msg_args_ptr; /* ptr to RPC data to be used */ } task_info_t; static void _alarm_handler(int dummy); -static void _list_delete_retry (void *retry_entry); -static void _queue_agent_retry (agent_info_t *agent_info_ptr, int 
count); +static void _list_delete_retry(void *retry_entry); +static void _queue_agent_retry(agent_info_t * agent_info_ptr, int count); static void _slurmctld_free_job_launch_msg(batch_job_launch_msg_t * msg); -static void _spawn_retry_agent (agent_arg_t *agent_arg_ptr); -static void *_thread_per_node_rpc (void *args); -static void *_wdog (void *args); -static void _xsignal(int signal, void (*handler)(int)); +static void _spawn_retry_agent(agent_arg_t * agent_arg_ptr); +static void *_thread_per_node_rpc(void *args); +static void *_wdog(void *args); +static void _xsignal(int signal, void (*handler) (int)); -/* retry RPC data structures */ -pthread_mutex_t retry_mutex = PTHREAD_MUTEX_INITIALIZER; -List retry_list = NULL; /* agent_arg_t list for retry */ +static pthread_mutex_t retry_mutex = PTHREAD_MUTEX_INITIALIZER; +static List retry_list = NULL; /* agent_arg_t list for retry */ /* * agent - party responsible for transmitting an common RPC in parallel * across a set of nodes - * input: pointer to agent_arg_t, which is xfree'd (including slurm_addr, + * IN pointer to agent_arg_t, which is xfree'd (including slurm_addr, * node_names and msg_args) upon completion if AGENT_IS_THREAD is set + * RET always NULL (function format just for use as pthread) */ -void * -agent (void *args) +void *agent(void *args) { int i, rc; pthread_attr_t attr_wdog; @@ -137,157 +138,163 @@ agent (void *args) /* basic argument value tests */ if (agent_arg_ptr == NULL) - fatal ("agent NULL argument"); + fatal("agent NULL argument"); if (agent_arg_ptr->node_count == 0) goto cleanup; /* no messages to be sent */ if (agent_arg_ptr->slurm_addr == NULL) - fatal ("agent passed NULL address list"); + fatal("agent passed NULL address list"); if (agent_arg_ptr->node_names == NULL) - fatal ("agent passed NULL node name list"); + fatal("agent passed NULL node name list"); if ((agent_arg_ptr->msg_type != REQUEST_REVOKE_JOB_CREDENTIAL) && - (agent_arg_ptr->msg_type != REQUEST_NODE_REGISTRATION_STATUS) && - (agent_arg_ptr->msg_type != REQUEST_PING) && - (agent_arg_ptr->msg_type != REQUEST_BATCH_JOB_LAUNCH)) - fatal ("agent passed invalid message type %d", - agent_arg_ptr->msg_type); + (agent_arg_ptr->msg_type != REQUEST_NODE_REGISTRATION_STATUS) + && (agent_arg_ptr->msg_type != REQUEST_PING) + && (agent_arg_ptr->msg_type != REQUEST_BATCH_JOB_LAUNCH)) + fatal("agent passed invalid message type %d", + agent_arg_ptr->msg_type); /* initialize the data structures */ - agent_info_ptr = xmalloc (sizeof (agent_info_t)); - if (pthread_mutex_init (&agent_info_ptr->thread_mutex, NULL)) - fatal (" pthread_mutex_init error %m"); - if (pthread_cond_init (&agent_info_ptr->thread_cond, NULL)) - fatal ("pthread_cond_init error %m"); + agent_info_ptr = xmalloc(sizeof(agent_info_t)); + slurm_mutex_init(&agent_info_ptr->thread_mutex); + if (pthread_cond_init(&agent_info_ptr->thread_cond, NULL)) + fatal("pthread_cond_init error %m"); agent_info_ptr->thread_count = agent_arg_ptr->node_count; agent_info_ptr->retry = agent_arg_ptr->retry; agent_info_ptr->threads_active = 0; - thread_ptr = xmalloc (agent_arg_ptr->node_count * sizeof (thd_t)); + thread_ptr = xmalloc(agent_arg_ptr->node_count * sizeof(thd_t)); agent_info_ptr->thread_struct = thread_ptr; agent_info_ptr->msg_type = agent_arg_ptr->msg_type; agent_info_ptr->msg_args_pptr = &agent_arg_ptr->msg_args; for (i = 0; i < agent_info_ptr->thread_count; i++) { thread_ptr[i].state = DSH_NEW; thread_ptr[i].slurm_addr = agent_arg_ptr->slurm_addr[i]; - strncpy (thread_ptr[i].node_name, - 
&agent_arg_ptr->node_names[i*MAX_NAME_LEN], - MAX_NAME_LEN); + strncpy(thread_ptr[i].node_name, + &agent_arg_ptr->node_names[i * MAX_NAME_LEN], + MAX_NAME_LEN); } /* start the watchdog thread */ - if (pthread_attr_init (&attr_wdog)) - fatal ("pthread_attr_init error %m"); - if (pthread_attr_setdetachstate (&attr_wdog, PTHREAD_CREATE_JOINABLE)) - error ("pthread_attr_setdetachstate error %m"); + if (pthread_attr_init(&attr_wdog)) + fatal("pthread_attr_init error %m"); + if (pthread_attr_setdetachstate + (&attr_wdog, PTHREAD_CREATE_JOINABLE)) + error("pthread_attr_setdetachstate error %m"); #ifdef PTHREAD_SCOPE_SYSTEM - if (pthread_attr_setscope (&attr_wdog, PTHREAD_SCOPE_SYSTEM)) - error ("pthread_attr_setscope error %m"); + if (pthread_attr_setscope(&attr_wdog, PTHREAD_SCOPE_SYSTEM)) + error("pthread_attr_setscope error %m"); #endif - if (pthread_create (&thread_wdog, &attr_wdog, _wdog, - (void *)agent_info_ptr)) { - error ("pthread_create error %m"); - sleep (1); /* sleep and try once more */ - if (pthread_create (&thread_wdog, &attr_wdog, _wdog, args)) - fatal ("pthread_create error %m"); + if (pthread_create(&thread_wdog, &attr_wdog, _wdog, + (void *) agent_info_ptr)) { + error("pthread_create error %m"); + sleep(1); /* sleep and try once more */ + if (pthread_create(&thread_wdog, &attr_wdog, _wdog, args)) + fatal("pthread_create error %m"); } - #if AGENT_THREAD_COUNT < 1 - fatal ("AGENT_THREAD_COUNT value is invalid"); + fatal("AGENT_THREAD_COUNT value is invalid"); #endif /* start all the other threads (up to AGENT_THREAD_COUNT active) */ for (i = 0; i < agent_info_ptr->thread_count; i++) { - - /* wait until "room" for another thread */ - pthread_mutex_lock (&agent_info_ptr->thread_mutex); - while (agent_info_ptr->threads_active >= AGENT_THREAD_COUNT) { - pthread_cond_wait (&agent_info_ptr->thread_cond, - &agent_info_ptr->thread_mutex); + + /* wait until "room" for another thread */ + slurm_mutex_lock(&agent_info_ptr->thread_mutex); + while (agent_info_ptr->threads_active >= + AGENT_THREAD_COUNT) { + pthread_cond_wait(&agent_info_ptr->thread_cond, + &agent_info_ptr->thread_mutex); } - + /* create thread specific data, NOTE freed from - * _thread_per_node_rpc() */ - task_specific_ptr = - xmalloc (sizeof (task_info_t)); - task_specific_ptr->thread_mutex_ptr = - &agent_info_ptr->thread_mutex; - task_specific_ptr->thread_cond_ptr = - &agent_info_ptr->thread_cond; - task_specific_ptr->threads_active_ptr = - &agent_info_ptr->threads_active; - task_specific_ptr->thread_struct_ptr = &thread_ptr[i]; - task_specific_ptr->msg_type = - agent_info_ptr->msg_type; - task_specific_ptr->msg_args_ptr = - *agent_info_ptr->msg_args_pptr; - - if (pthread_attr_init (&thread_ptr[i].attr)) - fatal ("pthread_attr_init error %m"); - if (pthread_attr_setdetachstate (&thread_ptr[i].attr, - PTHREAD_CREATE_DETACHED)) - error ("pthread_attr_setdetachstate error %m"); + * _thread_per_node_rpc() */ + task_specific_ptr = xmalloc(sizeof(task_info_t)); + task_specific_ptr->thread_mutex_ptr = + &agent_info_ptr->thread_mutex; + task_specific_ptr->thread_cond_ptr = + &agent_info_ptr->thread_cond; + task_specific_ptr->threads_active_ptr = + &agent_info_ptr->threads_active; + task_specific_ptr->thread_struct_ptr = &thread_ptr[i]; + task_specific_ptr->msg_type = agent_info_ptr->msg_type; + task_specific_ptr->msg_args_ptr = + *agent_info_ptr->msg_args_pptr; + + if (pthread_attr_init(&thread_ptr[i].attr)) + fatal("pthread_attr_init error %m"); + if (pthread_attr_setdetachstate(&thread_ptr[i].attr, + PTHREAD_CREATE_DETACHED)) + 
error("pthread_attr_setdetachstate error %m"); #ifdef PTHREAD_SCOPE_SYSTEM - if (pthread_attr_setscope (&thread_ptr[i].attr, - PTHREAD_SCOPE_SYSTEM)) - error ("pthread_attr_setscope error %m"); + if (pthread_attr_setscope(&thread_ptr[i].attr, + PTHREAD_SCOPE_SYSTEM)) + error("pthread_attr_setscope error %m"); #endif - while ( (rc = pthread_create (&thread_ptr[i].thread, - &thread_ptr[i].attr, - _thread_per_node_rpc, - (void *) task_specific_ptr)) ) { - error ("pthread_create error %m"); + while ((rc = pthread_create(&thread_ptr[i].thread, + &thread_ptr[i].attr, + _thread_per_node_rpc, + (void *) task_specific_ptr))) { + error("pthread_create error %m"); if (agent_info_ptr->threads_active) - pthread_cond_wait (&agent_info_ptr->thread_cond, - &agent_info_ptr->thread_mutex); + pthread_cond_wait(&agent_info_ptr-> + thread_cond, + &agent_info_ptr-> + thread_mutex); else { - pthread_mutex_unlock (&agent_info_ptr->thread_mutex); - sleep (1); - pthread_mutex_lock (&agent_info_ptr->thread_mutex); + slurm_mutex_unlock(&agent_info_ptr-> + thread_mutex); + sleep(1); + slurm_mutex_lock(&agent_info_ptr-> + thread_mutex); } } agent_info_ptr->threads_active++; - pthread_mutex_unlock (&agent_info_ptr->thread_mutex); - } + slurm_mutex_unlock(&agent_info_ptr->thread_mutex); + } /* wait for termination of remaining threads */ - pthread_join (thread_wdog, NULL); + pthread_join(thread_wdog, NULL); -cleanup: + cleanup: #if AGENT_IS_THREAD if (agent_arg_ptr) { if (agent_arg_ptr->slurm_addr) - xfree (agent_arg_ptr->slurm_addr); + xfree(agent_arg_ptr->slurm_addr); if (agent_arg_ptr->node_names) - xfree (agent_arg_ptr->node_names); + xfree(agent_arg_ptr->node_names); if (agent_arg_ptr->msg_args) { - if (agent_arg_ptr->msg_type == REQUEST_BATCH_JOB_LAUNCH) - _slurmctld_free_job_launch_msg (agent_arg_ptr->msg_args); + if (agent_arg_ptr->msg_type == + REQUEST_BATCH_JOB_LAUNCH) + _slurmctld_free_job_launch_msg + (agent_arg_ptr->msg_args); else - xfree (agent_arg_ptr->msg_args); + xfree(agent_arg_ptr->msg_args); } - xfree (agent_arg_ptr); + xfree(agent_arg_ptr); } #endif - + if (agent_info_ptr) { if (agent_info_ptr->thread_struct) - xfree (agent_info_ptr->thread_struct); - xfree (agent_info_ptr); + xfree(agent_info_ptr->thread_struct); + xfree(agent_info_ptr); } return NULL; } /* - * _wdog - Watchdog thread. Send SIGALRM to threads which have been active for too long. - * Sleep for WDOG_POLL seconds between polls. + * _wdog - Watchdog thread. Send SIGALRM to threads which have been active + * for too long. + * IN args - pointer to agent_info_t with info on threads to watch + * Sleep for WDOG_POLL seconds between polls. 
*/ -static void * -_wdog (void *args) +static void *_wdog(void *args) { int i, fail_cnt, work_done, delay, max_delay = 0; agent_info_t *agent_ptr = (agent_info_t *) args; thd_t *thread_ptr = agent_ptr->thread_struct; #if AGENT_IS_THREAD /* Locks: Write job and write node */ - slurmctld_lock_t node_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; + slurmctld_lock_t node_write_lock = + { NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; #else int done_cnt; char *slurm_names; @@ -296,215 +303,222 @@ _wdog (void *args) while (1) { work_done = 1; /* assume all threads complete for now */ fail_cnt = 0; /* assume all threads complete sucessfully for now */ - sleep (WDOG_POLL); + sleep(WDOG_POLL); - pthread_mutex_lock (&agent_ptr->thread_mutex); + slurm_mutex_lock(&agent_ptr->thread_mutex); for (i = 0; i < agent_ptr->thread_count; i++) { switch (thread_ptr[i].state) { - case DSH_ACTIVE: - work_done = 0; - delay = difftime (time (NULL), - thread_ptr[i].time); - if ( delay >= COMMAND_TIMEOUT) - pthread_kill(thread_ptr[i].thread, - SIGALRM); - break; - case DSH_NEW: - work_done = 0; - break; - case DSH_DONE: - if ( max_delay < (int) thread_ptr[i].time ) - max_delay = (int) thread_ptr[i].time; - break; - case DSH_FAILED: - fail_cnt++; - break; + case DSH_ACTIVE: + work_done = 0; + delay = difftime(time(NULL), + thread_ptr[i].time); + if (delay >= COMMAND_TIMEOUT) + pthread_kill(thread_ptr[i].thread, + SIGALRM); + break; + case DSH_NEW: + work_done = 0; + break; + case DSH_DONE: + if (max_delay < (int) thread_ptr[i].time) + max_delay = + (int) thread_ptr[i].time; + break; + case DSH_FAILED: + fail_cnt++; + break; } } if (work_done) break; - pthread_mutex_unlock (&agent_ptr->thread_mutex); + slurm_mutex_unlock(&agent_ptr->thread_mutex); } /* Notify slurmctld of non-responding nodes */ if (fail_cnt) { #if AGENT_IS_THREAD /* Update node table data for non-responding nodes */ - lock_slurmctld (node_write_lock); + lock_slurmctld(node_write_lock); for (i = 0; i < agent_ptr->thread_count; i++) { if (thread_ptr[i].state == DSH_FAILED) - node_not_resp (thread_ptr[i].node_name); + node_not_resp(thread_ptr[i].node_name); } - unlock_slurmctld (node_write_lock); + unlock_slurmctld(node_write_lock); #else /* Build a list of all non-responding nodes and send * it to slurmctld */ - slurm_names = xmalloc (fail_cnt * MAX_NAME_LEN); + slurm_names = xmalloc(fail_cnt * MAX_NAME_LEN); fail_cnt = 0; for (i = 0; i < agent_ptr->thread_count; i++) { if (thread_ptr[i].state == DSH_FAILED) { - strncpy (&slurm_names[MAX_NAME_LEN * fail_cnt], - thread_ptr[i].node_name, MAX_NAME_LEN); - error ("agent/_wdog: node %s failed to respond", - thread_ptr[i].node_name); + strncpy(&slurm_names + [MAX_NAME_LEN * fail_cnt], + thread_ptr[i].node_name, + MAX_NAME_LEN); + error + ("agent/_wdog: node %s failed to respond", + thread_ptr[i].node_name); fail_cnt++; } } /* send RPC */ - fatal ("Code development needed here if agent is not thread"); + fatal + ("Code development needed here if agent is not thread"); - xfree (slurm_names); + xfree(slurm_names); #endif if (agent_ptr->retry) - _queue_agent_retry (agent_ptr, fail_cnt); + _queue_agent_retry(agent_ptr, fail_cnt); } - #if AGENT_IS_THREAD /* Update last_response on responding nodes */ - lock_slurmctld (node_write_lock); + lock_slurmctld(node_write_lock); for (i = 0; i < agent_ptr->thread_count; i++) { if (thread_ptr[i].state == DSH_DONE) - node_did_resp (thread_ptr[i].node_name); + node_did_resp(thread_ptr[i].node_name); } - unlock_slurmctld (node_write_lock); + 
unlock_slurmctld(node_write_lock); #else /* Build a list of all responding nodes and send it to slurmctld to * update time stamps */ done_cnt = agent_ptr->thread_count - fail_cnt; - slurm_names = xmalloc (done_cnt * MAX_NAME_LEN); + slurm_names = xmalloc(done_cnt * MAX_NAME_LEN); done_cnt = 0; for (i = 0; i < agent_ptr->thread_count; i++) { if (thread_ptr[i].state == DSH_DONE) { - strncpy (&slurm_names[MAX_NAME_LEN * done_cnt], - thread_ptr[i].node_name, MAX_NAME_LEN); + strncpy(&slurm_names[MAX_NAME_LEN * done_cnt], + thread_ptr[i].node_name, MAX_NAME_LEN); done_cnt++; } } /* send RPC */ - fatal ("Code development needed here if agent is not thread"); + fatal("Code development needed here if agent is not thread"); - xfree (slurm_addr); + xfree(slurm_addr); #endif if (max_delay) - debug ("agent maximum delay %d seconds", max_delay); + debug("agent maximum delay %d seconds", max_delay); - pthread_mutex_unlock (&agent_ptr->thread_mutex); + slurm_mutex_unlock(&agent_ptr->thread_mutex); return (void *) NULL; } -/* _thread_per_node_rpc - thread to revoke a credential on a collection of nodes - * This xfrees the argument passed to it */ -static void * -_thread_per_node_rpc (void *args) +/* + * _thread_per_node_rpc - thread to revoke a credential on a collection + * of nodes + * IN/OUT args - pointer to task_info_t, xfree'd on completion + */ +static void *_thread_per_node_rpc(void *args) { - int msg_size ; - int rc ; - slurm_fd sockfd ; - slurm_msg_t request_msg ; - slurm_msg_t response_msg ; - return_code_msg_t * slurm_rc_msg ; + int msg_size; + int rc; + slurm_fd sockfd; + slurm_msg_t request_msg; + slurm_msg_t response_msg; + return_code_msg_t *slurm_rc_msg; task_info_t *task_ptr = (task_info_t *) args; thd_t *thread_ptr = task_ptr->thread_struct_ptr; - state_t thread_state = DSH_FAILED; + state_t thread_state = DSH_FAILED; sigset_t set; /* set up SIGALRM handler */ - if (sigemptyset (&set)) - error ("sigemptyset error: %m"); - if (sigaddset (&set, SIGALRM)) - error ("sigaddset error on SIGALRM: %m"); - if (sigprocmask (SIG_UNBLOCK, &set, NULL) != 0) - fatal ("sigprocmask error: %m"); + if (sigemptyset(&set)) + error("sigemptyset error: %m"); + if (sigaddset(&set, SIGALRM)) + error("sigaddset error on SIGALRM: %m"); + if (sigprocmask(SIG_UNBLOCK, &set, NULL) != 0) + fatal("sigprocmask error: %m"); _xsignal(SIGALRM, _alarm_handler); if (args == NULL) - fatal ("_thread_per_node_rpc has NULL argument"); - pthread_mutex_lock (task_ptr->thread_mutex_ptr); + fatal("_thread_per_node_rpc has NULL argument"); + slurm_mutex_lock(task_ptr->thread_mutex_ptr); thread_ptr->state = DSH_ACTIVE; - thread_ptr->time = time (NULL); - pthread_mutex_unlock (task_ptr->thread_mutex_ptr); + thread_ptr->time = time(NULL); + slurm_mutex_unlock(task_ptr->thread_mutex_ptr); /* init message connection for message communication */ - if ( ( sockfd = slurm_open_msg_conn (& thread_ptr->slurm_addr) ) - == SLURM_SOCKET_ERROR ) { - error ("_thread_per_node_rpc/slurm_open_msg_conn error %m"); + if ((sockfd = slurm_open_msg_conn(&thread_ptr->slurm_addr)) + == SLURM_SOCKET_ERROR) { + error("_thread_per_node_rpc/slurm_open_msg_conn error %m"); goto cleanup; } /* send request message */ - request_msg . msg_type = task_ptr->msg_type ; - request_msg . 
data = task_ptr->msg_args_ptr ;
-	if ( ( rc = slurm_send_node_msg ( sockfd , & request_msg ) )
-			== SLURM_SOCKET_ERROR ) {
-		error ("_thread_per_node_rpc/slurm_send_node_msg error %m");
+	request_msg.msg_type = task_ptr->msg_type;
+	request_msg.data = task_ptr->msg_args_ptr;
+	if ((rc = slurm_send_node_msg(sockfd, &request_msg))
+	    == SLURM_SOCKET_ERROR) {
+		error("_thread_per_node_rpc/slurm_send_node_msg error %m");
 		goto cleanup;
 	}
 
 	/* receive message */
-	if ( ( msg_size = slurm_receive_msg ( sockfd , & response_msg ) )
-			== SLURM_SOCKET_ERROR ) {
-		error ("_thread_per_node_rpc/slurm_receive_msg error %m");
+	if ((msg_size = slurm_receive_msg(sockfd, &response_msg))
+	    == SLURM_SOCKET_ERROR) {
+		error("_thread_per_node_rpc/slurm_receive_msg error %m");
 		goto cleanup;
 	}
 
 	/* shutdown message connection */
-	if ( ( rc = slurm_shutdown_msg_conn ( sockfd ) ) == SLURM_SOCKET_ERROR ) {
-		error ("_thread_per_node_rpc/slurm_shutdown_msg_conn error %m");
+	if ((rc = slurm_shutdown_msg_conn(sockfd)) == SLURM_SOCKET_ERROR) {
+		error
+		    ("_thread_per_node_rpc/slurm_shutdown_msg_conn error %m");
 		goto cleanup;
 	}
-	if ( msg_size ) {
-		error ("_thread_per_node_rpc/msg_size error %d", msg_size);
+	if (msg_size) {
+		error("_thread_per_node_rpc/msg_size error %d", msg_size);
 		goto cleanup;
 	}
 
-	switch ( response_msg . msg_type )
-	{
-		case RESPONSE_SLURM_RC:
-			slurm_rc_msg = ( return_code_msg_t * ) response_msg . data ;
-			rc = slurm_rc_msg->return_code;
-			slurm_free_return_code_msg ( slurm_rc_msg );
-			if (rc)
-				error ("_thread_per_node_rpc/rc error: %s",
-					slurm_strerror (rc));	/* Don't use %m */
-			else {
-				debug3 ("agent sucessfully processed RPC to node %s",
-					thread_ptr->node_name);
-			}
-			thread_state = DSH_DONE;
-			break ;
-		default:
-			error ("_thread_per_node_rpc bad msg_type %d",
-				response_msg.msg_type);
-			break ;
+	switch (response_msg.msg_type) {
+	case RESPONSE_SLURM_RC:
+		slurm_rc_msg = (return_code_msg_t *) response_msg.data;
+		rc = slurm_rc_msg->return_code;
+		slurm_free_return_code_msg(slurm_rc_msg);
+		if (rc)
+			error("_thread_per_node_rpc/rc error: %s",
+			      slurm_strerror(rc));	/* Don't use %m */
+		else {
+			debug3
+			    ("agent successfully processed RPC to node %s",
+			     thread_ptr->node_name);
+		}
+		thread_state = DSH_DONE;
+		break;
+	default:
+		error("_thread_per_node_rpc bad msg_type %d",
+		      response_msg.msg_type);
+		break;
 	}
 
-cleanup:
-	pthread_mutex_lock (task_ptr->thread_mutex_ptr);
+ cleanup:
+	slurm_mutex_lock(task_ptr->thread_mutex_ptr);
 	thread_ptr->state = thread_state;
-	thread_ptr->time = (time_t) difftime (time (NULL), thread_ptr->time);
+	thread_ptr->time = (time_t) difftime(time(NULL), thread_ptr->time);
 
 	/* Signal completion so another thread can replace us */
 	(*task_ptr->threads_active_ptr)--;
 	pthread_cond_signal(task_ptr->thread_cond_ptr);
-	pthread_mutex_unlock (task_ptr->thread_mutex_ptr);
+	slurm_mutex_unlock(task_ptr->thread_mutex_ptr);
 
-	xfree (args);
+	xfree(args);
 	return (void *) NULL;
 }
 
 /*
  * Emulate signal() but with BSD semantics (i.e. don't restore signal to
- * SIGDFL prior to executing handler). 
*/ -static void _xsignal(int signal, void (*handler)(int)) +static void _xsignal(int signal, void (*handler) (int)) { struct sigaction sa, old_sa; sa.sa_handler = handler; - sigemptyset(&sa.sa_mask); - sigaddset(&sa.sa_mask, signal); + sigemptyset(&sa.sa_mask); + sigaddset(&sa.sa_mask, signal); sa.sa_flags = 0; sigaction(signal, &sa, &old_sa); } @@ -519,9 +533,12 @@ static void _alarm_handler(int dummy) } -/* _queue_agent_retry - Queue any failed RPCs for later replay */ -static void -_queue_agent_retry (agent_info_t *agent_info_ptr, int count) +/* + * _queue_agent_retry - Queue any failed RPCs for later replay + * IN agent_info_ptr - pointer to info on completed agent requests + * IN count - number of agent requests which failed, count to requeue + */ +static void _queue_agent_retry(agent_info_t * agent_info_ptr, int count) { agent_arg_t *agent_arg_ptr; thd_t *thread_ptr = agent_info_ptr->thread_struct; @@ -531,107 +548,112 @@ _queue_agent_retry (agent_info_t *agent_info_ptr, int count) return; /* build agent argument with just the RPCs to retry */ - agent_arg_ptr = xmalloc (sizeof (agent_arg_t)); - agent_arg_ptr -> node_count = count; - agent_arg_ptr -> retry = 1; - agent_arg_ptr -> slurm_addr = xmalloc (sizeof (struct sockaddr_in) - * count); - agent_arg_ptr -> node_names = xmalloc (MAX_NAME_LEN * count); - agent_arg_ptr -> msg_type = agent_info_ptr -> msg_type; - agent_arg_ptr -> msg_args = *(agent_info_ptr -> msg_args_pptr); - *(agent_info_ptr -> msg_args_pptr) = NULL; + agent_arg_ptr = xmalloc(sizeof(agent_arg_t)); + agent_arg_ptr->node_count = count; + agent_arg_ptr->retry = 1; + agent_arg_ptr->slurm_addr = xmalloc(sizeof(struct sockaddr_in) + * count); + agent_arg_ptr->node_names = xmalloc(MAX_NAME_LEN * count); + agent_arg_ptr->msg_type = agent_info_ptr->msg_type; + agent_arg_ptr->msg_args = *(agent_info_ptr->msg_args_pptr); + *(agent_info_ptr->msg_args_pptr) = NULL; j = 0; - for (i=0; i<agent_info_ptr->thread_count; i++) { + for (i = 0; i < agent_info_ptr->thread_count; i++) { if (thread_ptr[i].state != DSH_FAILED) continue; agent_arg_ptr->slurm_addr[j] = thread_ptr[i].slurm_addr; - strncpy(&agent_arg_ptr->node_names[j*MAX_NAME_LEN], + strncpy(&agent_arg_ptr->node_names[j * MAX_NAME_LEN], thread_ptr[i].node_name, MAX_NAME_LEN); if ((++j) == count) break; } /* add the requeust to a list */ - pthread_mutex_lock (&retry_mutex); + slurm_mutex_lock(&retry_mutex); if (retry_list == NULL) { - retry_list = list_create (&_list_delete_retry); + retry_list = list_create(&_list_delete_retry); if (retry_list == NULL) - fatal ("list_create failed"); + fatal("list_create failed"); } - if (list_enqueue (retry_list, (void *)agent_arg_ptr) == 0) - fatal ("list_append failed"); - pthread_mutex_unlock (&retry_mutex); + if (list_enqueue(retry_list, (void *) agent_arg_ptr) == 0) + fatal("list_append failed"); + slurm_mutex_unlock(&retry_mutex); } /* * _list_delete_retry - delete an entry from the retry list, * see common/list.h for documentation */ -static void _list_delete_retry (void *retry_entry) +static void _list_delete_retry(void *retry_entry) { agent_arg_t *agent_arg_ptr; /* pointer to part_record */ agent_arg_ptr = (agent_arg_t *) retry_entry; - if (agent_arg_ptr -> slurm_addr) - xfree (agent_arg_ptr -> slurm_addr); - if (agent_arg_ptr -> node_names) - xfree (agent_arg_ptr -> node_names); + if (agent_arg_ptr->slurm_addr) + xfree(agent_arg_ptr->slurm_addr); + if (agent_arg_ptr->node_names) + xfree(agent_arg_ptr->node_names); #if AGENT_IS_THREAD - if (agent_arg_ptr -> msg_args) - xfree 
(agent_arg_ptr -> msg_args);
+	if (agent_arg_ptr->msg_args)
+		xfree(agent_arg_ptr->msg_args);
 #endif
-	xfree (agent_arg_ptr);
+	xfree(agent_arg_ptr);
 }
 
-/* agent_retry - Agent for retrying pending RPCs (top one on the queue),
- *	argument is unused */
-void * agent_retry (void *args)
+/*
+ * agent_retry - Agent for retrying pending RPCs (top one on the queue),
+ * IN args - unused
+ * RET always NULL (function format just for use as pthread)
+ */
+void *agent_retry(void *args)
 {
 	agent_arg_t *agent_arg_ptr = NULL;
 
-	pthread_mutex_lock (&retry_mutex);
+	slurm_mutex_lock(&retry_mutex);
 	if (retry_list)
-		agent_arg_ptr = (agent_arg_t *) list_dequeue (retry_list);
-	pthread_mutex_unlock (&retry_mutex);
+		agent_arg_ptr = (agent_arg_t *) list_dequeue(retry_list);
+	slurm_mutex_unlock(&retry_mutex);
 
 	if (agent_arg_ptr)
-		_spawn_retry_agent (agent_arg_ptr);
+		_spawn_retry_agent(agent_arg_ptr);
 
 	return NULL;
 }
 
-/* retry_pending - retry all pending RPCs for the given node name */
-void retry_pending (char *node_name)
+/* retry_pending - retry all pending RPCs for the given node name
+ * IN node_name - name of a node to execute pending RPCs for */
+void retry_pending(char *node_name)
 {
 	int list_size = 0, i, j, found;
 	agent_arg_t *agent_arg_ptr = NULL;
 
-	pthread_mutex_lock (&retry_mutex);
+	slurm_mutex_lock(&retry_mutex);
 	if (retry_list) {
-		list_size = list_count (retry_list);
+		list_size = list_count(retry_list);
 	}
 	for (i = 0; i < list_size; i++) {
-		agent_arg_ptr = (agent_arg_t *) list_dequeue (retry_list);
+		agent_arg_ptr = (agent_arg_t *) list_dequeue(retry_list);
 		found = 0;
 		for (j = 0; j < agent_arg_ptr->node_count; j++) {
-			if (strncmp (&agent_arg_ptr->node_names[j*MAX_NAME_LEN],
-				node_name, MAX_NAME_LEN))
+			if (strncmp
+			    (&agent_arg_ptr->node_names[j * MAX_NAME_LEN],
+			     node_name, MAX_NAME_LEN))
 				continue;
 			found = 1;
 			break;
 		}
 
 		if (found)	/* issue this RPC */
-			_spawn_retry_agent (agent_arg_ptr);
+			_spawn_retry_agent(agent_arg_ptr);
 		else		/* put the RPC back on the queue */
-			list_enqueue (retry_list, (void*) agent_arg_ptr);
+			list_enqueue(retry_list, (void *) agent_arg_ptr);
 	}
-	pthread_mutex_unlock (&retry_mutex);
+	slurm_mutex_unlock(&retry_mutex);
 }
 
 /* _spawn_retry_agent - pthread_crate an agent for the given task */
-static void _spawn_retry_agent (agent_arg_t *agent_arg_ptr)
+static void _spawn_retry_agent(agent_arg_t * agent_arg_ptr)
 {
 	pthread_attr_t attr_agent;
 	pthread_t thread_agent;
 
@@ -639,32 +661,32 @@ static void _spawn_retry_agent (agent_arg_t *agent_arg_ptr)
 	if (agent_arg_ptr == NULL)
 		return;
 
-	debug3 ("Spawning RPC retry agent");
-	if (pthread_attr_init (&attr_agent))
-		fatal ("pthread_attr_init error %m");
-	if (pthread_attr_setdetachstate (&attr_agent,
-					 PTHREAD_CREATE_DETACHED))
-		error ("pthread_attr_setdetachstate error %m");
+	debug3("Spawning RPC retry agent");
+	if (pthread_attr_init(&attr_agent))
+		fatal("pthread_attr_init error %m");
+	if (pthread_attr_setdetachstate(&attr_agent,
+					PTHREAD_CREATE_DETACHED))
+		error("pthread_attr_setdetachstate error %m");
 #ifdef PTHREAD_SCOPE_SYSTEM
-	if (pthread_attr_setscope (&attr_agent, PTHREAD_SCOPE_SYSTEM))
-		error ("pthread_attr_setscope error %m");
+	if (pthread_attr_setscope(&attr_agent, PTHREAD_SCOPE_SYSTEM))
+		error("pthread_attr_setscope error %m");
 #endif
-	if (pthread_create (&thread_agent, &attr_agent,
-			agent, (void *)agent_arg_ptr)) {
-		error ("pthread_create error %m");
-		sleep (1);	/* sleep and try once more */
-		if (pthread_create (&thread_agent, &attr_agent,
-				agent, (void *)agent_arg_ptr))
-			fatal ("pthread_create 
error %m"); + if (pthread_create(&thread_agent, &attr_agent, + agent, (void *) agent_arg_ptr)) { + error("pthread_create error %m"); + sleep(1); /* sleep and try once more */ + if (pthread_create(&thread_agent, &attr_agent, + agent, (void *) agent_arg_ptr)) + fatal("pthread_create error %m"); } } /* _slurmctld_free_job_launch_msg is a variant of slurm_free_job_launch_msg * because all environment variables currently loaded in one xmalloc * buffer (see get_job_env()), which is different from how slurmd - * assembles the data from a message */ - -static void _slurmctld_free_job_launch_msg(batch_job_launch_msg_t * msg) + * assembles the data from a message + */ +static void _slurmctld_free_job_launch_msg(batch_job_launch_msg_t * msg) { if (msg) { if (msg->environment) { @@ -674,18 +696,17 @@ static void _slurmctld_free_job_launch_msg(batch_job_launch_msg_t * msg) xfree(msg->environment); msg->environment = NULL; } - slurm_free_job_launch_msg (msg); + slurm_free_job_launch_msg(msg); } } /* agent_purge - purge all pending RPC requests */ -void agent_purge (void) -{ retry_list = list_create (NULL); +void agent_purge(void) +{ + retry_list = list_create(NULL); - pthread_mutex_lock (&retry_mutex); + slurm_mutex_lock(&retry_mutex); if (retry_list == NULL) - list_destroy (retry_list); - pthread_mutex_unlock (&retry_mutex); + list_destroy(retry_list); + slurm_mutex_unlock(&retry_mutex); } - - diff --git a/src/slurmctld/agent.h b/src/slurmctld/agent.h index 7feda593a0b..3e91a90d60c 100644 --- a/src/slurmctld/agent.h +++ b/src/slurmctld/agent.h @@ -29,31 +29,43 @@ #ifndef _AGENT_H #define _AGENT_H -#include <src/slurmctld/agent.h> -#include <src/slurmctld/slurmctld.h> +#include "src/slurmctld/agent.h" +#include "src/slurmctld/slurmctld.h" -#define AGENT_IS_THREAD 1 /* set if agent itself a thread of slurmctld */ +#define AGENT_IS_THREAD 1 /* set if agent itself a thread of + * slurmctld, 0 for function call */ #define AGENT_THREAD_COUNT 20 /* maximum active agent threads */ #define COMMAND_TIMEOUT 5 /* seconds */ typedef struct agent_arg { - uint32_t node_count; /* number of nodes to communicate with */ - uint16_t retry; /* if set, keep trying */ - slurm_addr *slurm_addr; /* array of network addresses */ - char *node_names; /* array with MAX_NAME_LEN bytes per node */ - slurm_msg_type_t msg_type; /* RPC to be issued */ - void *msg_args; /* RPC data to be transmitted */ + uint32_t node_count; /* number of nodes to communicate + * with */ + uint16_t retry; /* if set, keep trying */ + slurm_addr *slurm_addr; /* array of network addresses */ + char *node_names; /* array with MAX_NAME_LEN bytes + * per node */ + slurm_msg_type_t msg_type; /* RPC to be issued */ + void *msg_args; /* RPC data to be transmitted */ } agent_arg_t; -/* agent - perform requested RPC in parallel and in the background, report status - * upon completion, input is pointer to agent_arg_t */ +/* + * agent - party responsible for transmitting an common RPC in parallel + * across a set of nodes + * IN pointer to agent_arg_t, which is xfree'd (including slurm_addr, + * node_names and msg_args) upon completion if AGENT_IS_THREAD is set + * RET always NULL (function format just for use as pthread) + */ extern void *agent (void *args); -/* agent_retry - Agent for retrying pending RPCs (top one on the queue), - * argument is unused */ +/* + * agent_retry - Agent for retrying pending RPCs (top one on the queue), + * IN args - unused + * RET always NULL (function format just for use as pthread) + */ extern void *agent_retry (void *args); -/* 
retry_pending - retry all pending RPCs for the given node name */
+/* retry_pending - retry all pending RPCs for the given node name
+ * IN node_name - name of a node to execute pending RPCs for */
 extern void retry_pending (char *node_name);
 
 /* agent_purge - purge all pending RPC requests */
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 27eeca1a18b..0bd6debd3da 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -39,6 +39,7 @@
 #include <string.h>
 #include <syslog.h>
 #include <sys/resource.h>
+#include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <netdb.h>
@@ -75,60 +76,72 @@ log_options_t log_opts = { 1, LOG_LEVEL_INFO, LOG_LEVEL_INFO, LOG_LEVEL_QUIET };
 
+
+/* Global variables */
 slurm_ctl_conf_t slurmctld_conf;
-time_t shutdown_time;
-static pthread_mutex_t thread_count_lock = PTHREAD_MUTEX_INITIALIZER;
-bool resume_backup;
-int server_thread_count = 0;
-pid_t slurmctld_pid;
-pthread_t thread_id_main = (pthread_t) 0;
-pthread_t thread_id_sig = (pthread_t) 0;
-pthread_t thread_id_rpc = (pthread_t) 0;
 extern slurm_ssl_key_ctx_t sign_ctx;
-int daemonize = DEFAULT_DAEMONIZE;
-int recover = DEFAULT_RECOVER;
-
-void slurmctld_req(slurm_msg_t * msg);
-void fill_ctld_conf(slurm_ctl_conf_t * build_ptr);
-void parse_commandline(int argc, char *argv[], slurm_ctl_conf_t *);
-static int ping_controller(void);
-inline int report_locks_set(void);
-static void run_backup(void);
-inline static void save_all_state(void);
-static int shutdown_backup_controller(void);
-void *slurmctld_background(void *no_data);
-static int background_process_msg(slurm_msg_t * msg);
-static void *background_signal_hand(void *no_data);
-static void *background_rpc_mgr(void *no_data);
-static void *slurmctld_signal_hand(void *no_data);
-static void *slurmctld_rpc_mgr(void *no_data);
-inline static int slurmctld_shutdown(void);
-void *service_connection(void *arg);
+/* Local variables */
+static int daemonize = DEFAULT_DAEMONIZE;
+static int recover = DEFAULT_RECOVER;
+static bool resume_backup = false;
+static time_t shutdown_time = (time_t) 0;
+static int server_thread_count = 0;
+static pid_t slurmctld_pid;
+
+#ifdef WITH_PTHREADS
+	static pthread_mutex_t thread_count_lock = PTHREAD_MUTEX_INITIALIZER;
+	static pthread_t thread_id_main = (pthread_t) 0;
+	static pthread_t thread_id_sig = (pthread_t) 0;
+	static pthread_t thread_id_rpc = (pthread_t) 0;
+#else
+	static int thread_count_lock = 0;
+	static int thread_id_main = 0;
+	static int thread_id_sig = 0;
+	static int thread_id_rpc = 0;
+#endif
+
+static int _background_process_msg(slurm_msg_t * msg);
+static void * _background_rpc_mgr(void *no_data);
+static void * _background_signal_hand(void *no_data);
+static void _fill_ctld_conf(slurm_ctl_conf_t * build_ptr);
+static void _parse_commandline(int argc, char *argv[],
+                               slurm_ctl_conf_t *);
+static int _ping_controller(void);
+inline static int _report_locks_set(void);
+static void _run_backup(void);
+inline static void _save_all_state(void);
+static void * _service_connection(void *arg);
+static int _shutdown_backup_controller(void);
 inline static void _slurm_rpc_allocate_resources(slurm_msg_t * msg,
-						uint8_t immediate);
-inline static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg);
-inline static void _slurm_rpc_dump_build(slurm_msg_t * msg);
-inline static void _slurm_rpc_dump_nodes(slurm_msg_t * msg);
-inline static void _slurm_rpc_dump_partitions(slurm_msg_t * msg);
-inline static void _slurm_rpc_dump_jobs(slurm_msg_t * msg);
-inline 
static void _slurm_rpc_job_step_cancel(slurm_msg_t * msg); -inline static void _slurm_rpc_job_step_complete(slurm_msg_t * msg); -inline static void _slurm_rpc_job_step_create(slurm_msg_t * msg); -inline static void _slurm_rpc_job_step_get_info(slurm_msg_t * msg); -inline static void _slurm_rpc_job_will_run(slurm_msg_t * msg); -inline static void _slurm_rpc_node_registration(slurm_msg_t * msg); -inline static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg); -inline static void _slurm_rpc_ping(slurm_msg_t * msg); -inline static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg); -inline static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg); -inline static void _slurm_rpc_shutdown_controller_immediate(slurm_msg_t * - msg); -inline static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg); -inline static void _slurm_rpc_update_job(slurm_msg_t * msg); -inline static void _slurm_rpc_update_node(slurm_msg_t * msg); -inline static void _slurm_rpc_update_partition(slurm_msg_t * msg); -static void _usage(char *prog_name); + uint8_t immediate); +inline static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg); +inline static void _slurm_rpc_dump_build(slurm_msg_t * msg); +inline static void _slurm_rpc_dump_nodes(slurm_msg_t * msg); +inline static void _slurm_rpc_dump_partitions(slurm_msg_t * msg); +inline static void _slurm_rpc_dump_jobs(slurm_msg_t * msg); +inline static void _slurm_rpc_job_step_cancel(slurm_msg_t * msg); +inline static void _slurm_rpc_job_step_complete(slurm_msg_t * msg); +inline static void _slurm_rpc_job_step_create(slurm_msg_t * msg); +inline static void _slurm_rpc_job_step_get_info(slurm_msg_t * msg); +inline static void _slurm_rpc_job_will_run(slurm_msg_t * msg); +inline static void _slurm_rpc_node_registration(slurm_msg_t * msg); +inline static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg); +inline static void _slurm_rpc_ping(slurm_msg_t * msg); +inline static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg); +inline static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg); +inline static void _slurm_rpc_shutdown_controller_immediate(slurm_msg_t * + msg); +inline static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg); +inline static void _slurm_rpc_update_job(slurm_msg_t * msg); +inline static void _slurm_rpc_update_node(slurm_msg_t * msg); +inline static void _slurm_rpc_update_partition(slurm_msg_t * msg); +static void * _slurmctld_background(void *no_data); +static void _slurmctld_req(slurm_msg_t * msg); +static void * _slurmctld_rpc_mgr(void *no_data); +inline static int _slurmctld_shutdown(void); +static void * _slurmctld_signal_hand(void *no_data); +inline static void _usage(char *prog_name); typedef struct connection_arg { int newsockfd; @@ -151,7 +164,7 @@ int main(int argc, char *argv[]) slurmctld_pid = getpid(); slurmctld_conf.slurm_conf = xstrdup(SLURM_CONFIG_FILE); - parse_commandline(argc, argv, &slurmctld_conf); + _parse_commandline(argc, argv, &slurmctld_conf); if (daemonize) { error_code = daemon(0, 0); if (error_code) @@ -169,7 +182,7 @@ int main(int argc, char *argv[]) error_code, SLURM_CONFIG_FILE); if (slurmctld_conf.state_save_location) - (void) mkdir2(slurmctld_conf.state_save_location, 0700); + (void) mkdir(slurmctld_conf.state_save_location, 0700); if (slurmctld_conf.slurmctld_logfile && daemonize) { info("Routing all log messages to %s", @@ -216,14 +229,14 @@ int main(int argc, char *argv[]) if (slurmctld_conf.backup_controller && (strcmp(node_name, slurmctld_conf.backup_controller) == 0)) - run_backup(); + 
_run_backup(); else if (strcmp(node_name, slurmctld_conf.control_machine)) fatal ("this machine (%s) is not the primary (%s) or backup (%s) controller", node_name, slurmctld_conf.control_machine, slurmctld_conf.backup_controller); else /* primary tells secondary to shutdown */ - (void) shutdown_backup_controller(); + (void) _shutdown_backup_controller(); /* * create attached thread for signal handling @@ -237,15 +250,15 @@ int main(int argc, char *argv[]) error("pthread_attr_setscope error %m"); #endif if (pthread_create(&thread_id_sig, &thread_attr_sig, - slurmctld_signal_hand, NULL)) + _slurmctld_signal_hand, NULL)) fatal("pthread_create %m"); /* * create attached thread to process RPCs */ - pthread_mutex_lock(&thread_count_lock); + slurm_mutex_lock(&thread_count_lock); server_thread_count++; - pthread_mutex_unlock(&thread_count_lock); + slurm_mutex_unlock(&thread_count_lock); if (pthread_attr_init(&thread_attr_rpc)) fatal("pthread_attr_init error %m"); #ifdef PTHREAD_SCOPE_SYSTEM @@ -255,10 +268,10 @@ int main(int argc, char *argv[]) error("pthread_attr_setscope error %m"); #endif if (pthread_create(&thread_id_rpc, &thread_attr_rpc, - slurmctld_rpc_mgr, NULL)) + _slurmctld_rpc_mgr, NULL)) fatal("pthread_create error %m"); - slurmctld_background(NULL); /* could run as pthread */ + _slurmctld_background(NULL); /* could run as pthread */ if (resume_backup == false) break; } @@ -266,8 +279,8 @@ int main(int argc, char *argv[]) return SLURM_SUCCESS; } -/* slurmctld_signal_hand - Process daemon-wide signals */ -void *slurmctld_signal_hand(void *no_data) +/* _slurmctld_signal_hand - Process daemon-wide signals */ +static void *_slurmctld_signal_hand(void *no_data) { int sig; int error_code; @@ -279,7 +292,7 @@ void *slurmctld_signal_hand(void *no_data) (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - info("Send signals to slurmctld_signal_hand, pid = %u", getpid()); + info("Send signals to _slurmctld_signal_hand, pid = %u", getpid()); if (sigemptyset(&set)) error("sigemptyset error: %m"); @@ -303,7 +316,7 @@ void *slurmctld_signal_hand(void *no_data) info("Terminate signal (SIGINT or SIGTERM) received"); shutdown_time = time(NULL); /* send REQUEST_SHUTDOWN_IMMEDIATE RPC */ - slurmctld_shutdown(); + _slurmctld_shutdown(); pthread_join(thread_id_rpc, NULL); /* ssl clean up */ @@ -333,8 +346,8 @@ void *slurmctld_signal_hand(void *no_data) } -/* slurmctld_rpc_mgr - Read incoming RPCs and create pthread for each */ -void *slurmctld_rpc_mgr(void *no_data) +/* _slurmctld_rpc_mgr - Read incoming RPCs and create pthread for each */ +static void *_slurmctld_rpc_mgr(void *no_data) { slurm_fd newsockfd; slurm_fd sockfd; @@ -346,7 +359,7 @@ void *slurmctld_rpc_mgr(void *no_data) (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - debug3("slurmctld_rpc_mgr pid = %u", getpid()); + debug3("_slurmctld_rpc_mgr pid = %u", getpid()); /* threads to process individual RPC's are detached */ if (pthread_attr_init(&thread_attr_rpc_req)) @@ -382,19 +395,19 @@ void *slurmctld_rpc_mgr(void *no_data) continue; } conn_arg->newsockfd = newsockfd; - pthread_mutex_lock(&thread_count_lock); + slurm_mutex_lock(&thread_count_lock); server_thread_count++; - pthread_mutex_unlock(&thread_count_lock); + slurm_mutex_unlock(&thread_count_lock); if (server_thread_count >= MAX_SERVER_THREAD_COUNT) { info( - "Warning: server_thread_count is %d, over system limit", - server_thread_count); 
+ "Warning: server_thread_count is %d, over system limit", + server_thread_count); no_thread = 1; } else if (shutdown_time) no_thread = 1; else if (pthread_create(&thread_id_rpc_req, &thread_attr_rpc_req, - service_connection, + _service_connection, (void *) conn_arg)) { error("pthread_create error %m"); no_thread = 1; @@ -402,22 +415,27 @@ void *slurmctld_rpc_mgr(void *no_data) no_thread = 0; if (no_thread) { - if (service_connection((void *) conn_arg)) + if (_service_connection((void *) conn_arg)) break; } } - debug3("slurmctld_rpc_mgr shutting down"); - pthread_mutex_lock(&thread_count_lock); + debug3("_slurmctld_rpc_mgr shutting down"); + slurm_mutex_lock(&thread_count_lock); server_thread_count--; - pthread_mutex_unlock(&thread_count_lock); + slurm_mutex_unlock(&thread_count_lock); (void) slurm_shutdown_msg_engine(sockfd); pthread_exit((void *) 0); } -/* service_connection - service the RPC, return NULL except in case of REQUEST_SHUTDOWN_IMMEDIATE */ -void *service_connection(void *arg) +/* + * _service_connection - service the RPC, return NULL except in case + * of REQUEST_SHUTDOWN_IMMEDIATE + * IN/OUT arg - really just the connection's file descriptor, freed + * upon completion + */ +static void *_service_connection(void *arg) { int error_code; slurm_fd newsockfd = ((connection_arg_t *) arg)->newsockfd; @@ -433,7 +451,7 @@ void *service_connection(void *arg) if (msg->msg_type == REQUEST_SHUTDOWN_IMMEDIATE) return_code = (void *) "fini"; msg->conn_fd = newsockfd; - slurmctld_req(msg); /* process the request */ + _slurmctld_req (msg); /* process the request */ } /* close should only be called when the socket implementation is @@ -443,14 +461,18 @@ void *service_connection(void *arg) slurm_free_msg(msg); xfree(arg); - pthread_mutex_lock(&thread_count_lock); + slurm_mutex_lock(&thread_count_lock); server_thread_count--; - pthread_mutex_unlock(&thread_count_lock); + slurm_mutex_unlock(&thread_count_lock); return return_code; } -/* slurmctld_background - process slurmctld background activities */ -void *slurmctld_background(void *no_data) +/* + * _slurmctld_background - process slurmctld background activities + * purge defunct job records, save state, schedule jobs, and + * ping other nodes + */ +static void *_slurmctld_background(void *no_data) { static time_t last_sched_time; static time_t last_checkpoint_time; @@ -476,7 +498,7 @@ void *slurmctld_background(void *no_data) last_ping_time = last_rpc_retry_time = time(NULL); (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - debug3("slurmctld_background pid = %u", getpid()); + debug3("_slurmctld_background pid = %u", getpid()); while (shutdown_time == 0) { sleep(1); @@ -521,7 +543,7 @@ void *slurmctld_background(void *no_data) purge_old_job(); /* remove defunct job recs */ unlock_slurmctld(job_write_lock); if (schedule()) - last_checkpoint_time = 0; /* force save */ + last_checkpoint_time = 0; /* force save */ } if (shutdown_time || @@ -534,24 +556,24 @@ void *slurmctld_background(void *no_data) if (server_thread_count) sleep(1); if (server_thread_count) - info - ("warning: shutting down with server_thread_count of %d", + info( + "warning: shutting down with server_thread_count of %d", server_thread_count); - if (report_locks_set() == 0) { + if (_report_locks_set() == 0) { last_checkpoint_time = now; - save_all_state(); + _save_all_state(); } else error ("unable to save state due to set semaphores"); } else { last_checkpoint_time = now; debug("Performing full 
system state save"); - save_all_state(); + _save_all_state(); } } } - debug3("slurmctld_background shutting down"); + debug3("_slurmctld_background shutting down"); #if MEM_LEAK_TEST /* This should purge all allocated memory, *\ @@ -574,8 +596,8 @@ void *slurmctld_background(void *no_data) return NULL; } -/* save_all_state - save slurmctld state for later recovery */ -void save_all_state(void) +/* _save_all_state - save entire slurmctld state for later recovery */ +static void _save_all_state(void) { clock_t start_time; @@ -584,12 +606,15 @@ void save_all_state(void) (void) dump_all_node_state(); (void) dump_all_part_state(); (void) dump_all_job_state(); - info("save_all_state complete, time=%ld", + info("_save_all_state complete, time=%ld", (long) (clock() - start_time)); } -/* report_locks_set - report any slurmctld locks left set, return count */ -int report_locks_set(void) +/* + * _report_locks_set - report any slurmctld locks left set + * RET count of locks currently set + */ +static int _report_locks_set(void) { slurmctld_lock_flags_t lock_flags; char config[4] = "", job[4] = "", node[4] = "", partition[4] = ""; @@ -634,8 +659,10 @@ int report_locks_set(void) return lock_count; } -/* slurmctld_req - Process an individual RPC request */ -void slurmctld_req(slurm_msg_t * msg) +/* _slurmctld_req - Process an individual RPC request + * IN/OUT - the request message, data associated with the message is freed + */ +static void _slurmctld_req (slurm_msg_t * msg) { switch (msg->msg_type) { @@ -756,7 +783,7 @@ static void _slurm_rpc_dump_build(slurm_msg_t * msg) (long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_NO_CHANGE_IN_DATA); } else { - fill_ctld_conf(&build_tbl); + _fill_ctld_conf(&build_tbl); unlock_slurmctld(config_read_lock); /* init response_msg structure */ @@ -925,15 +952,16 @@ static void _slurm_rpc_job_step_cancel(slurm_msg_t * msg) /* return result */ if (error_code) { - info("_slurm_rpc_job_step_cancel error %d for %u, time=%ld", - error_code, job_step_id_msg->job_id, - (long) (clock() - start_time)); + info( + "_slurm_rpc_job_step_cancel error %d for %u, time=%ld", + error_code, job_step_id_msg->job_id, + (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_job_step_cancel success for JobId=%u, time=%ld", - job_step_id_msg->job_id, - (long) (clock() - start_time)); + "_slurm_rpc_job_step_cancel success for JobId=%u, time=%ld", + job_step_id_msg->job_id, + (long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_SUCCESS); /* Below functions provide their own locking */ @@ -950,17 +978,17 @@ static void _slurm_rpc_job_step_cancel(slurm_msg_t * msg) /* return result */ if (error_code) { info( - "_slurm_rpc_job_step_cancel error %d for %u.%u, time=%ld", - error_code, job_step_id_msg->job_id, - job_step_id_msg->job_step_id, - (long) (clock() - start_time)); + "_slurm_rpc_job_step_cancel error %d for %u.%u, time=%ld", + error_code, job_step_id_msg->job_id, + job_step_id_msg->job_step_id, + (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_job_step_cancel success for %u.%u, time=%ld", - job_step_id_msg->job_id, - job_step_id_msg->job_step_id, - (long) (clock() - start_time)); + "_slurm_rpc_job_step_cancel success for %u.%u, time=%ld", + job_step_id_msg->job_id, + job_step_id_msg->job_step_id, + (long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_SUCCESS); /* Below function provides its own locking */ @@ -992,21 +1020,22 @@ static void 
_slurm_rpc_job_step_complete(slurm_msg_t * msg) /* do RPC call */ /* First set node down as needed on fatal error */ if (complete_job_step_msg->slurm_rc != SLURM_SUCCESS) { - error ("Fatal slurmd error running job %u from node %s: %s", - complete_job_step_msg->job_id, - complete_job_step_msg->node_name, - slurm_strerror (complete_job_step_msg->slurm_rc)); + error("Fatal slurmd error running job %u from node %s: %s", + complete_job_step_msg->job_id, + complete_job_step_msg->node_name, + slurm_strerror(complete_job_step_msg->slurm_rc)); if ((uid != 0) && (uid != getuid())) { error_code = ESLURM_USER_ID_MISSING; - error("Security violation, uid %u can't set node down", - (unsigned int) uid); + error + ("Security violation, uid %u can't set node down", + (unsigned int) uid); } if (error_code == SLURM_SUCCESS) { update_node_msg_t update_node_msg; - update_node_msg.node_names = - complete_job_step_msg->node_name; + update_node_msg.node_names = + complete_job_step_msg->node_name; update_node_msg.node_state = NODE_STATE_DOWN; - error_code = update_node ( &update_node_msg ); + error_code = update_node(&update_node_msg); if (complete_job_step_msg->job_rc != SLURM_SUCCESS) job_requeue = true; } @@ -1014,53 +1043,55 @@ static void _slurm_rpc_job_step_complete(slurm_msg_t * msg) /* Mark job and/or job step complete */ if (complete_job_step_msg->job_step_id == NO_VAL) { - error_code = job_complete(complete_job_step_msg->job_id, - uid, job_requeue, - complete_job_step_msg->job_rc); + error_code = job_complete(complete_job_step_msg->job_id, + uid, job_requeue, + complete_job_step_msg->job_rc); unlock_slurmctld(job_write_lock); /* return result */ if (error_code) { info( - "_slurm_rpc_job_step_complete error %d for %u, time=%ld", - error_code, complete_job_step_msg->job_id, - (long) (clock() - start_time)); + "_slurm_rpc_job_step_complete error %d for %u, time=%ld", + error_code, complete_job_step_msg->job_id, + (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_job_step_complete success for JobId=%u, time=%ld", - complete_job_step_msg->job_id, - (long) (clock() - start_time)); + "_slurm_rpc_job_step_complete success for JobId=%u, time=%ld", + complete_job_step_msg->job_id, + (long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_SUCCESS); schedule(); /* Has own locking */ (void) dump_all_job_state(); /* Has own locking */ } } else { - error_code = job_step_complete(complete_job_step_msg->job_id, - complete_job_step_msg-> - job_step_id, uid); + error_code = + job_step_complete(complete_job_step_msg->job_id, + complete_job_step_msg->job_step_id, + uid); unlock_slurmctld(job_write_lock); /* return result */ if (error_code) { info( - "_slurm_rpc_job_step_complete error %d for %u.%u, time=%ld", - error_code, complete_job_step_msg->job_id, - complete_job_step_msg->job_step_id, - (long) (clock() - start_time)); + "_slurm_rpc_job_step_complete error %d for %u.%u, time=%ld", + error_code, complete_job_step_msg->job_id, + complete_job_step_msg->job_step_id, + (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_job_step_complete success for %u.%u, time=%ld", - complete_job_step_msg->job_id, - complete_job_step_msg->job_step_id, - (long) (clock() - start_time)); + "_slurm_rpc_job_step_complete success for %u.%u, time=%ld", + complete_job_step_msg->job_id, + complete_job_step_msg->job_step_id, + (long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_SUCCESS); (void) dump_all_job_state(); /* Has own locking */ } } } 
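/*
 * A sketch for illustration, not part of this commit: the locking idiom
 * the RPC handlers above follow. slurmctld_lock_t, lock_slurmctld() and
 * unlock_slurmctld() come from src/slurmctld/locks.h; the four initializer
 * fields select the config, job, node and partition locks, in that order.
 * example_rpc_handler() itself is hypothetical.
 */
static void example_rpc_handler(slurm_msg_t * msg)
{
	int error_code = 0;
	/* Locks: Write job, read node */
	slurmctld_lock_t job_write_lock =
	    { NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };

	lock_slurmctld(job_write_lock);
	/* ... update the job table here, setting error_code on failure ... */
	unlock_slurmctld(job_write_lock);

	if (error_code)
		slurm_send_rc_msg(msg, error_code);
	else
		slurm_send_rc_msg(msg, SLURM_SUCCESS);
}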
+/* _slurm_rpc_job_step_get_info - process request for job step info */ static void _slurm_rpc_job_step_get_info(slurm_msg_t * msg) { clock_t start_time; @@ -1095,13 +1126,13 @@ static void _slurm_rpc_job_step_get_info(slurm_msg_t * msg) resp_buffer = xfer_buf_data(buffer); if (error_code == ESLURM_INVALID_JOB_ID) info( - "_slurm_rpc_job_step_get_info, no such job step %u.%u, time=%ld", - request->job_id, request->step_id, - (long) (clock() - start_time)); + "_slurm_rpc_job_step_get_info, no such job step %u.%u, time=%ld", + request->job_id, request->step_id, + (long) (clock() - start_time)); else if (error_code) - error( - "_slurm_rpc_job_step_get_info, error %d, time=%ld", - error_code, (long) (clock() - start_time)); + error + ("_slurm_rpc_job_step_get_info, error %d, time=%ld", + error_code, (long) (clock() - start_time)); } if (error_code) @@ -1121,7 +1152,7 @@ static void _slurm_rpc_job_step_get_info(slurm_msg_t * msg) } /* _slurm_rpc_update_job - process RPC to update the configuration of a - * job (e.g. priority) */ + * job (e.g. priority) */ static void _slurm_rpc_update_job(slurm_msg_t * msg) { /* init */ @@ -1145,15 +1176,15 @@ static void _slurm_rpc_update_job(slurm_msg_t * msg) /* return result */ if (error_code) { - error - ("_slurm_rpc_update_job error %d for job id %u, time=%ld", + error( + "_slurm_rpc_update_job error %d for job id %u, time=%ld", error_code, job_desc_msg->job_id, (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_update_job complete for job id %u, time=%ld", - job_desc_msg->job_id, (long) (clock() - start_time)); + "_slurm_rpc_update_job complete for job id %u, time=%ld", + job_desc_msg->job_id, (long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_SUCCESS); /* Below functions provide their own locking */ schedule(); @@ -1162,14 +1193,14 @@ static void _slurm_rpc_update_job(slurm_msg_t * msg) } /* _slurm_rpc_update_node - process RPC to update the configuration of a - * node (e.g. UP/DOWN) */ + * node (e.g. UP/DOWN) */ static void _slurm_rpc_update_node(slurm_msg_t * msg) { /* init */ int error_code = 0; clock_t start_time; update_node_msg_t *update_node_msg_ptr = - (update_node_msg_t *) msg->data; + (update_node_msg_t *) msg->data; /* Locks: Write node */ slurmctld_lock_t node_write_lock = { NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK @@ -1201,9 +1232,9 @@ static void _slurm_rpc_update_node(slurm_msg_t * msg) slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_update_node complete for node %s, time=%ld", - update_node_msg_ptr->node_names, - (long) (clock() - start_time)); + "_slurm_rpc_update_node complete for node %s, time=%ld", + update_node_msg_ptr->node_names, + (long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_SUCCESS); } @@ -1214,7 +1245,7 @@ static void _slurm_rpc_update_node(slurm_msg_t * msg) } /* _slurm_rpc_update_partition - process RPC to update the configuration - * of a partition (e.g. UP/DOWN) */ + * of a partition (e.g. 
UP/DOWN) */ static void _slurm_rpc_update_partition(slurm_msg_t * msg) { /* init */ @@ -1232,9 +1263,9 @@ static void _slurm_rpc_update_partition(slurm_msg_t * msg) uid = slurm_auth_uid(msg->cred); if ((uid != 0) && (uid != getuid())) { error_code = ESLURM_USER_ID_MISSING; - error( - "Security violation, UPDATE_PARTITION RPC from uid %u", - (unsigned int) uid); + error + ("Security violation, UPDATE_PARTITION RPC from uid %u", + (unsigned int) uid); } if (error_code == 0) { @@ -1246,14 +1277,15 @@ static void _slurm_rpc_update_partition(slurm_msg_t * msg) /* return result */ if (error_code) { - error - ("_slurm_rpc_update_partition error %d for partition %s, time=%ld", + error( + "_slurm_rpc_update_partition error %d for partition %s, time=%ld", error_code, part_desc_ptr->name, (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); } else { - info("_slurm_rpc_update_partition complete for partition %s, time=%ld", - part_desc_ptr->name, (long) (clock() - start_time)); + info( + "_slurm_rpc_update_partition complete for partition %s, time=%ld", + part_desc_ptr->name, (long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_SUCCESS); /* NOTE: These functions provide their own locks */ @@ -1309,8 +1341,8 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_submit_batch_job success for id=%u, time=%ld", - job_id, (long) (clock() - start_time)); + "_slurm_rpc_submit_batch_job success for id=%u, time=%ld", + job_id, (long) (clock() - start_time)); /* send job_ID */ submit_msg.job_id = job_id; response_msg.msg_type = RESPONSE_SUBMIT_BATCH_JOB; @@ -1321,8 +1353,9 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) } } -/* _slurm_rpc_allocate_resources: process RPC to allocate resources for a job */ -void _slurm_rpc_allocate_resources(slurm_msg_t * msg, uint8_t immediate) +/* _slurm_rpc_allocate_resources: process RPC to allocate resources for + * a job */ +static void _slurm_rpc_allocate_resources(slurm_msg_t * msg, uint8_t immediate) { /* init */ int error_code = 0; @@ -1344,7 +1377,8 @@ void _slurm_rpc_allocate_resources(slurm_msg_t * msg, uint8_t immediate) start_time = clock(); if (immediate) - debug("Processing RPC: REQUEST_IMMEDIATE_RESOURCE_ALLOCATION"); + debug + ("Processing RPC: REQUEST_IMMEDIATE_RESOURCE_ALLOCATION"); else debug("Processing RPC: REQUEST_RESOURCE_ALLOCATION"); @@ -1370,13 +1404,13 @@ void _slurm_rpc_allocate_resources(slurm_msg_t * msg, uint8_t immediate) /* return result */ if (error_code) { info( - "_slurm_rpc_allocate_resources error %d allocating resources, time=%ld", - error_code, (long) (clock() - start_time)); + "_slurm_rpc_allocate_resources error %d allocating resources, time=%ld", + error_code, (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_allocate_resources allocated nodes %s to JobId=%u, time=%ld", - node_list_ptr, job_id, (long) (clock() - start_time)); + "_slurm_rpc_allocate_resources allocated nodes %s to JobId=%u, time=%ld", + node_list_ptr, job_id, (long) (clock() - start_time)); /* send job_ID and node_name_ptr */ @@ -1398,7 +1432,7 @@ void _slurm_rpc_allocate_resources(slurm_msg_t * msg, uint8_t immediate) } /* _slurm_rpc_allocate_and_run: process RPC to allocate resources for a job - * and initiate a job step */ + * and initiate a job step */ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) { /* init */ @@ -1434,7 +1468,7 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) ("Security 
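The security check recurring in these handlers accepts privileged RPCs only from root or from the uid slurmctld itself runs as. A standalone reduction of the test, where the rpc_uid parameter stands in for the uid extracted from the message credential:

#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

static int authorize_privileged(uid_t rpc_uid)
{
	if ((rpc_uid != 0) && (rpc_uid != getuid()))
		return -1;	/* ESLURM_USER_ID_MISSING in the real code */
	return 0;
}

int main(void)
{
	printf("uid %u: %s\n", (unsigned int) getuid(),
	       authorize_privileged(getuid()) ? "denied" : "allowed");
	return 0;
}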
violation, ALLOCATE_AND_RUN RPC from uid %u", (unsigned int) uid); } - + if (error_code == 0) { lock_slurmctld(job_write_lock); error_code = job_allocate(job_desc_msg, &job_id, @@ -1448,8 +1482,8 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) if (error_code) { unlock_slurmctld(job_write_lock); info( - "_slurm_rpc_allocate_and_run error %d allocating resources, time=%ld", - error_code, (long) (clock() - start_time)); + "_slurm_rpc_allocate_and_run error %d allocating resources, time=%ld", + error_code, (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); return; } @@ -1462,14 +1496,14 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg) if (error_code) { unlock_slurmctld(job_write_lock); info( - "_slurm_rpc_allocate_and_run error %d creating job step, time=%ld", - error_code, (long) (clock() - start_time)); + "_slurm_rpc_allocate_and_run error %d creating job step, time=%ld", + error_code, (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_allocate_and_run allocated nodes %s to JobId=%u, time=%ld", - node_list_ptr, job_id, (long) (clock() - start_time)); + "_slurm_rpc_allocate_and_run allocated nodes %s to JobId=%u, time=%ld", + node_list_ptr, job_id, (long) (clock() - start_time)); /* send job_ID and node_name_ptr */ alloc_msg.job_id = job_id; @@ -1538,15 +1572,15 @@ static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg) /* return result */ if (error_code) { info( - "_slurm_rpc_old_job_alloc error %d getting info, job=%u, uid=%u, time=%ld", - error_code, job_desc_msg->job_id, job_desc_msg->uid, - (long) (clock() - start_time)); + "_slurm_rpc_old_job_alloc error %d getting info, job=%u, uid=%u, time=%ld", + error_code, job_desc_msg->job_id, job_desc_msg->uid, + (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_old_job_alloc job=%u has nodes %s, time=%ld", - job_desc_msg->job_id, node_list_ptr, - (long) (clock() - start_time)); + "_slurm_rpc_old_job_alloc job=%u has nodes %s, time=%ld", + job_desc_msg->job_id, node_list_ptr, + (long) (clock() - start_time)); /* send job_ID and node_name_ptr */ @@ -1566,7 +1600,7 @@ static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg) } /* _slurm_rpc_job_will_run - process RPC to determine if job with given - * configuration can be initiated */ + * configuration can be initiated */ static void _slurm_rpc_job_will_run(slurm_msg_t * msg) { /* init */ @@ -1638,13 +1672,13 @@ static void _slurm_rpc_ping(slurm_msg_t * msg) /* _slurm_rpc_reconfigure_controller - process RPC to re-initialize - * slurmctld from configuration file */ + * slurmctld from configuration file */ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg) { /* init */ int error_code = 0; clock_t start_time; - /* Locks: Write configuration, write job, write node, write partition */ + /* Locks: Write configuration, job, node and partition */ slurmctld_lock_t config_write_lock = { WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK }; @@ -1676,17 +1710,17 @@ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg) /* return result */ if (error_code) { - error - ("_slurm_rpc_reconfigure_controller error %d, time=%ld", + error( + "_slurm_rpc_reconfigure_controller error %d, time=%ld", error_code, (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); } else { info( - "_slurm_rpc_reconfigure_controller completed successfully, time=%ld", - (long) (clock() - start_time)); + "_slurm_rpc_reconfigure_controller completed successfully, time=%ld", + 
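Note how _slurm_rpc_allocate_and_run keeps the job write lock across both the allocation and the step creation, so no other RPC can observe the job between the two operations. The control flow reduced to a plain mutex; a pthread_mutex_t is a simplification standing in for slurmctld's reader/writer locks, and the two stubs stand in for job_allocate() and step_create():

#include <pthread.h>

static pthread_mutex_t job_write_lock = PTHREAD_MUTEX_INITIALIZER;

static int allocate_job(void) { return 0; }	/* stub allocation */
static int create_step(void)  { return 0; }	/* stub step creation */

static int allocate_and_run(void)
{
	int error_code;

	pthread_mutex_lock(&job_write_lock);
	error_code = allocate_job();
	if (error_code) {
		pthread_mutex_unlock(&job_write_lock);
		return error_code;	/* early exit mirrors the RPC */
	}
	error_code = create_step();
	pthread_mutex_unlock(&job_write_lock);
	return error_code;
}

int main(void)
{
	return allocate_and_run();
}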
(long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_SUCCESS); schedule(); - save_all_state(); + _save_all_state(); } } @@ -1729,7 +1763,7 @@ static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg) ("thread_id_sig undefined, doing shutdown the hard way"); shutdown_time = time(NULL); /* send REQUEST_SHUTDOWN_IMMEDIATE RPC */ - slurmctld_shutdown(); + _slurmctld_shutdown(); } if (msg->msg_type == REQUEST_CONTROL) { @@ -1742,7 +1776,8 @@ static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg) fatal("Aborting per RPC request"); } -/* _slurm_rpc_shutdown_controller_immediate - process RPC to shutdown slurmctld */ +/* _slurm_rpc_shutdown_controller_immediate - process RPC to shutdown + * slurmctld */ static void _slurm_rpc_shutdown_controller_immediate(slurm_msg_t * msg) { int error_code = 0; @@ -1763,7 +1798,7 @@ static void _slurm_rpc_shutdown_controller_immediate(slurm_msg_t * msg) } /* _slurm_rpc_job_step_create - process RPC to creates/registers a job step - * with the step_mgr */ + * with the step_mgr */ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) { /* init */ @@ -1807,8 +1842,6 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); } else { - /* FIXME Needs to be fixed to really work with a credential */ - //slurm_job_credential_t cred = { 1,1,"test",start_time, "signature"} ; info("_slurm_rpc_job_step_create %u.%u success time=%ld", step_rec->job_ptr->job_id, step_rec->step_id, (long) (clock() - start_time)); @@ -1833,7 +1866,7 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) } /* _slurm_rpc_node_registration - process RPC to determine if a node's - * actual configuration satisfies the configured specification */ + * actual configuration satisfies the configured specification */ static void _slurm_rpc_node_registration(slurm_msg_t * msg) { /* init */ @@ -1875,24 +1908,26 @@ static void _slurm_rpc_node_registration(slurm_msg_t * msg) /* return result */ if (error_code) { - error - ("_slurm_rpc_node_registration error %d for %s, time=%ld", + error( + "_slurm_rpc_node_registration error %d for %s, time=%ld", error_code, node_reg_stat_msg->node_name, (long) (clock() - start_time)); slurm_send_rc_msg(msg, error_code); } else { - info("_slurm_rpc_node_registration complete for %s, time=%ld", - node_reg_stat_msg->node_name, - (long) (clock() - start_time)); + info( + "_slurm_rpc_node_registration complete for %s, time=%ld", + node_reg_stat_msg->node_name, + (long) (clock() - start_time)); slurm_send_rc_msg(msg, SLURM_SUCCESS); } } /* - * slurmctld_shutdown - issue RPC to have slurmctld shutdown, - * knocks loose an slurm_accept_msg_conn() if we have a thread hung there + * _slurmctld_shutdown - issue RPC to have slurmctld shutdown, knocks + * loose an slurm_accept_msg_conn() if we have a thread hung there + * RET 0 or error code */ -int slurmctld_shutdown() +static int _slurmctld_shutdown(void) { int rc; slurm_fd sockfd; @@ -1903,7 +1938,7 @@ int slurmctld_shutdown() * with self/controller */ slurm_set_addr(&self, slurmctld_conf.slurmctld_port, "localhost"); if ((sockfd = slurm_open_msg_conn(&self)) == SLURM_SOCKET_ERROR) { - error("slurmctld_shutdown/slurm_open_msg_conn: %m"); + error("_slurmctld_shutdown/slurm_open_msg_conn: %m"); return SLURM_SOCKET_ERROR; } @@ -1912,7 +1947,7 @@ int slurmctld_shutdown() if ((rc = slurm_send_node_msg(sockfd, &request_msg)) == SLURM_SOCKET_ERROR) { - error("slurmctld_shutdown/slurm_send_node_msg error: %m"); + 
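_slurmctld_shutdown works by sending a message to our own listen port: the incoming connection wakes any thread blocked in slurm_accept_msg_conn() so it can notice shutdown_time and exit. The same trick in plain BSD sockets; the port number and function name here are invented for illustration:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int wake_listener(unsigned short port)
{
	struct sockaddr_in self;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	memset(&self, 0, sizeof(self));
	self.sin_family = AF_INET;
	self.sin_port = htons(port);
	self.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	(void) connect(fd, (struct sockaddr *) &self, sizeof(self));
	return close(fd);	/* accept() in the server has now returned */
}

int main(void)
{
	return wake_listener(7002);	/* illustrative port, not SLURM's */
}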
error("_slurmctld_shutdown/slurm_send_node_msg error: %m"); return SLURM_SOCKET_ERROR; } @@ -1928,8 +1963,12 @@ int slurmctld_shutdown() return SLURM_PROTOCOL_SUCCESS; } - -void fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) +/* + * _fill_ctld_conf - make a copy of current slurm configuration + * this is done with locks set so the data can change at other times + * OUT conf_ptr - place to copy configuration to + */ +void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) { conf_ptr->last_update = slurmctld_conf.last_update; conf_ptr->backup_addr = slurmctld_conf.backup_addr; @@ -1962,8 +2001,14 @@ void fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) extern char *optarg; extern int optind, opterr, optopt; -/* parse_commandline - parse and process any command line arguments */ -void parse_commandline(int argc, char *argv[], slurm_ctl_conf_t * conf_ptr) +/* + * _parse_commandline - parse and process any command line arguments + * IN argc - number of command line arguments + * IN argv - the command line arguments + * IN/OUT conf_ptr - pointer to current configuration, update as needed + */ +static void _parse_commandline(int argc, char *argv[], + slurm_ctl_conf_t * conf_ptr) { int c = 0, errlev; char *log_file = NULL; @@ -2029,7 +2074,8 @@ void parse_commandline(int argc, char *argv[], slurm_ctl_conf_t * conf_ptr) log_init(argv[0], log_opts, SYSLOG_FACILITY_DAEMON, log_file); } -/* _usage - print a message describing the command line arguments of slurmctld */ +/* _usage - print a message describing the command line arguments of + * slurmctld */ static void _usage(char *prog_name) { printf("%s [OPTIONS]\n", prog_name); @@ -2050,9 +2096,9 @@ static void _usage(char *prog_name) ("<errlev> is an integer between 0 and 7 with higher numbers providing more detail\n"); } -/* run_backup - controller should run in standby mode, assuming control when the - * primary controller stops responding */ -void run_backup(void) +/* _run_backup - this is the backup controller, it should run in standby + * mode, assuming control when the primary controller stops responding */ +static void _run_backup(void) { time_t last_controller_response = time(NULL), last_ping = 0; pthread_attr_t thread_attr_sig, thread_attr_rpc; @@ -2071,7 +2117,7 @@ void run_backup(void) error("pthread_attr_setscope error %m"); #endif if (pthread_create(&thread_id_sig, - &thread_attr_sig, background_signal_hand, NULL)) + &thread_attr_sig, _background_signal_hand, NULL)) fatal("pthread_create %m"); /* @@ -2085,7 +2131,7 @@ void run_backup(void) error("pthread_attr_setscope error %m"); #endif if (pthread_create - (&thread_id_rpc, &thread_attr_rpc, background_rpc_mgr, NULL)) + (&thread_id_rpc, &thread_attr_rpc, _background_rpc_mgr, NULL)) fatal("pthread_create error %m"); /* repeatedly ping ControlMachine */ @@ -2096,7 +2142,7 @@ void run_backup(void) continue; last_ping = time(NULL); - if (ping_controller() == 0) + if (_ping_controller() == 0) last_controller_response = time(NULL); else if (difftime(time(NULL), last_controller_response) > slurmctld_conf.slurmctld_timeout) @@ -2119,15 +2165,16 @@ void run_backup(void) return; } -/* background_signal_hand - Process daemon-wide signals */ -void *background_signal_hand(void *no_data) +/* _background_signal_hand - Process daemon-wide signals for the + * backup controller */ +static void *_background_signal_hand(void *no_data) { int sig; sigset_t set; (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - info("Send signals to 
background_signal_hand, pid = %u", getpid()); + info("Send signals to _background_signal_hand, pid = %u", getpid()); if (sigemptyset(&set)) error("sigemptyset error: %m"); @@ -2149,7 +2196,7 @@ void *background_signal_hand(void *no_data) info("Terminate signal (SIGINT or SIGTERM) received"); shutdown_time = time(NULL); /* send REQUEST_SHUTDOWN_IMMEDIATE RPC */ - slurmctld_shutdown(); + _slurmctld_shutdown(); pthread_join(thread_id_rpc, NULL); return NULL; /* Normal termination */ @@ -2163,8 +2210,9 @@ void *background_signal_hand(void *no_data) } } -/* background_rpc_mgr - Read and process incoming RPCs to the background controller (that's us) */ -void *background_rpc_mgr(void *no_data) +/* _background_rpc_mgr - Read and process incoming RPCs to the background + * controller (that's us) */ +static void *_background_rpc_mgr(void *no_data) { slurm_fd newsockfd; slurm_fd sockfd; @@ -2175,7 +2223,7 @@ void *background_rpc_mgr(void *no_data) (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - debug3("background_rpc_mgr pid = %u", getpid()); + debug3("_background_rpc_mgr pid = %u", getpid()); /* initialize port for RPCs */ if ((sockfd = @@ -2202,7 +2250,7 @@ void *background_rpc_mgr(void *no_data) == SLURM_SOCKET_ERROR) error("slurm_receive_msg error %m"); else { - error_code = background_process_msg(msg); + error_code = _background_process_msg(msg); if ((error_code == 0) && (msg->msg_type == REQUEST_SHUTDOWN_IMMEDIATE)) done_flag = true; @@ -2215,13 +2263,13 @@ void *background_rpc_mgr(void *no_data) slurm_close_accepted_conn(newsockfd); /* close new socket */ } - debug3("background_rpc_mgr shutting down"); + debug3("_background_rpc_mgr shutting down"); slurm_close_accepted_conn(sockfd); /* close the main socket */ pthread_exit((void *) 0); } -/* background_process_msg - process an RPC to the backup_controller */ -int background_process_msg(slurm_msg_t * msg) +/* _background_process_msg - process an RPC to the backup_controller */ +static int _background_process_msg(slurm_msg_t * msg) { int error_code = 0; uid_t uid; @@ -2252,8 +2300,9 @@ int background_process_msg(slurm_msg_t * msg) return error_code; } -/* Ping ControlMachine, return 0 if no error */ -int ping_controller(void) +/* Ping primary ControlMachine + * RET 0 if no error */ +static int _ping_controller(void) { int rc, msg_size; slurm_fd sockfd; @@ -2269,7 +2318,7 @@ int ping_controller(void) slurmctld_conf.control_addr); if ((sockfd = slurm_open_msg_conn(&primary_addr)) == SLURM_SOCKET_ERROR) { - error("ping_controller/slurm_open_msg_conn: %m"); + error("_ping_controller/slurm_open_msg_conn: %m"); return SLURM_SOCKET_ERROR; } @@ -2278,21 +2327,21 @@ int ping_controller(void) if ((rc = slurm_send_node_msg(sockfd, &request_msg)) == SLURM_SOCKET_ERROR) { - error("ping_controller/slurm_send_node_msg error: %m"); + error("_ping_controller/slurm_send_node_msg error: %m"); return SLURM_SOCKET_ERROR; } /* receive message */ if ((msg_size = slurm_receive_msg(sockfd, &response_msg)) == SLURM_SOCKET_ERROR) { - error("ping_controller/slurm_receive_msg error: %m"); + error("_ping_controller/slurm_receive_msg error: %m"); return SLURM_SOCKET_ERROR; } /* shutdown message connection */ if ((rc = slurm_shutdown_msg_conn(sockfd)) == SLURM_SOCKET_ERROR) { - error("ping_controller/slurm_shutdown_msg_conn error: %m"); + error("_ping_controller/slurm_shutdown_msg_conn error: %m"); return SLURM_SOCKET_ERROR; } @@ -2305,12 +2354,12 @@ int ping_controller(void) rc = 
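_background_signal_hand follows the standard POSIX pattern for a dedicated signal thread: block the signals of interest in every thread, then collect them synchronously with sigwait(). A self-contained demonstration of that pattern:

#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

static void *signal_thread(void *arg)
{
	sigset_t *set = (sigset_t *) arg;
	int sig;

	while (sigwait(set, &sig) == 0) {
		printf("got signal %d\n", sig);
		if ((sig == SIGINT) || (sig == SIGTERM))
			return NULL;	/* normal termination */
	}
	return NULL;
}

int main(void)
{
	sigset_t set;
	pthread_t tid;

	sigemptyset(&set);
	sigaddset(&set, SIGINT);
	sigaddset(&set, SIGTERM);
	/* block before pthread_create so every thread inherits the mask */
	pthread_sigmask(SIG_BLOCK, &set, NULL);
	pthread_create(&tid, NULL, signal_thread, &set);
	kill(getpid(), SIGTERM);	/* stand-in for an external signal */
	pthread_join(tid, NULL);
	return 0;
}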
slurm_rc_msg->return_code; slurm_free_return_code_msg(slurm_rc_msg); if (rc) { - error("ping_controller/response error %d", rc); + error("_ping_controller/response error %d", rc); return SLURM_PROTOCOL_ERROR; } break; default: - error("ping_controller/unexpected message type %d", + error("_ping_controller/unexpected message type %d", response_msg.msg_type); return SLURM_PROTOCOL_ERROR; break; @@ -2319,8 +2368,10 @@ int ping_controller(void) } /* Tell the backup_controller to relinquish control, primary control_machine - * has resumed operation */ -static int shutdown_backup_controller(void) + * has resumed operation + * RET 0 or an error code + */ +static int _shutdown_backup_controller(void) { int rc; int msg_size; @@ -2341,7 +2392,7 @@ static int shutdown_backup_controller(void) if ((sockfd = slurm_open_msg_conn(&secondary_addr)) == SLURM_SOCKET_ERROR) { error - ("shutdown_backup_controller/slurm_open_msg_conn: %m"); + ("_shutdown_backup_controller/slurm_open_msg_conn: %m"); return SLURM_SOCKET_ERROR; } @@ -2352,7 +2403,7 @@ static int shutdown_backup_controller(void) if ((rc = slurm_send_node_msg(sockfd, &request_msg)) == SLURM_SOCKET_ERROR) { error - ("shutdown_backup_controller/slurm_send_node_msg error: %m"); + ("_shutdown_backup_controller/slurm_send_node_msg error: %m"); return SLURM_SOCKET_ERROR; } @@ -2360,7 +2411,7 @@ static int shutdown_backup_controller(void) if ((msg_size = slurm_receive_msg(sockfd, &response_msg)) == SLURM_SOCKET_ERROR) { error - ("shutdown_backup_controller/slurm_receive_msg error: %m"); + ("_shutdown_backup_controller/slurm_receive_msg error: %m"); return SLURM_SOCKET_ERROR; } @@ -2368,7 +2419,7 @@ static int shutdown_backup_controller(void) if ((rc = slurm_shutdown_msg_conn(sockfd)) == SLURM_SOCKET_ERROR) { error - ("shutdown_backup_controller/slurm_shutdown_msg_conn error: %m"); + ("_shutdown_backup_controller/slurm_shutdown_msg_conn error: %m"); return SLURM_SOCKET_ERROR; } @@ -2382,7 +2433,7 @@ static int shutdown_backup_controller(void) slurm_free_return_code_msg(slurm_rc_msg); if (rc) { error - ("shutdown_backup_controller/response error %d", + ("_shutdown_backup_controller/response error %d", rc); return SLURM_PROTOCOL_ERROR; } else @@ -2390,7 +2441,7 @@ static int shutdown_backup_controller(void) break; default: error - ("shutdown_backup_controller/unexpected message type %d", + ("_shutdown_backup_controller/unexpected message type %d", response_msg.msg_type); return SLURM_PROTOCOL_ERROR; break; diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 471ce933fbd..e7745e3806a 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -68,60 +68,69 @@ slurm_ssl_key_ctx_t sign_ctx; #define STEP_FLAG 0xbbbb #define TOP_PRIORITY 0xffff0000 /* large, but leave headroom for higher */ -#define job_hash_inx(job_id) (job_id % MAX_JOB_COUNT) -#define yes_or_no(in_string) \ - (( strcmp ((in_string),"YES"))? \ - (strcmp((in_string),"NO")? \ +#define FREE_NULL(_X) \ + do { \ + if (_X) xfree (_X); \ + _X = NULL; \ + } while (0) + +#define JOB_HASH_INX(_job_id) (_job_id % MAX_JOB_COUNT) + +#define YES_OR_NO(_in_string) \ + (( strcmp ((_in_string),"YES"))? \ + (strcmp((_in_string),"NO")? 
\ -1 : 0 ) : 1 ) +/* Global variables */ +List job_list = NULL; /* job_record list */ +time_t last_job_update; /* time of last update to job records */ +/* Local variables */ static int default_prio = TOP_PRIORITY; static int job_count; /* job's in the system */ static long job_id_sequence = -1; /* first job_id to assign new job */ -List job_list = NULL; /* job_record list */ -time_t last_job_update; /* time of last update to job records */ static struct job_record *job_hash[MAX_JOB_COUNT]; static struct job_record *job_hash_over[MAX_JOB_COUNT]; static int max_hash_over = 0; -void add_job_hash(struct job_record *job_ptr); +static void _add_job_hash(struct job_record *job_ptr); static int _copy_job_desc_to_file(job_desc_msg_t * job_desc, uint32_t job_id); static int _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, struct job_record **job_ptr, struct part_record *part_ptr, bitstr_t * req_bitmap); -void delete_job_desc_files(uint32_t job_id); -void dump_job_state(struct job_record *dump_job_ptr, Buf buffer); -void dump_job_details_state(struct job_details *detail_ptr, Buf buffer); -void dump_job_step_state(struct step_record *step_ptr, Buf buffer); -int job_create(job_desc_msg_t * job_specs, uint32_t * new_job_id, - int allocate, int will_run, struct job_record **job_rec_ptr, - uid_t submit_uid); -void list_delete_job(void *job_entry); -int list_find_job_id(void *job_entry, void *key); -int list_find_job_old(void *job_entry, void *key); -void read_data_from_file(char *file_name, char **data); -void read_data_array_from_file(char *file_name, char ***data, - uint16_t * size); -void signal_job_on_node(uint32_t job_id, uint16_t step_id, int signum, - char *node_name); -int top_priority(struct job_record *job_ptr); -int validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate); +static void _delete_job_desc_files(uint32_t job_id); +static void _dump_job_details_state(struct job_details *detail_ptr, + Buf buffer); +static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer); +static void _dump_job_step_state(struct step_record *step_ptr, Buf buffer); +static int _job_create(job_desc_msg_t * job_specs, uint32_t * new_job_id, + int allocate, int will_run, + struct job_record **job_rec_ptr, uid_t submit_uid); +static void _list_delete_job(void *job_entry); +static int _list_find_job_old(void *job_entry, void *key); +static void _read_data_array_from_file(char *file_name, char ***data, + uint16_t * size); +static void _read_data_from_file(char *file_name, char **data); +static void _set_job_id(struct job_record *job_ptr); +static void _set_job_prio(struct job_record *job_ptr); +static void _signal_job_on_node(uint32_t job_id, uint16_t step_id, + int signum, char *node_name); +static int _top_priority(struct job_record *job_ptr); +static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate); static int _write_data_to_file(char *file_name, char *data); static int _write_data_array_to_file(char *file_name, char **data, uint16_t size); -static inline void x_clear(void *arg); /* * create_job_record - create an empty job_record including job_details. 
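The new FREE_NULL macro is wrapped in do { } while (0) so it expands to exactly one statement and stays safe after an unbraced if/else; it also nulls the pointer so stale references fail loudly instead of being double-freed. An equivalent standalone version built on free() rather than xfree():

#include <stdlib.h>
#include <string.h>

#define FREE_NULL(_X)			\
	do {				\
		if (_X) free(_X);	\
		(_X) = NULL;		\
	} while (0)

int main(void)
{
	char *s = strdup("scratch");

	if (s)
		FREE_NULL(s);	/* expands safely as one statement */
	else
		FREE_NULL(s);	/* no dangling-else surprise */
	return (s == NULL) ? 0 : 1;
}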
* load its values with defaults (zeros, nulls, and magic cookie) - * input: error_code - location to store error value in - * output: error_code - set to zero if no error, errno otherwise - * returns a pointer to the record or NULL if error + * IN/OUT error_code - set to zero if no error, errno otherwise + * RET pointer to the record or NULL if error * global: job_list - global job list * job_count - number of jobs in the system * last_job_update - time of last job table update - * NOTE: allocates memory that should be xfreed with list_delete_job + * NOTE: allocates memory that should be xfreed with _list_delete_job */ struct job_record *create_job_record(int *error_code) { @@ -161,15 +170,16 @@ struct job_record *create_job_record(int *error_code) /* * delete_job_details - delete a job's detail record and clear it's pointer - * this information can be deleted as soon as the job is allocated resources - * input: job_entry - pointer to job_record to clear the record of + * this information can be deleted as soon as the job is allocated + * resources and running (could need to restart batch job) + * IN job_entry - pointer to job_record to clear the record of */ void delete_job_details(struct job_record *job_entry) { if (job_entry->details == NULL) return; - delete_job_desc_files(job_entry->job_id); + _delete_job_desc_files(job_entry->job_id); if (job_entry->details->magic != DETAILS_MAGIC) fatal ("delete_job_details: passed invalid job details pointer"); @@ -191,8 +201,8 @@ void delete_job_details(struct job_record *job_entry) job_entry->details = NULL; } -/* delete_job_desc_files - delete job descriptor related files */ -void delete_job_desc_files(uint32_t job_id) +/* _delete_job_desc_files - delete job descriptor related files */ +static void _delete_job_desc_files(uint32_t job_id) { char *dir_name, job_dir[20], *file_name; struct stat sbuf; @@ -213,11 +223,12 @@ void delete_job_desc_files(uint32_t job_id) xfree(file_name); if (stat(dir_name, &sbuf) == 0) /* remove job directory as needed */ - (void) rmdir2(dir_name); + (void) rmdir(dir_name); xfree(dir_name); } -/* dump_all_job_state - save the state of all jobs to file */ +/* dump_all_job_state - save the state of all jobs to file + * RET 0 or error code */ int dump_all_job_state(void) { int error_code = 0, log_fd; @@ -239,7 +250,7 @@ int dump_all_job_state(void) (struct job_record *) list_next(job_record_iterator))) { if (job_record_point->magic != JOB_MAGIC) fatal("dump_all_job: job integrity is bad"); - dump_job_state(job_record_point, buffer); + _dump_job_state(job_record_point, buffer); } unlock_slurmctld(job_read_lock); list_iterator_destroy(job_record_iterator); @@ -286,11 +297,12 @@ int dump_all_job_state(void) } /* - * dump_job_state - dump the state of a specific job, its details, and steps to a buffer - * dump_job_ptr (I) - pointer to job for which information is requested - * buffer (I/O) - location to store data, pointers automatically advanced + * _dump_job_state - dump the state of a specific job, its details, and + * steps to a buffer + * IN dump_job_ptr - pointer to job for which information is requested + * IN/OUT buffer - location to store data, pointers automatically advanced */ -void dump_job_state(struct job_record *dump_job_ptr, Buf buffer) +static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer) { struct job_details *detail_ptr; ListIterator step_record_iterator; @@ -318,7 +330,7 @@ void dump_job_state(struct job_record *dump_job_ptr, Buf buffer) if (detail_ptr->magic != DETAILS_MAGIC) 
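The magic-cookie checks here (JOB_MAGIC, DETAILS_MAGIC) are cheap memory-corruption tripwires: every record gets a known constant at creation, and every consumer verifies it before trusting the rest of the struct. In miniature, with an invented constant value:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define JOB_MAGIC 0xf0f0f0f0	/* illustrative value only */

struct job_record {
	uint32_t magic;
	uint32_t job_id;
};

static void check_job(struct job_record *job)
{
	if (job->magic != JOB_MAGIC) {
		fprintf(stderr, "job integrity is bad\n");
		abort();	/* fatal() in slurmctld */
	}
}

int main(void)
{
	struct job_record job = { JOB_MAGIC, 42 };

	check_job(&job);
	printf("job %u ok\n", job.job_id);
	return 0;
}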
fatal("dump_all_job: job detail integrity is bad"); pack16((uint16_t) DETAILS_FLAG, buffer); - dump_job_details_state(detail_ptr, buffer); + _dump_job_details_state(detail_ptr, buffer); } else pack16((uint16_t) 0, buffer); /* no details flag */ @@ -328,18 +340,19 @@ void dump_job_state(struct job_record *dump_job_ptr, Buf buffer) while ((step_record_ptr = (struct step_record *) list_next(step_record_iterator))) { pack16((uint16_t) STEP_FLAG, buffer); - dump_job_step_state(step_record_ptr, buffer); + _dump_job_step_state(step_record_ptr, buffer); }; list_iterator_destroy(step_record_iterator); pack16((uint16_t) 0, buffer); /* no step flag */ } /* - * dump_job_details_state - dump the state of a specific job details to a buffer - * detail_ptr (I) - pointer to job details for which information is requested - * buffer (I/O) - location to store data, pointers automatically advanced + * _dump_job_details_state - dump the state of a specific job details to + * a buffer + * IN detail_ptr - pointer to job details for which information is requested + * IN/OUT buffer - location to store data, pointers automatically advanced */ -void dump_job_details_state(struct job_details *detail_ptr, Buf buffer) +void _dump_job_details_state(struct job_details *detail_ptr, Buf buffer) { char tmp_str[MAX_STR_PACK]; @@ -376,8 +389,8 @@ void dump_job_details_state(struct job_details *detail_ptr, Buf buffer) packstr(tmp_str, buffer); } - if (detail_ptr->err == NULL || - strlen(detail_ptr->err) < MAX_STR_PACK) + if ((detail_ptr->err == NULL) || + (strlen(detail_ptr->err) < MAX_STR_PACK)) packstr(detail_ptr->err, buffer); else { strncpy(tmp_str, detail_ptr->err, MAX_STR_PACK); @@ -385,8 +398,8 @@ void dump_job_details_state(struct job_details *detail_ptr, Buf buffer) packstr(tmp_str, buffer); } - if (detail_ptr->in == NULL || - strlen(detail_ptr->in) < MAX_STR_PACK) + if ((detail_ptr->in == NULL) || + (strlen(detail_ptr->in) < MAX_STR_PACK)) packstr(detail_ptr->in, buffer); else { strncpy(tmp_str, detail_ptr->in, MAX_STR_PACK); @@ -403,8 +416,8 @@ void dump_job_details_state(struct job_details *detail_ptr, Buf buffer) packstr(tmp_str, buffer); } - if (detail_ptr->work_dir == NULL || - strlen(detail_ptr->work_dir) < MAX_STR_PACK) + if ((detail_ptr->work_dir == NULL) || + (strlen(detail_ptr->work_dir) < MAX_STR_PACK)) packstr(detail_ptr->work_dir, buffer); else { strncpy(tmp_str, detail_ptr->work_dir, MAX_STR_PACK); @@ -414,11 +427,11 @@ void dump_job_details_state(struct job_details *detail_ptr, Buf buffer) } /* - * dump_job_step_state - dump the state of a specific job step to a buffer - * detail_ptr (I) - pointer to job step for which information is requested - * buffer (I/O) - location to store data, pointers automatically advanced + * _dump_job_step_state - dump the state of a specific job step to a buffer + * IN detail_ptr - pointer to job step for which information is requested + * IN/OUT buffer - location to store data, pointers automatically advanced */ -void dump_job_step_state(struct step_record *step_ptr, Buf buffer) +static void _dump_job_step_state(struct step_record *step_ptr, Buf buffer) { char *node_list; @@ -434,8 +447,9 @@ void dump_job_step_state(struct step_record *step_ptr, Buf buffer) } /* - * load_job_state - load the job state from file, recover from slurmctld restart. - * execute this after loading the configuration file data. + * load_job_state - load the job state from file, recover from last slurmctld + * checkpoint. Execute this after loading the configuration file data. 
+ * RET 0 or error code */ int load_job_state(void) { @@ -449,8 +463,8 @@ int load_job_state(void) uint16_t job_state, next_step_id, details; char *nodes = NULL, *partition = NULL, *name = NULL; uint32_t num_procs, num_nodes, min_procs, min_memory, min_tmp_disk; - uint16_t shared, contiguous, kill_on_node_fail, name_len, - batch_flag; + uint16_t shared, contiguous, kill_on_node_fail, name_len; + uint16_t batch_flag; char *req_nodes = NULL, *features = NULL; char *err = NULL, *in = NULL, *out = NULL, *work_dir = NULL; slurm_job_credential_t *credential_ptr = NULL; @@ -514,9 +528,9 @@ int load_job_state(void) job_id, job_state, kill_on_node_fail); error ("No more job data will be processed from the checkpoint file"); - x_clear(nodes); - x_clear(partition); - x_clear(name); + FREE_NULL(nodes); + FREE_NULL(partition); + FREE_NULL(name); error_code = EINVAL; break; } @@ -559,12 +573,12 @@ int load_job_state(void) batch_flag); error ("No more job data will be processed from the checkpoint file"); - x_clear(req_nodes); - x_clear(features); - x_clear(err); - x_clear(in); - x_clear(out); - x_clear(work_dir); + FREE_NULL(req_nodes); + FREE_NULL(features); + FREE_NULL(err); + FREE_NULL(in); + FREE_NULL(out); + FREE_NULL(work_dir); error_code = EINVAL; break; } @@ -611,7 +625,7 @@ int load_job_state(void) strncpy(job_ptr->partition, partition, MAX_NAME_LEN); job_ptr->part_ptr = part_ptr; - add_job_hash(job_ptr); + _add_job_hash(job_ptr); info("recovered job id %u", job_id); } @@ -717,15 +731,15 @@ int load_job_state(void) break; cleanup: - x_clear(nodes); - x_clear(partition); - x_clear(name); - x_clear(req_nodes); - x_clear(features); - x_clear(err); - x_clear(in); - x_clear(out); - x_clear(work_dir); + FREE_NULL(nodes); + FREE_NULL(partition); + FREE_NULL(name); + FREE_NULL(req_nodes); + FREE_NULL(features); + FREE_NULL(err); + FREE_NULL(in); + FREE_NULL(out); + FREE_NULL(work_dir); if (node_bitmap) { bit_free(node_bitmap); node_bitmap = NULL; @@ -746,25 +760,29 @@ int load_job_state(void) unpack_error: error ("Incomplete job data checkpoint file. 
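load_job_state leans on unpack helpers that jump to the unpack_error label the moment the buffer runs short, which is what lets a truncated checkpoint fail cleanly instead of reading garbage. The pattern reduced to one macro over a raw byte buffer; host byte order and a fixed 4-byte integer size are both simplifications:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct buf { const uint8_t *data; size_t size, off; };

#define safe_unpack32(v, b)						\
	do {								\
		if ((b)->off + 4 > (b)->size) goto unpack_error;	\
		memcpy((v), (b)->data + (b)->off, 4);			\
		(b)->off += 4;						\
	} while (0)

int main(void)
{
	uint8_t raw[6] = { 0 };			/* too short for two u32s */
	struct buf b = { raw, sizeof(raw), 0 };
	uint32_t job_id, user_id;

	safe_unpack32(&job_id, &b);
	safe_unpack32(&user_id, &b);		/* jumps: only 2 bytes left */
	printf("recovered %u/%u\n", job_id, user_id);
	return 0;

unpack_error:
	fprintf(stderr, "incomplete checkpoint, state not restored\n");
	return 1;
}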
State not completely restored"); - x_clear(nodes); - x_clear(partition); - x_clear(name); - x_clear(req_nodes); - x_clear(features); - x_clear(err); - x_clear(in); - x_clear(out); - x_clear(work_dir); + FREE_NULL(nodes); + FREE_NULL(partition); + FREE_NULL(name); + FREE_NULL(req_nodes); + FREE_NULL(features); + FREE_NULL(err); + FREE_NULL(in); + FREE_NULL(out); + FREE_NULL(work_dir); free_buf(buffer); return EFAULT; } -/* add_job_hash - add a job hash entry for given job record, job_id must already be set */ -void add_job_hash(struct job_record *job_ptr) +/* _add_job_hash - add a job hash entry for given job record, job_id must + * already be set + * IN job_ptr - pointer to job record + * Globals: hash table updated + */ +void _add_job_hash(struct job_record *job_ptr) { int inx; - inx = job_hash_inx(job_ptr->job_id); + inx = JOB_HASH_INX(job_ptr->job_id); if (job_hash[inx]) { if (max_hash_over >= MAX_JOB_COUNT) fatal("Job hash table overflow"); @@ -776,19 +794,19 @@ void add_job_hash(struct job_record *job_ptr) /* * find_job_record - return a pointer to the job record with the given job_id - * input: job_id - requested job's id - * output: pointer to the job's record, NULL on error - * job_hash, job_hash_over, max_hash_over - hash table into job records + * IN job_id - requested job's id + * RET pointer to the job's record, NULL on error * global: job_list - global job list pointer + * job_hash, job_hash_over, max_hash_over - hash table into job records */ struct job_record *find_job_record(uint32_t job_id) { int i; /* First try to find via hash table */ - if (job_hash[job_hash_inx(job_id)] && - job_hash[job_hash_inx(job_id)]->job_id == job_id) - return job_hash[job_hash_inx(job_id)]; + if (job_hash[JOB_HASH_INX(job_id)] && + job_hash[JOB_HASH_INX(job_id)]->job_id == job_id) + return job_hash[JOB_HASH_INX(job_id)]; /* linear search of overflow hash table overflow */ for (i = 0; i < max_hash_over; i++) { @@ -800,8 +818,12 @@ struct job_record *find_job_record(uint32_t job_id) return NULL; } -/* find_running_job_by_node_name - Given a node name, return a pointer to any - * job currently running on that node */ +/* + * find_running_job_by_node_name - Given a node name, return a pointer to any + * job currently running on that node + * IN node_name - name of a node + * RET pointer to the job's record, NULL if no job on node found + */ struct job_record *find_running_job_by_node_name(char *node_name) { ListIterator job_record_iterator; @@ -827,9 +849,11 @@ struct job_record *find_running_job_by_node_name(char *node_name) return job_record_point; } -/* kill_running_job_by_node_name - Given a node name, deallocate that job +/* + * kill_running_job_by_node_name - Given a node name, deallocate that job * from the node or kill it - * returns: number of killed jobs + * IN node_name - name of a node + * RET number of killed jobs */ int kill_running_job_by_node_name(char *node_name) { @@ -873,7 +897,10 @@ int kill_running_job_by_node_name(char *node_name) -/* dump_job_desc - dump the incoming job submit request message */ +/* + * dump_job_desc - dump the incoming job submit request message + * IN job_specs - job specification from RPC + */ void dump_job_desc(job_desc_msg_t * job_specs) { long job_id, min_procs, min_memory, min_tmp_disk, num_procs; @@ -884,7 +911,7 @@ void dump_job_desc(job_desc_msg_t * job_specs) return; job_id = (job_specs->job_id != NO_VAL) ? 
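find_job_record's lookup strategy is a direct-mapped hash with a linear-scan overflow array: O(1) when job IDs don't collide, and a bounded scan when they do. A compact model of _add_job_hash and find_job_record, with the table size shrunk for the demo:

#include <stdint.h>
#include <stdio.h>

#define TABLE_SIZE 16	/* stands in for MAX_JOB_COUNT */
#define HASH(id) ((id) % TABLE_SIZE)

struct rec { uint32_t id; };

static struct rec *table[TABLE_SIZE];
static struct rec *over[TABLE_SIZE];	/* overflow on collision */
static int n_over;

static void add_rec(struct rec *r)
{
	if (table[HASH(r->id)])
		over[n_over++] = r;	/* slot taken: overflow list */
	else
		table[HASH(r->id)] = r;
}

static struct rec *find_rec(uint32_t id)
{
	int i;

	if (table[HASH(id)] && (table[HASH(id)]->id == id))
		return table[HASH(id)];
	for (i = 0; i < n_over; i++)
		if (over[i]->id == id)
			return over[i];
	return NULL;
}

int main(void)
{
	struct rec a = { 5 }, b = { 21 };	/* 21 % 16 == 5: collision */

	add_rec(&a);
	add_rec(&b);
	printf("%u %u\n", find_rec(5)->id, find_rec(21)->id);
	return 0;
}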
job_specs->job_id : -1; - debug3("JobDesc: user_id=%u job_id=%ld partition=%s name=%s\n", + debug3("JobDesc: user_id=%u job_id=%ld partition=%s name=%s", job_specs->user_id, job_id, job_specs->partition, job_specs->name); @@ -948,15 +975,15 @@ void dump_job_desc(job_desc_msg_t * job_specs) * init_job_conf - initialize the job configuration tables and values. * this should be called after creating node information, but * before creating any job entries. - * output: return value - 0 if no error, otherwise an error code + * RET 0 if no error, otherwise an error code * global: last_job_update - time of last job table update * job_list - pointer to global job list */ -int init_job_conf() +int init_job_conf(void) { if (job_list == NULL) { job_count = 0; - job_list = list_create(&list_delete_job); + job_list = list_create(&_list_delete_job); if (job_list == NULL) fatal ("init_job_conf: list_create can not allocate memory"); @@ -967,52 +994,51 @@ int init_job_conf() /* - * job_allocate - create job_records for the suppied job specification and allocate nodes for it. - * input: job_specs - job specifications - * new_job_id - location for storing new job's id - * node_list - location for storing new job's allocated nodes - * num_cpu_groups - location to store number of cpu groups - * cpus_per_node - location to store pointer to array of numbers of cpus on each node allocated - * cpu_count_reps - location to store pointer to array of numbers of consecutive nodes having - * same cpu count - * immediate - if set then either initiate the job immediately or fail - * will_run - don't initiate the job if set, just test if it could run now or later - * allocate - resource allocation request if set, not a full job - * output: new_job_id - the job's ID - * num_cpu_groups - number of cpu groups (elements in cpus_per_node and cpu_count_reps) - * cpus_per_node - pointer to array of numbers of cpus on each node allocate - * cpu_count_reps - pointer to array of numbers of consecutive nodes having same cpu count - * node_list - list of nodes allocated to the job - * node_cnt - number of allocated nodes - * node_addr - slurm_addr's for the allocated nodes - * returns 0 on success, EINVAL if specification is invalid, - * EAGAIN if higher priority jobs exist - * NOTE: If allocating nodes lx[0-7] to a job and those nodes have cpu counts of - * 4, 4, 4, 4, 8, 8, 4, 4 then num_cpu_groups=3, cpus_per_node={4,8,4} and - * cpu_count_reps={4,2,2} + * job_allocate - create job_records for the suppied job specification and + * allocate nodes for it. 
+ * IN job_specs - job specifications + * IN node_list - location for storing new job's allocated nodes + * IN immediate - if set then either initiate the job immediately or fail + * IN will_run - don't initiate the job if set, just test if it could run + * now or later + * IN allocate - resource allocation request if set, not a full job + * OUT new_job_id - the new job's ID + * OUT num_cpu_groups - number of cpu groups (elements in cpus_per_node + * and cpu_count_reps) + * OUT cpus_per_node - pointer to array of numbers of cpus on each node + * allocate + * OUT cpu_count_reps - pointer to array of numbers of consecutive nodes + * having same cpu count + * OUT node_list - list of nodes allocated to the job + * OUT node_cnt - number of allocated nodes + * OUT node_addr - slurm_addr's for the allocated nodes + * RET 0 or an error code + * NOTE: If allocating nodes lx[0-7] to a job and those nodes have cpu counts + * of 4, 4, 4, 4, 8, 8, 4, 4 then num_cpu_groups=3, cpus_per_node={4,8,4} + * and cpu_count_reps={4,2,2} * globals: job_list - pointer to global job list * list_part - global list of partition info * default_part_loc - pointer to default partition */ -int -job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id, - char **node_list, uint16_t * num_cpu_groups, - uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps, - int immediate, int will_run, int allocate, uid_t submit_uid, - uint16_t * node_cnt, slurm_addr ** node_addr) +int job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id, + char **node_list, uint16_t * num_cpu_groups, + uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps, + int immediate, int will_run, int allocate, + uid_t submit_uid, uint16_t * node_cnt, + slurm_addr ** node_addr) { int error_code, test_only; struct job_record *job_ptr; - error_code = job_create(job_specs, new_job_id, allocate, will_run, - &job_ptr, submit_uid); + error_code = _job_create(job_specs, new_job_id, allocate, will_run, + &job_ptr, submit_uid); if (error_code) return error_code; if (job_ptr == NULL) fatal("job_allocate: allocated job %u lacks record", new_job_id); - if (immediate && top_priority(job_ptr) != 1) { + if (immediate && _top_priority(job_ptr) != 1) { job_ptr->job_state = JOB_FAILED; job_ptr->end_time = 0; return ESLURM_NOT_TOP_PRIORITY; @@ -1020,7 +1046,7 @@ job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id, test_only = will_run || (allocate == 0); if (test_only == 0) { - /* Some of these pointers are NULL on submit (e.g. 
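The NOTE above describes a run-length encoding of per-node cpu counts. A sketch of an encoder showing how counts 4,4,4,4,8,8,4,4 become num_cpu_groups=3, cpus_per_node={4,8,4} and cpu_count_reps={4,2,2}:

#include <stdint.h>
#include <stdio.h>

static int rle_encode(const uint32_t *cpus, int nodes,
		      uint32_t *cpus_per_node, uint32_t *cpu_count_reps)
{
	int i, groups = 0;

	for (i = 0; i < nodes; i++) {
		if (groups && (cpus_per_node[groups - 1] == cpus[i]))
			cpu_count_reps[groups - 1]++;	/* extend the run */
		else {
			cpus_per_node[groups] = cpus[i];
			cpu_count_reps[groups] = 1;
			groups++;
		}
	}
	return groups;	/* num_cpu_groups */
}

int main(void)
{
	uint32_t cpus[8] = { 4, 4, 4, 4, 8, 8, 4, 4 };
	uint32_t per_node[8], reps[8];
	int i, groups = rle_encode(cpus, 8, per_node, reps);

	for (i = 0; i < groups; i++)	/* prints 4x4 8x2 4x2 */
		printf("%ux%u ", per_node[i], reps[i]);
	printf("\n");
	return 0;
}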
allocate == 0) */ + /* Some of these pointers are NULL on submit */ if (num_cpu_groups) *num_cpu_groups = 0; if (node_list) @@ -1052,7 +1078,7 @@ job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id, return error_code; } - if (will_run) { /* job would run now, flag job record destruction */ + if (will_run) { /* job would run, flag job destruction */ job_ptr->job_state = JOB_FAILED; job_ptr->end_time = 0; } @@ -1077,9 +1103,9 @@ job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id, /* * job_cancel - cancel the specified job - * input: job_id - id of the job to be cancelled - * uid - uid of requesting user - * output: returns 0 on success, otherwise ESLURM error code + * IN job_id - id of the job to be cancelled + * IN uid - uid of requesting user + * RET 0 on success, otherwise ESLURM error code * global: job_list - pointer global job list * last_job_update - time of last job table update */ @@ -1189,34 +1215,31 @@ job_complete(uint32_t job_id, uid_t uid, bool requeue, } /* - * job_create - create a job table record for the supplied specifications. - * this performs only basic tests for request validity (access to partition, - * nodes count in partition, and sufficient processors in partition). + * _job_create - create a job table record for the supplied specifications. + * this performs only basic tests for request validity (access to + * partition, nodes count in partition, and sufficient processors in + * partition). * input: job_specs - job specifications - * new_job_id - location for storing new job's id - * allocate - resource allocation request if set rather than job submit - * will_run - job is not to be created, test of validity only - * job_rec_ptr - place to park pointer to the job (or NULL) - * output: new_job_id - the job's ID - * returns 0 on success, otherwise ESLURM error code - * allocate - if set, job allocation only (no script required) - * will_run - if set then test only, don't create a job entry - * job_rec_ptr - pointer to the job (if not passed a NULL) + * IN allocate - resource allocation request if set rather than job submit + * IN will_run - job is not to be created, test of validity only + * OUT new_job_id - the job's ID + * OUT job_rec_ptr - pointer to the job (NULL on error) + * RET 0 on success, otherwise ESLURM error code * globals: job_list - pointer to global job list * list_part - global list of partition info * default_part_loc - pointer to default partition * job_hash, job_hash_over, max_hash_over - hash table into job records */ -int -job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, int allocate, - int will_run, struct job_record **job_rec_ptr, uid_t submit_uid) +static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, + int allocate, int will_run, + struct job_record **job_rec_ptr, uid_t submit_uid) { int error_code, i; struct part_record *part_ptr; bitstr_t *req_bitmap = NULL; - if ((error_code = validate_job_desc(job_desc, allocate))) + if ((error_code = _validate_job_desc(job_desc, allocate))) return error_code; /* find selected partition */ @@ -1224,33 +1247,35 @@ job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, int allocate, part_ptr = list_find_first(part_list, &list_find_part, job_desc->partition); if (part_ptr == NULL) { - info("job_create: invalid partition specified: %s", + info("_job_create: invalid partition specified: %s", job_desc->partition); error_code = ESLURM_INVALID_PARTITION_NAME; return error_code; } } else { if (default_part_loc == NULL) { - error("job_create: default 
partition not set."); + error("_job_create: default partition not set."); error_code = ESLURM_DEFAULT_PARTITION_NOT_SET; return error_code; } part_ptr = default_part_loc; } - if (job_desc->time_limit == NO_VAL) /* Default time_limit is partition maximum */ + if (job_desc->time_limit == NO_VAL) + /* Default time_limit is partition maximum */ job_desc->time_limit = part_ptr->max_time; /* can this user access this partition */ if ((part_ptr->root_only) && (submit_uid != 0)) { error - ("job_create: non-root job submission to partition %s by uid %u", + ("_job_create: non-root job submission to partition %s by uid %u", part_ptr->name, (unsigned int) submit_uid); error_code = ESLURM_ACCESS_DENIED; return error_code; } if (validate_group(part_ptr, submit_uid) == 0) { - info("job_create: job lacks group required of partition %s, uid %u", part_ptr->name, (unsigned int) submit_uid); + info("_job_create: job lacks group required of partition %s, uid %u", + part_ptr->name, (unsigned int) submit_uid); error_code = ESLURM_JOB_MISSING_REQUIRED_PARTITION_GROUP; return error_code; } @@ -1271,7 +1296,8 @@ job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, int allocate, if (job_desc->contiguous) bit_fill_gaps(req_bitmap); if (bit_super_set(req_bitmap, part_ptr->node_bitmap) != 1) { - info("job_create: requested nodes %s not in partition %s", job_desc->req_nodes, part_ptr->name); + info("_job_create: requested nodes %s not in partition %s", + job_desc->req_nodes, part_ptr->name); error_code = ESLURM_REQUESTED_NODES_NOT_IN_PARTITION; goto cleanup; @@ -1284,7 +1310,8 @@ job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, int allocate, job_desc->num_nodes = i; } if (job_desc->num_procs > part_ptr->total_cpus) { - info("job_create: too many cpus (%d) requested of partition %s(%d)", job_desc->num_procs, part_ptr->name, part_ptr->total_cpus); + info("_job_create: too many cpus (%d) requested of partition %s(%d)", + job_desc->num_procs, part_ptr->name, part_ptr->total_cpus); error_code = ESLURM_TOO_MANY_REQUESTED_CPUS; goto cleanup; } @@ -1294,7 +1321,8 @@ job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, int allocate, i = part_ptr->max_nodes; else i = part_ptr->total_nodes; - info("job_create: too many nodes (%d) requested of partition %s(%d)", job_desc->num_nodes, part_ptr->name, i); + info("_job_create: too many nodes (%d) requested of partition %s(%d)", + job_desc->num_nodes, part_ptr->name, i); error_code = ESLURM_TOO_MANY_REQUESTED_NODES; goto cleanup; } @@ -1303,25 +1331,25 @@ job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, int allocate, * malicious user filling slurmctld's memory */ if (job_desc->err && (strlen(job_desc->err) > BUF_SIZE)) { - info("job_create: strlen(err) too big (%d)", + info("_job_create: strlen(err) too big (%d)", strlen(job_desc->err)); error_code = ESLURM_PATHNAME_TOO_LONG; goto cleanup; } if (job_desc->in && (strlen(job_desc->in) > BUF_SIZE)) { - info("job_create: strlen(in) too big (%d)", + info("_job_create: strlen(in) too big (%d)", strlen(job_desc->in)); error_code = ESLURM_PATHNAME_TOO_LONG; goto cleanup; } if (job_desc->out && (strlen(job_desc->out) > BUF_SIZE)) { - info("job_create: strlen(out) too big (%d)", + info("_job_create: strlen(out) too big (%d)", strlen(job_desc->out)); error_code = ESLURM_PATHNAME_TOO_LONG; goto cleanup; } if (job_desc->work_dir && (strlen(job_desc->work_dir) > BUF_SIZE)) { - info("job_create: strlen(work_dir) too big (%d)", + info("_job_create: strlen(work_dir) too big (%d)", strlen(job_desc->work_dir)); 
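The strlen checks above are a denial-of-service guard: each user-supplied pathname is bounded before slurmctld copies it into its own tables, so a malicious submitter cannot bloat the daemon's memory. The same guard in isolation, with an invented BUF_SIZE:

#include <stdio.h>
#include <string.h>

#define BUF_SIZE 1024	/* illustrative; the real limit comes from SLURM */

static int check_len(const char *label, const char *str)
{
	if (str && (strlen(str) > BUF_SIZE)) {
		printf("strlen(%s) too big (%lu)\n", label,
		       (unsigned long) strlen(str));
		return -1;	/* ESLURM_PATHNAME_TOO_LONG */
	}
	return 0;
}

int main(void)
{
	return check_len("work_dir", "/tmp/job.42");
}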
error_code = ESLURM_PATHNAME_TOO_LONG; goto cleanup; @@ -1354,7 +1382,8 @@ job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, int allocate, if (part_ptr->shared == SHARED_FORCE) /* shared=force */ (*job_rec_ptr)->details->shared = 1; - else if (((*job_rec_ptr)->details->shared != 1) || (part_ptr->shared == SHARED_NO)) /* can't share */ + else if (((*job_rec_ptr)->details->shared != 1) || + (part_ptr->shared == SHARED_NO)) /* can't share */ (*job_rec_ptr)->details->shared = 0; *new_job_id = (*job_rec_ptr)->job_id; @@ -1380,7 +1409,11 @@ _copy_job_desc_to_file(job_desc_msg_t * job_desc, uint32_t job_id) /* Create job_id specific directory */ sprintf(job_dir, "/job.%d", job_id); xstrcat(dir_name, job_dir); - (void) mkdir2(dir_name, 0700); + if (mkdir(dir_name, 0700)) { + error("mkdir(%s) error %m", dir_name); + xfree(dir_name); + return ESLURM_WRITING_TO_FILE; + } /* Create environment file, and write data to it */ file_name = xstrdup(dir_name); @@ -1403,58 +1436,12 @@ _copy_job_desc_to_file(job_desc_msg_t * job_desc, uint32_t job_id) return error_code; } -/* mkdir2 - create a directory, does system call if root, runs mkdir otherwise */ -int mkdir2(char *path, int modes) -{ - char *cmd; - int error_code; - struct stat sbuf; - - if (stat(path, &sbuf) == 0) - return EEXIST; - - if (getuid() == 0) { - if (mknod(path, S_IFDIR | modes, 0)) - return errno; - } - - else { - cmd = xstrdup("/bin/mkdir "); - xstrcat(cmd, path); - error_code = system(cmd); - xfree(cmd); - if (error_code) - return error_code; - (void) chmod(path, modes); - } - - return SLURM_SUCCESS; -} - -/* rmdir2 - Remove a directory, does system call if root, runs rmdir otherwise */ -int rmdir2(char *path) -{ - char *cmd; - int error_code; - - if (getuid() == 0) { - if (unlink(path)) - return errno; - } - - else { - cmd = xstrdup("/bin/rmdir "); - xstrcat(cmd, path); - error_code = system(cmd); - xfree(cmd); - if (error_code) - return error_code; - } - - return SLURM_SUCCESS; -} - -/* Create file with specified name and write the supplied data array to it */ +/* + * Create file with specified name and write the supplied data array to it + * IN file_name - file to create and write to + * IN data - array of pointers to strings (e.g. 
env) + * IN size - number of elements in data + */ static int _write_data_array_to_file(char *file_name, char **data, uint16_t size) { @@ -1498,7 +1485,11 @@ _write_data_array_to_file(char *file_name, char **data, uint16_t size) return SLURM_SUCCESS; } -/* Create file with specified name and write the supplied data to it */ +/* + * Create file with specified name and write the supplied data array to it + * IN file_name - file to create and write to + * IN data - pointer to string + */ static int _write_data_to_file(char *file_name, char *data) { int fd, pos, nwrite, amount; @@ -1530,7 +1521,13 @@ static int _write_data_to_file(char *file_name, char *data) return SLURM_SUCCESS; } -/* get_job_env - return the environment variables and their count for a given job */ +/* + * get_job_env - return the environment variables and their count for a + * given job + * IN job_ptr - pointer to job for which data is required + * OUT env_size - number of elements to read + * RET point to array of string pointers containing environment variables + */ char **get_job_env(struct job_record *job_ptr, uint16_t * env_size) { char job_dir[30], *file_name, **environment = NULL; @@ -1539,13 +1536,17 @@ char **get_job_env(struct job_record *job_ptr, uint16_t * env_size) sprintf(job_dir, "/job.%d/environment", job_ptr->job_id); xstrcat(file_name, job_dir); - read_data_array_from_file(file_name, &environment, env_size); + _read_data_array_from_file(file_name, &environment, env_size); xfree(file_name); return environment; } -/* get_job_script - return the script for a given job */ +/* + * get_job_script - return the script for a given job + * IN job_ptr - pointer to job for which data is required + * RET point to string containing job script + */ char *get_job_script(struct job_record *job_ptr) { char job_dir[30], *file_name, *script = NULL; @@ -1554,21 +1555,28 @@ char *get_job_script(struct job_record *job_ptr) sprintf(job_dir, "/job.%d/script", job_ptr->job_id); xstrcat(file_name, job_dir); - read_data_from_file(file_name, &script); + _read_data_from_file(file_name, &script); xfree(file_name); return script; } -void -read_data_array_from_file(char *file_name, char ***data, uint16_t * size) +/* + * Read a collection of strings from a file + * IN file_name - file to read from + * OUT data - pointer to array of pointers to strings (e.g. 
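The _write_data*/_read_data* helpers loop over write(2) and read(2) because a single call may transfer fewer bytes than requested. The write side of that loop as a standalone helper; the file name below is invented:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int write_all(int fd, const char *data, size_t nbytes)
{
	size_t pos = 0;

	while (pos < nbytes) {
		ssize_t amount = write(fd, data + pos, nbytes - pos);

		if (amount < 0) {	/* error: report and give up */
			perror("write");
			return -1;
		}
		pos += (size_t) amount;	/* partial write: keep going */
	}
	return 0;
}

int main(void)
{
	int fd = open("/tmp/state.tmp", O_CREAT | O_WRONLY | O_TRUNC, 0600);

	if (fd < 0)
		return 1;
	write_all(fd, "state\n", 6);
	return close(fd);
}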
env), + * must be xfreed when no longer needed + * OUT size - number of elements in data + */ +static void +_read_data_array_from_file(char *file_name, char ***data, uint16_t * size) { int fd, pos, buf_size, amount, i; char *buffer, **array_ptr; uint16_t rec_cnt; if ((file_name == NULL) || (data == NULL) || (size == NULL)) - fatal("read_data_array_from_file passed NULL pointer"); + fatal("_read_data_array_from_file passed NULL pointer"); *data = NULL; *size = 0; @@ -1620,13 +1628,19 @@ read_data_array_from_file(char *file_name, char ***data, uint16_t * size) return; } -void read_data_from_file(char *file_name, char **data) +/* + * Read a string from a file + * IN file_name - file to read from + * OUT data - pointer to string + * must be xfreed when no longer needed + */ +void _read_data_from_file(char *file_name, char **data) { int fd, pos, buf_size, amount; char *buffer; if ((file_name == NULL) || (data == NULL)) - fatal("read_data_from_file passed NULL pointer"); + fatal("_read_data_from_file passed NULL pointer"); *data = NULL; fd = open(file_name, 0); @@ -1678,8 +1692,8 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, if (job_desc->job_id != NO_VAL) job_ptr->job_id = job_desc->job_id; else - set_job_id(job_ptr); - add_job_hash(job_ptr); + _set_job_id(job_ptr); + _add_job_hash(job_ptr); if (job_desc->name) { strncpy(job_ptr->name, job_desc->name, @@ -1692,7 +1706,7 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, NO_VAL) /* also check submit UID is root */ ) job_ptr->priority = job_desc->priority; else - set_job_prio(job_ptr); + _set_job_prio(job_ptr); if (job_desc->kill_on_node_fail != (uint16_t) NO_VAL) job_ptr->kill_on_node_fail = job_desc->kill_on_node_fail; @@ -1742,96 +1756,6 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, return SLURM_SUCCESS; } -/* - * job_step_cancel - cancel the specified job step - * input: job_id, step_id - id of the job to be cancelled - * uid - user id of user issuing the RPC - * output: returns 0 on success, otherwise ESLURM error code - * global: job_list - pointer global job list - * last_job_update - time of last job table update - */ -int job_step_cancel(uint32_t job_id, uint32_t step_id, uid_t uid) -{ - struct job_record *job_ptr; - int error_code; - - job_ptr = find_job_record(job_id); - if (job_ptr == NULL) { - - info("job_step_cancel: invalid job id %u", job_id); - return ESLURM_INVALID_JOB_ID; - } - - if ((job_ptr->job_state == JOB_FAILED) || - (job_ptr->job_state == JOB_COMPLETE) || - (job_ptr->job_state == JOB_TIMEOUT)) - return ESLURM_ALREADY_DONE; - - if ((job_ptr->user_id != uid) && (uid != 0) && (uid != getuid())) { - error("Security violation, JOB_CANCEL RPC from uid %d", - uid); - return ESLURM_USER_ID_MISSING; - } - - if (job_ptr->job_state == JOB_RUNNING) { - last_job_update = time(NULL); - error_code = delete_step_record(job_ptr, step_id); - if (error_code == ENOENT) { - info("job_step_cancel step %u.%u not found", - job_id, step_id); - return ESLURM_ALREADY_DONE; - } - - job_ptr->time_last_active = time(NULL); - return SLURM_SUCCESS; - } - - info("job_step_cancel: step %u.%u can't be cancelled from state=%s", - job_id, step_id, job_state_string(job_ptr->job_state)); - return ESLURM_TRANSITION_STATE_NO_UPDATE; - -} - -/* - * job_step_complete - note normal completion the specified job step - * input: job_id, step_id - id of the job to be completed - * uid - user id of user issuing RPC - * output: returns 0 on success, otherwise ESLURM error code - * global: job_list - pointer global job list - * 
last_job_update - time of last job table update - */ -int job_step_complete(uint32_t job_id, uint32_t step_id, uid_t uid) -{ - struct job_record *job_ptr; - int error_code; - - job_ptr = find_job_record(job_id); - if (job_ptr == NULL) { - info("job_step_complete: invalid job id %u", job_id); - return ESLURM_INVALID_JOB_ID; - } - - if ((job_ptr->job_state == JOB_FAILED) || - (job_ptr->job_state == JOB_COMPLETE) || - (job_ptr->job_state == JOB_TIMEOUT)) - return ESLURM_ALREADY_DONE; - - if ((job_ptr->user_id != uid) && (uid != 0) && (uid != getuid())) { - error("Security violation, JOB_COMPLETE RPC from uid %d", - uid); - return ESLURM_USER_ID_MISSING; - } - - last_job_update = time(NULL); - error_code = delete_step_record(job_ptr, step_id); - if (error_code == ENOENT) { - info("job_step_complete step %u.%u not found", job_id, - step_id); - return ESLURM_ALREADY_DONE; - } - return SLURM_SUCCESS; -} - /* * job_time_limit - terminate jobs which have exceeded their time limit * global: job_list - pointer global job list @@ -1882,28 +1806,31 @@ void job_time_limit(void) list_iterator_destroy(job_record_iterator); } -/* validate_job_desc - validate that a job descriptor for job submit or - * allocate has valid data, set values to defaults as required */ -int validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate) +/* _validate_job_desc - validate that a job descriptor for job submit or + * allocate has valid data, set values to defaults as required + * IN job_desc_msg - pointer to job descriptor + * IN allocate - if clear job to be queued, if set allocate for user now + */ +static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate) { - if ((job_desc_msg->num_procs == NO_VAL) - && (job_desc_msg->num_nodes == NO_VAL) - && (job_desc_msg->req_nodes == NULL)) { - info("job_create: job failed to specify ReqNodes, TotalNodes or TotalProcs"); + if ((job_desc_msg->num_procs == NO_VAL) && + (job_desc_msg->num_nodes == NO_VAL) && + (job_desc_msg->req_nodes == NULL)) { + info("_job_create: job failed to specify ReqNodes, TotalNodes or TotalProcs"); return ESLURM_JOB_MISSING_SIZE_SPECIFICATION; } - if (allocate == SLURM_CREATE_JOB_FLAG_NO_ALLOCATE_0 && - job_desc_msg->script == NULL) { - info("job_create: job failed to specify Script"); + if ((allocate == SLURM_CREATE_JOB_FLAG_NO_ALLOCATE_0) && + (job_desc_msg->script == NULL)) { + info("_job_create: job failed to specify Script"); return ESLURM_JOB_SCRIPT_MISSING; } if (job_desc_msg->user_id == NO_VAL) { - info("job_create: job failed to specify User"); + info("_job_create: job failed to specify User"); return ESLURM_USER_ID_MISSING; } - if (job_desc_msg->name - && strlen(job_desc_msg->name) > MAX_NAME_LEN) { - info("job_create: job name %s too long", + if ((job_desc_msg->name) && + (strlen(job_desc_msg->name) > MAX_NAME_LEN)) { + info("_job_create: job name %s too long", job_desc_msg->name); return ESLURM_JOB_NAME_TOO_LONG; } @@ -1914,9 +1841,9 @@ int validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate) if (job_desc_msg->shared == NO_VAL) job_desc_msg->shared = 0; - if (job_desc_msg->job_id != NO_VAL && - find_job_record((uint32_t) job_desc_msg->job_id)) { - info("job_create: Duplicate job id %d", + if ((job_desc_msg->job_id != NO_VAL) && + (find_job_record((uint32_t) job_desc_msg->job_id))) { + info("_job_create: Duplicate job id %d", job_desc_msg->job_id); return ESLURM_DUPLICATE_JOB_ID; } @@ -1925,38 +1852,38 @@ int validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate) if (job_desc_msg->num_nodes == NO_VAL) 
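The defaulting logic in _validate_job_desc() relies on NO_VAL as an "unspecified" sentinel in the request message: every field the submitter left alone is filled in with a conservative default. A compact standalone illustration of the pattern (the struct and the sentinel value are simplified stand-ins for job_desc_msg_t):

#include <stdint.h>
#include <stdio.h>

#define NO_VAL 0xffffffff	/* "not specified" sentinel (illustrative value) */

struct req {
	uint32_t min_memory;
	uint32_t min_procs;
	uint32_t shared;
};

static void set_defaults(struct req *r)
{
	if (r->min_memory == NO_VAL)
		r->min_memory = 1;	/* 1 MB per node */
	if (r->min_procs == NO_VAL)
		r->min_procs = 1;	/* 1 cpu per node */
	if (r->shared == NO_VAL)
		r->shared = 0;		/* nodes not shared */
}

int main(void)
{
	struct req r = { NO_VAL, 4, NO_VAL };
	set_defaults(&r);
	printf("mem=%u procs=%u shared=%u\n",
	       r.min_memory, r.min_procs, r.shared);
	return 0;
}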
job_desc_msg->num_nodes = 1; /* default node count of 1 */ if (job_desc_msg->min_memory == NO_VAL) - job_desc_msg->min_memory = 1; /* default is 1 MB memory per node */ + job_desc_msg->min_memory = 1; /* default 1 MB mem per node */ if (job_desc_msg->min_tmp_disk == NO_VAL) - job_desc_msg->min_tmp_disk = 1; /* default is 1 MB disk per node */ + job_desc_msg->min_tmp_disk = 1; /* default 1 MB disk per node */ if (job_desc_msg->shared == NO_VAL) - job_desc_msg->shared = 0; /* default is not shared nodes */ + job_desc_msg->shared = 0; /* default not shared nodes */ if (job_desc_msg->min_procs == NO_VAL) - job_desc_msg->min_procs = 1; /* default is 1 processor per node */ + job_desc_msg->min_procs = 1; /* default 1 cpu per node */ return SLURM_SUCCESS; } /* - * list_delete_job - delete a job record and its corresponding job_details, + * _list_delete_job - delete a job record and its corresponding job_details, * see common/list.h for documentation - * input: job_entry - pointer to job_record to delete + * IN job_entry - pointer to job_record to delete * global: job_list - pointer to global job list * job_count - count of job list entries * job_hash, job_hash_over, max_hash_over - hash table into job records */ -void list_delete_job(void *job_entry) +static void _list_delete_job(void *job_entry) { struct job_record *job_record_point; int i, j; job_record_point = (struct job_record *) job_entry; if (job_record_point == NULL) - fatal("_list_delete_job: passed null job pointer"); + fatal("_list_delete_job: passed null job pointer"); if (job_record_point->magic != JOB_MAGIC) - fatal("list_delete_job: passed invalid job pointer"); + fatal("_list_delete_job: passed invalid job pointer"); - if (job_hash[job_hash_inx(job_record_point->job_id)] == + if (job_hash[JOB_HASH_INX(job_record_point->job_id)] == job_record_point) - job_hash[job_hash_inx(job_record_point->job_id)] = NULL; + job_hash[JOB_HASH_INX(job_record_point->job_id)] = NULL; else { for (i = 0; i < max_hash_over; i++) { if (job_hash_over[i] != job_record_point) @@ -1987,25 +1914,11 @@ void list_delete_job(void *job_entry) /* - * list_find_job_id - find an entry in the job list, - * see common/list.h for documentation, key is the job's id - * global- job_list - the global partition list - */ -int list_find_job_id(void *job_entry, void *key) -{ - if (((struct job_record *) job_entry)->job_id == - *((uint32_t *) key)) - return 1; - return SLURM_SUCCESS; -} - - -/* - * list_find_job_old - find an entry in the job list, + * _list_find_job_old - find an entry in the job list, * see common/list.h for documentation, key is ignored * global- job_list - the global partition list */ -int list_find_job_old(void *job_entry, void *key) +static int _list_find_job_old(void *job_entry, void *key) { time_t min_age; @@ -2027,14 +1940,11 @@ int list_find_job_old(void *job_entry, void *key) /* * pack_all_jobs - dump all job information for all jobs in * machine independent form (for network transmission) - * input: buffer_ptr - location into which a pointer to the data is to be stored. - * the calling function must xfree the storage. - * buffer_size - location into which the size of the created buffer is in bytes - * update_time - dump new data only if job records updated since time - * specified, otherwise return empty buffer - * output: buffer_ptr - the pointer is set to the allocated buffer. - * buffer_size - set to size of the buffer in bytes - * update_time - set to time partition records last updated + * OUT buffer_ptr - the pointer is set to the allocated buffer.
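_list_delete_job() above unlinks a record from a direct-mapped job_hash[] that spills colliding entries into a linear job_hash_over[] list scanned up to max_hash_over. A minimal sketch of that lookup scheme (the table size and the JOB_HASH_INX() mapping are illustrative, not the slurmctld values):

#include <stdint.h>
#include <stdio.h>

#define HASH_SIZE 1024
#define JOB_HASH_INX(id) ((id) % HASH_SIZE)

struct job { uint32_t job_id; };

static struct job *job_hash[HASH_SIZE];
static struct job *job_hash_over[HASH_SIZE];
static int max_hash_over;

static void add_job_hash(struct job *j)
{
	uint32_t inx = JOB_HASH_INX(j->job_id);
	if (job_hash[inx] == NULL)
		job_hash[inx] = j;		/* primary slot free */
	else
		job_hash_over[max_hash_over++] = j;	/* collision: overflow */
}

static struct job *find_job(uint32_t job_id)
{
	int i;
	struct job *j = job_hash[JOB_HASH_INX(job_id)];
	if (j && (j->job_id == job_id))
		return j;
	for (i = 0; i < max_hash_over; i++)	/* linear scan of overflow */
		if (job_hash_over[i] && (job_hash_over[i]->job_id == job_id))
			return job_hash_over[i];
	return NULL;
}

int main(void)
{
	struct job a = { 7 }, b = { 7 + HASH_SIZE };	/* force a collision */
	add_job_hash(&a);
	add_job_hash(&b);
	printf("found %u and %u\n", find_job(7)->job_id,
	       find_job(7 + HASH_SIZE)->job_id);
	return 0;
}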
+ * OUT buffer_size - set to size of the buffer in bytes + * IN/OUT update_time - dump new data only if job records updated since time + * specified, otherwise return empty buffer, set to time partition + * records last updated * global: job_list - global list of job records * NOTE: the buffer at *buffer_ptr must be xfreed by the caller * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c @@ -2088,8 +1998,9 @@ pack_all_jobs(char **buffer_ptr, int *buffer_size, time_t * update_time) /* * pack_job - dump all configuration information about a specific job in * machine independent form (for network transmission) - * dump_job_ptr (I) - pointer to job for which information is requested - * buffer (I/O) - buffer in which data is place, pointers automatically updated + * IN dump_job_ptr - pointer to job for which information is requested + * IN/OUT buffer - buffer in which data is placed, pointers automatically + * updated * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c * whenever the data format changes */ @@ -2182,7 +2093,7 @@ void purge_old_job(void) { int i; - i = list_delete_all(job_list, &list_find_job_old, ""); + i = list_delete_all(job_list, &_list_find_job_old, ""); if (i) { info("purge_old_job: purged %d old job records", i); last_job_update = time(NULL); @@ -2197,7 +2108,7 @@ void purge_old_job(void) * global: last_job_update - time of last job table update * job_list - pointer to global job list */ -void reset_job_bitmaps() +void reset_job_bitmaps(void) { ListIterator job_record_iterator; struct job_record *job_record_point; @@ -2240,10 +2151,10 @@ void reset_job_bitmaps() /* - * set_job_id - set a default job_id, insure that it is unique - * input: job_ptr - pointer to the job_record + * _set_job_id - set a default job_id, ensure that it is unique + * IN job_ptr - pointer to the job_record */ -void set_job_id(struct job_record *job_ptr) +static void _set_job_id(struct job_record *job_ptr) { uint32_t new_id; @@ -2251,10 +2162,10 @@ void set_job_id(struct job_record *job_ptr) job_id_sequence = slurmctld_conf.first_job_id; if ((job_ptr == NULL) || (job_ptr->magic != JOB_MAGIC)) - fatal("set_job_id: invalid job_ptr"); + fatal("_set_job_id: invalid job_ptr"); if ((job_ptr->partition == NULL) || (strlen(job_ptr->partition) == 0)) - fatal("set_job_id: partition not set"); + fatal("_set_job_id: partition not set"); /* Include below code only if fear of rolling over 32 bit job IDs */ while (1) { @@ -2268,25 +2179,25 @@ void set_job_id(struct job_record *job_ptr) /* - * set_job_prio - set a default job priority - * input: job_ptr - pointer to the job_record + * _set_job_prio - set a default job priority + * IN job_ptr - pointer to the job_record * NOTE: this is a simple prototype, we need to re-establish value on restart */ -void set_job_prio(struct job_record *job_ptr) +static void _set_job_prio(struct job_record *job_ptr) { if ((job_ptr == NULL) || (job_ptr->magic != JOB_MAGIC)) - fatal("set_job_prio: invalid job_ptr"); + fatal("_set_job_prio: invalid job_ptr"); job_ptr->priority = default_prio--; } /* - * top_priority - determine if any other job for this partition has a higher priority - * than specified job - * input: job_ptr - pointer to selected partition - * output: returns 1 if selected job has highest priority, 0 otherwise + * _top_priority - determine if any other job for this partition has a + * higher priority than specified job + * IN job_ptr - pointer to selected job + * RET 1 if selected job has highest priority, 0 otherwise */ -int
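_set_job_id() and _set_job_prio() pair a monotonically advancing id sequence, probed until an unused id is found, with a decrementing default priority so that earlier submissions outrank later ones. A distilled sketch (the in-use test is a stand-in for find_job_record()):

#include <stdint.h>
#include <stdio.h>

#define FIRST_JOB_ID 1
static uint32_t job_id_sequence = FIRST_JOB_ID;
static uint32_t default_prio = 100000;

/* stand-in for find_job_record(): pretend ids below 3 are taken */
static int id_in_use(uint32_t id) { return id < 3; }

static uint32_t set_job_id(void)
{
	uint32_t new_id;
	while (1) {			/* probe until an unused id is found */
		new_id = job_id_sequence++;
		if (!id_in_use(new_id))
			return new_id;
	}
}

static uint32_t set_job_prio(void)
{
	return default_prio--;		/* later submissions rank lower */
}

int main(void)
{
	printf("id=%u prio=%u\n", set_job_id(), set_job_prio());
	printf("id=%u prio=%u\n", set_job_id(), set_job_prio());
	return 0;
}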
top_priority(struct job_record *job_ptr) +static int _top_priority(struct job_record *job_ptr) { ListIterator job_record_iterator; struct job_record *job_record_point; @@ -2297,7 +2208,7 @@ int top_priority(struct job_record *job_ptr) while ((job_record_point = (struct job_record *) list_next(job_record_iterator))) { if (job_record_point->magic != JOB_MAGIC) - fatal("top_priority: job integrity is bad"); + fatal("_top_priority: job integrity is bad"); if (job_record_point == job_ptr) continue; if (job_record_point->job_state != JOB_PENDING) @@ -2316,8 +2227,9 @@ int top_priority(struct job_record *job_ptr) /* * update_job - update a job's parameters per the supplied specifications - * input: uid - uid of user issuing RPC - * output: returns an error code from common/slurm_errno.h + * IN job_specs - a job's specification + * IN uid - uid of user issuing RPC + * RET returns an error code from common/slurm_errno.h * global: job_list - global list of job entries * last_job_update - time of last job table update */ @@ -2354,7 +2266,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) job_ptr->end_time = job_ptr->start_time + (job_ptr->time_limit * 60); - info("update_job: setting time_limit to %u for job_id %u", job_specs->time_limit, job_specs->job_id); + info("update_job: setting time_limit to %u for job_id %u", + job_specs->time_limit, job_specs->job_id); } else { error("Attempt to increase time limit for job %u", job_specs->job_id); @@ -2366,7 +2279,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (super_user || (job_ptr->priority > job_specs->priority)) { job_ptr->priority = job_specs->priority; - info("update_job: setting priority to %u for job_id %u", job_specs->priority, job_specs->job_id); + info("update_job: setting priority to %u for job_id %u", + job_specs->priority, job_specs->job_id); } else { error("Attempt to increase priority for job %u", job_specs->job_id); @@ -2378,7 +2292,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (super_user || (detail_ptr->min_procs > job_specs->min_procs)) { detail_ptr->min_procs = job_specs->min_procs; - info("update_job: setting min_procs to %u for job_id %u", job_specs->min_procs, job_specs->job_id); + info("update_job: setting min_procs to %u for job_id %u", + job_specs->min_procs, job_specs->job_id); } else { error("Attempt to increase min_procs for job %u", job_specs->job_id); @@ -2390,7 +2305,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (super_user || (detail_ptr->min_memory > job_specs->min_memory)) { detail_ptr->min_memory = job_specs->min_memory; - info("update_job: setting min_memory to %u for job_id %u", job_specs->min_memory, job_specs->job_id); + info("update_job: setting min_memory to %u for job_id %u", + job_specs->min_memory, job_specs->job_id); } else { error("Attempt to increase min_memory for job %u", job_specs->job_id); @@ -2402,7 +2318,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (super_user || (detail_ptr->min_tmp_disk > job_specs->min_tmp_disk)) { detail_ptr->min_tmp_disk = job_specs->min_tmp_disk; - info("update_job: setting min_tmp_disk to %u for job_id %u", job_specs->min_tmp_disk, job_specs->job_id); + info("update_job: setting min_tmp_disk to %u for job_id %u", + job_specs->min_tmp_disk, job_specs->job_id); } else { error ("Attempt to increase min_tmp_disk for job %u", @@ -2415,7 +2332,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (super_user || (detail_ptr->num_procs > job_specs->num_procs)) { detail_ptr->num_procs = job_specs->num_procs; 
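Every field handled by update_job() follows the same guard, visible repeatedly in this hunk: an ordinary user may only lower a value, while raising it requires privilege. The pattern, distilled into a standalone sketch:

#include <stdint.h>
#include <stdio.h>

/* Apply an update that ordinary users may only decrease.
 * Returns 0 if applied, -1 if rejected. */
static int update_limited(uint32_t *cur, uint32_t req, int super_user)
{
	if (req == *cur)
		return 0;		/* no change requested */
	if (super_user || (*cur > req)) {
		*cur = req;		/* decrease (or privileged change) */
		return 0;
	}
	return -1;			/* attempt to increase: refused */
}

int main(void)
{
	uint32_t time_limit = 60;
	printf("lower to 30: %d\n", update_limited(&time_limit, 30, 0));
	printf("raise to 90: %d\n", update_limited(&time_limit, 90, 0));
	printf("root raise:  %d\n", update_limited(&time_limit, 90, 1));
	printf("final=%u\n", time_limit);
	return 0;
}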
- info("update_job: setting num_procs to %u for job_id %u", job_specs->num_procs, job_specs->job_id); + info("update_job: setting num_procs to %u for job_id %u", + job_specs->num_procs, job_specs->job_id); } else { error("Attempt to increase num_procs for job %u", job_specs->job_id); @@ -2427,7 +2345,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (super_user || (detail_ptr->num_nodes > job_specs->num_nodes)) { detail_ptr->num_nodes = job_specs->num_nodes; - info("update_job: setting num_nodes to %u for job_id %u", job_specs->num_nodes, job_specs->job_id); + info("update_job: setting num_nodes to %u for job_id %u", + job_specs->num_nodes, job_specs->job_id); } else { error("Attempt to increase num_nodes for job %u", job_specs->job_id); @@ -2438,7 +2357,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (job_specs->shared != (uint16_t) NO_VAL && detail_ptr) { if (super_user || (detail_ptr->shared > job_specs->shared)) { detail_ptr->shared = job_specs->shared; - info("update_job: setting shared to %u for job_id %u", job_specs->shared, job_specs->job_id); + info("update_job: setting shared to %u for job_id %u", + job_specs->shared, job_specs->job_id); } else { error("Attempt to remove sharing for job %u", job_specs->job_id); @@ -2450,7 +2370,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (super_user || (detail_ptr->contiguous > job_specs->contiguous)) { detail_ptr->contiguous = job_specs->contiguous; - info("update_job: setting contiguous to %u for job_id %u", job_specs->contiguous, job_specs->job_id); + info("update_job: setting contiguous to %u for job_id %u", + job_specs->contiguous, job_specs->job_id); } else { error("Attempt to add contiguous for job %u", job_specs->job_id); @@ -2460,7 +2381,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (job_specs->kill_on_node_fail != (uint16_t) NO_VAL) { job_ptr->kill_on_node_fail = job_specs->kill_on_node_fail; - info("update_job: setting kill_on_node_fail to %u for job_id %u", job_specs->kill_on_node_fail, job_specs->job_id); + info("update_job: setting kill_on_node_fail to %u for job_id %u", + job_specs->kill_on_node_fail, job_specs->job_id); } if (job_specs->features && detail_ptr) { @@ -2468,7 +2390,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (detail_ptr->features) xfree(detail_ptr->features); detail_ptr->features = job_specs->features; - info("update_job: setting features to %s for job_id %u", job_specs->features, job_specs->job_id); + info("update_job: setting features to %s for job_id %u", + job_specs->features, job_specs->job_id); job_specs->features = NULL; } else { error("Attempt to change features for job %u", @@ -2491,7 +2414,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) strncpy(job_ptr->partition, job_specs->partition, MAX_NAME_LEN); job_ptr->part_ptr = tmp_part_ptr; - info("update_job: setting partition to %s for job_id %u", job_specs->partition, job_specs->job_id); + info("update_job: setting partition to %s for job_id %u", + job_specs->partition, job_specs->job_id); job_specs->partition = NULL; } else { error("Attempt to change partition for job %u", @@ -2521,7 +2445,8 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) bit_free(detail_ptr-> req_node_bitmap); detail_ptr->req_node_bitmap = req_bitmap; - info("update_job: setting req_nodes to %s for job_id %u", job_specs->req_nodes, job_specs->job_id); + info("update_job: setting req_nodes to %s for job_id %u", + job_specs->req_nodes, job_specs->job_id); job_specs->req_nodes = NULL; } } else { @@ 
-2535,9 +2460,16 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) } -/* validate_jobs_on_node - validate that any jobs that should be on the node are - * actually running, if not clean up the job records and/or node records, - * call this function after validate_node_specs() sets the node state properly */ +/* + * validate_jobs_on_node - validate that any jobs that should be on the node + * are actually running, if not clean up the job records and/or node + * records, call this function after validate_node_specs() sets the node + * state properly + * IN node_name - node which should have jobs running + * IN job_count - number of jobs which should be running on specified node + * IN job_id_ptr - pointer to array of job_ids that should be on this node + * IN step_id_ptr - pointer to array of job step ids that should be on node + */ void validate_jobs_on_node(char *node_name, uint32_t * job_count, uint32_t * job_id_ptr, uint16_t * step_id_ptr) @@ -2566,33 +2498,37 @@ validate_jobs_on_node(char *node_name, uint32_t * job_count, /* FIXME: In the future try to let job run */ error("Orphan job_id %u reported on node %s", job_id_ptr[i], node_name); - signal_job_on_node(job_id_ptr[i], step_id_ptr[i], - SIGKILL, node_name); - /* We may well have pending purge job RPC to send slurmd, - * which would synchronize this */ + _signal_job_on_node(job_id_ptr[i], step_id_ptr[i], + SIGKILL, node_name); + /* We may well have pending purge job RPC to send + * slurmd, which would synchronize this */ } else if (job_ptr->job_state == JOB_RUNNING) { if (bit_test(job_ptr->node_bitmap, node_inx)) { jobs_running++; - debug3("Registered job_id %u on node %s ", job_id_ptr[i], node_name); /* All is well */ + debug3("Registered job_id %u on node %s ", + job_id_ptr[i], node_name); } else { - error("REGISTERED JOB_ID %u ON WRONG NODE %s ", job_id_ptr[i], node_name); /* Very bad */ - signal_job_on_node(job_id_ptr[i], - step_id_ptr[i], SIGKILL, - node_name); + error + ("REGISTERED JOB_ID %u ON WRONG NODE %s ", + job_id_ptr[i], node_name); + _signal_job_on_node(job_id_ptr[i], + step_id_ptr[i], + SIGKILL, node_name); } } else if (job_ptr->job_state == JOB_PENDING) { /* FIXME: In the future try to let job run */ - error("REGISTERED PENDING JOB_ID %u ON NODE %s ", job_id_ptr[i], node_name); /* Very bad */ + error("REGISTERED PENDING JOB_ID %u ON NODE %s ", + job_id_ptr[i], node_name); job_ptr->job_state = JOB_FAILED; last_job_update = time(NULL); job_ptr->end_time = time(NULL); delete_job_details(job_ptr); - signal_job_on_node(job_id_ptr[i], step_id_ptr[i], - SIGKILL, node_name); + _signal_job_on_node(job_id_ptr[i], step_id_ptr[i], + SIGKILL, node_name); } else { /* else job is supposed to be done */ @@ -2601,10 +2537,10 @@ validate_jobs_on_node(char *node_name, uint32_t * job_count, job_id_ptr[i], job_state_string(job_ptr->job_state), node_name); - signal_job_on_node(job_id_ptr[i], step_id_ptr[i], - SIGKILL, node_name); - /* We may well have pending purge job RPC to send slurmd, - * which would synchronize this */ + _signal_job_on_node(job_id_ptr[i], step_id_ptr[i], + SIGKILL, node_name); + /* We may well have pending purge job RPC to send + * slurmd, which would synchronize this */ } } @@ -2616,10 +2552,11 @@ validate_jobs_on_node(char *node_name, uint32_t * job_count, return; } -/* signal_job_on_node - send specific signal to specific job_id, step_id and node_name */ -void -signal_job_on_node(uint32_t job_id, uint16_t step_id, int signum, - char *node_name) +/* _signal_job_on_node - send specific signal to specific 
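validate_jobs_on_node() boils down to a per-job decision table: a registered job survives only if the controller believes it is running and the reporting node is set in the job's node bitmap; orphans, jobs on the wrong node, pending jobs (which are additionally marked failed), and already-completed jobs all draw a SIGKILL. A sketch of that table (states and the bitmap test simplified):

#include <stdio.h>

enum state { UNKNOWN, PENDING, RUNNING, DONE };

/* 1 = job may keep running on this node, 0 = send SIGKILL */
static int job_belongs(enum state s, int node_in_job_bitmap)
{
	switch (s) {
	case RUNNING:
		return node_in_job_bitmap;	/* wrong node is fatal */
	default:	/* orphan, pending, or already done */
		return 0;
	}
}

int main(void)
{
	printf("running on right node: %d\n", job_belongs(RUNNING, 1));
	printf("running on wrong node: %d\n", job_belongs(RUNNING, 0));
	printf("orphan:                %d\n", job_belongs(UNKNOWN, 0));
	return 0;
}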
job_id, step_id + * and node_name */ +static void +_signal_job_on_node(uint32_t job_id, uint16_t step_id, int signum, + char *node_name) { /* FIXME: add code to send RPC to specified node */ debug("Signal %d send to job %u.%u on node %s", @@ -2628,7 +2565,12 @@ signal_job_on_node(uint32_t job_id, uint16_t step_id, int signum, } -/* old_job_info - get details about an existing job allocation */ +/* + * old_job_info - get details about an existing job allocation + * IN uid - uid of user issuing the RPC + * IN job_id - ID of job for which info is requested + * OUT everything else - the job's details + */ int old_job_info(uint32_t uid, uint32_t job_id, char **node_list, uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, @@ -2661,12 +2603,3 @@ old_job_info(uint32_t uid, uint32_t job_id, char **node_list, *node_addr = job_ptr->node_addr; return SLURM_SUCCESS; } - - -static inline void x_clear(void *arg) -{ - if (arg) { - xfree(arg); - arg = NULL; - } -} diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 09e822bfe4d..0dab3ddf3e8 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -26,7 +26,7 @@ \*****************************************************************************/ #ifdef HAVE_CONFIG_H -# include <config.h> +# include "config.h" #endif #include <errno.h> @@ -35,31 +35,30 @@ #include <string.h> #include <unistd.h> -#include <src/common/list.h> -#include <src/common/xstring.h> -#include <src/slurmctld/agent.h> -#include <src/slurmctld/locks.h> -#include <src/slurmctld/slurmctld.h> +#include "src/common/list.h" +#include "src/common/xstring.h" +#include "src/slurmctld/agent.h" +#include "src/slurmctld/locks.h" +#include "src/slurmctld/slurmctld.h" struct job_queue { int priority; struct job_record *job_ptr; }; -static int build_job_queue (struct job_queue **job_queue); -static void launch_job (struct job_record *job_ptr); -static void sort_job_queue (struct job_queue *job_queue, int job_queue_size); +static int _build_job_queue(struct job_queue **job_queue); +static void _launch_job(struct job_record *job_ptr); +static void _sort_job_queue(struct job_queue *job_queue, + int job_queue_size); /* - * build_job_queue - build (non-priority ordered) list of pending jobs - * input: job_queue - storage location for job queue - * output: job_queue - pointer to job queue - * returns - number of entries in job_queue + * _build_job_queue - build (non-priority ordered) list of pending jobs + * OUT job_queue - pointer to job queue + * RET number of entries in job_queue * global: job_list - global list of job records * NOTE: the buffer at *job_queue must be xfreed by the caller */ -int -build_job_queue (struct job_queue **job_queue) +static int _build_job_queue(struct job_queue **job_queue) { ListIterator job_record_iterator; struct job_record *job_record_point = NULL; @@ -69,24 +68,25 @@ build_job_queue (struct job_queue **job_queue) /* build list pending jobs */ job_buffer_size = job_queue_size = 0; job_queue[0] = my_job_queue = NULL; - job_record_iterator = list_iterator_create (job_list); - - while ((job_record_point = - (struct job_record *) list_next (job_record_iterator))) { - if (job_record_point->job_state != JOB_PENDING) + job_record_iterator = list_iterator_create(job_list); + + while ((job_record_point = + (struct job_record *) list_next(job_record_iterator))) { + if (job_record_point->job_state != JOB_PENDING) continue; if (job_record_point->magic != JOB_MAGIC) - fatal ("prio_order_job: data integrity is bad"); + fatal("_build_job_queue: 
data integrity is bad"); if (job_buffer_size <= job_queue_size) { job_buffer_size += 50; - xrealloc(my_job_queue, job_buffer_size * - sizeof (struct job_queue)); + xrealloc(my_job_queue, job_buffer_size * + sizeof(struct job_queue)); } my_job_queue[job_queue_size].job_ptr = job_record_point; - my_job_queue[job_queue_size].priority = job_record_point->priority; + my_job_queue[job_queue_size].priority = + job_record_point->priority; job_queue_size++; - } - list_iterator_destroy (job_record_iterator); + } + list_iterator_destroy(job_record_iterator); job_queue[0] = my_job_queue; return job_queue_size; @@ -97,7 +97,7 @@ build_job_queue (struct job_queue **job_queue) * schedule - attempt to schedule all pending jobs * pending jobs for each partition will be scheduled in priority * order until a request fails - * output: returns count of jobs scheduled + * RET count of jobs scheduled * global: job_list - global list of job records * last_job_update - time of last update to job table * Note: We re-build the queue every time. Jobs can not only be added @@ -105,52 +105,55 @@ build_job_queue (struct job_queue **job_queue) * changed with the update_job RPC. In general nodes will be in priority * order (by submit time), so the sorting should be pretty fast. */ -int -schedule (void) +int schedule(void) { struct job_queue *job_queue; int i, j, error_code, failed_part_cnt, job_queue_size, job_cnt = 0; struct job_record *job_ptr; struct part_record **failed_parts; /* Locks: Write job, write node, read partition */ - slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; + slurmctld_lock_t job_write_lock = + { NO_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; - lock_slurmctld (job_write_lock); - job_queue_size = build_job_queue (&job_queue); + lock_slurmctld(job_write_lock); + job_queue_size = _build_job_queue(&job_queue); if (job_queue_size == 0) { - unlock_slurmctld (job_write_lock); + unlock_slurmctld(job_write_lock); return SLURM_SUCCESS; } - sort_job_queue (job_queue, job_queue_size); + _sort_job_queue(job_queue, job_queue_size); failed_part_cnt = 0; failed_parts = NULL; - for (i=0; i<job_queue_size; i++) { + for (i = 0; i < job_queue_size; i++) { job_ptr = job_queue[i].job_ptr; - for (j=0; j<failed_part_cnt; j++) { + for (j = 0; j < failed_part_cnt; j++) { if (failed_parts[j] == job_ptr->part_ptr) break; } - if (j < failed_part_cnt) continue; + if (j < failed_part_cnt) + continue; error_code = select_nodes(job_ptr, 0); if (error_code == ESLURM_NODES_BUSY) { - xrealloc(failed_parts, - (failed_part_cnt+1)*sizeof(struct part_record *)); - failed_parts[failed_part_cnt++] = job_ptr->part_ptr; - } - else if (error_code == SLURM_SUCCESS) { /* job initiated */ - last_job_update = time (NULL); - info ("schedule: job_id %u on nodes %s", - job_ptr->job_id, job_ptr->nodes); - launch_job (job_ptr); + xrealloc(failed_parts, + (failed_part_cnt + + 1) * sizeof(struct part_record *)); + failed_parts[failed_part_cnt++] = + job_ptr->part_ptr; + } else if (error_code == SLURM_SUCCESS) { + /* job initiated */ + last_job_update = time(NULL); + info("schedule: job_id %u on nodes %s", + job_ptr->job_id, job_ptr->nodes); + _launch_job(job_ptr); job_cnt++; - } - else { - info ("schedule: job_id %u non-runnable, error %m", - job_ptr->job_id); - last_job_update = time (NULL); + } else { + info("schedule: job_id %u non-runnable, error %m", + job_ptr->job_id); + last_job_update = time(NULL); job_ptr->job_state = JOB_FAILED; - job_ptr->start_time = job_ptr->end_time = time(NULL); + job_ptr->start_time = 
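schedule() rebuilds and re-sorts the queue on every pass because priorities can change through update_job() between passes. The selection sort from _sort_job_queue(), extracted into a standalone program over a simplified queue entry:

#include <stdio.h>

struct job_queue { int priority; int job_id; };

static void sort_job_queue(struct job_queue *q, int n)
{
	int i, j, top;
	for (i = 0; i < n; i++) {
		top = i;
		for (j = i + 1; j < n; j++)	/* find the highest remaining */
			if (q[j].priority > q[top].priority)
				top = j;
		if (top != i) {			/* swap it into slot i */
			struct job_queue tmp = q[i];
			q[i] = q[top];
			q[top] = tmp;
		}
	}
}

int main(void)
{
	struct job_queue q[] = { { 5, 100 }, { 9, 101 }, { 1, 102 } };
	int i;
	sort_job_queue(q, 3);
	for (i = 0; i < 3; i++)
		printf("job %d prio %d\n", q[i].job_id, q[i].priority);
	return 0;
}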
job_ptr->end_time = + time(NULL); delete_job_details(job_ptr); } } @@ -159,30 +162,27 @@ schedule (void) xfree(failed_parts); if (job_queue) xfree(job_queue); - unlock_slurmctld (job_write_lock); + unlock_slurmctld(job_write_lock); return job_cnt; } /* - * sort_job_queue - sort job_queue in decending priority order - * input: job_queue - pointer to un-sorted job queue - * job_queue_size - count of elements in the job queue - * output: job_queue - pointer to sorted job queue + * _sort_job_queue - sort job_queue in descending priority order + * IN job_queue_size - count of elements in the job queue + * IN/OUT job_queue - pointer to sorted job queue */ -void -sort_job_queue (struct job_queue *job_queue, int job_queue_size) +static void _sort_job_queue(struct job_queue *job_queue, int job_queue_size) { int i, j, top_prio_inx; int tmp_prio, top_prio; struct job_record *tmp_job_ptr; - for (i=0; i<job_queue_size; i++) { + for (i = 0; i < job_queue_size; i++) { top_prio = job_queue[i].priority; top_prio_inx = i; - for (j=(i+1); j<job_queue_size; j++) { - if (top_prio >= - job_queue[j].priority) + for (j = (i + 1); j < job_queue_size; j++) { + if (top_prio >= job_queue[j].priority) continue; top_prio = job_queue[j].priority; top_prio_inx = j; @@ -194,13 +194,14 @@ sort_job_queue (struct job_queue *job_queue, int job_queue_size) job_queue[i].priority = job_queue[top_prio_inx].priority; job_queue[i].job_ptr = job_queue[top_prio_inx].job_ptr; job_queue[top_prio_inx].priority = tmp_prio; - job_queue[top_prio_inx].job_ptr = tmp_job_ptr; + job_queue[top_prio_inx].job_ptr = tmp_job_ptr; } } -/* launch_job - send an RPC to a slurmd to initiate a job */ -void -launch_job (struct job_record *job_ptr) +/* _launch_job - send an RPC to a slurmd to initiate a job + * IN job_ptr - pointer to job that will be initiated + */ +static void _launch_job(struct job_record *job_ptr) { batch_job_launch_msg_t *launch_msg_ptr; agent_arg_t *agent_arg_ptr; @@ -211,50 +212,54 @@ launch_job (struct job_record *job_ptr) if (job_ptr->details->batch_flag == 0) return; - node_ptr = find_first_node_record (job_ptr -> node_bitmap); + node_ptr = find_first_node_record(job_ptr->node_bitmap); if (node_ptr == NULL) return; /* Initialization of data structures */ - launch_msg_ptr = (batch_job_launch_msg_t *) xmalloc (sizeof (batch_job_launch_msg_t)); - launch_msg_ptr -> job_id = job_ptr -> job_id; - launch_msg_ptr -> uid = job_ptr -> user_id; - launch_msg_ptr -> nodes = xstrdup (job_ptr -> nodes); - launch_msg_ptr -> err = xstrdup (job_ptr -> details -> err); - launch_msg_ptr -> in = xstrdup (job_ptr -> details -> in); - launch_msg_ptr -> out = xstrdup (job_ptr -> details -> out); - launch_msg_ptr -> work_dir = xstrdup (job_ptr -> details -> work_dir); - launch_msg_ptr -> argc = 0; /* FIXME */ - launch_msg_ptr -> argv = NULL; /* FIXME */ - launch_msg_ptr -> script = get_job_script (job_ptr); - launch_msg_ptr -> environment = get_job_env (job_ptr, &launch_msg_ptr -> envc); + launch_msg_ptr = + (batch_job_launch_msg_t *) + xmalloc(sizeof(batch_job_launch_msg_t)); + launch_msg_ptr->job_id = job_ptr->job_id; + launch_msg_ptr->uid = job_ptr->user_id; + launch_msg_ptr->nodes = xstrdup(job_ptr->nodes); + launch_msg_ptr->err = xstrdup(job_ptr->details->err); + launch_msg_ptr->in = xstrdup(job_ptr->details->in); + launch_msg_ptr->out = xstrdup(job_ptr->details->out); + launch_msg_ptr->work_dir = xstrdup(job_ptr->details->work_dir); + launch_msg_ptr->argc = 0; /* FIXME */ + launch_msg_ptr->argv = NULL; /* FIXME */ + launch_msg_ptr->script = 
get_job_script(job_ptr); + launch_msg_ptr->environment = + get_job_env(job_ptr, &launch_msg_ptr->envc); - agent_arg_ptr = (agent_arg_t *) xmalloc (sizeof (agent_arg_t)); - agent_arg_ptr -> node_count = 1; - agent_arg_ptr -> retry = 1; - agent_arg_ptr -> slurm_addr = xmalloc (sizeof (struct sockaddr_in)); - memcpy (agent_arg_ptr -> slurm_addr, - &(node_ptr -> slurm_addr), sizeof (struct sockaddr_in)); - agent_arg_ptr -> node_names = xstrdup (node_ptr -> name); - agent_arg_ptr -> msg_type = REQUEST_BATCH_JOB_LAUNCH; - agent_arg_ptr -> msg_args = (void *)launch_msg_ptr; + agent_arg_ptr = (agent_arg_t *) xmalloc(sizeof(agent_arg_t)); + agent_arg_ptr->node_count = 1; + agent_arg_ptr->retry = 1; + agent_arg_ptr->slurm_addr = xmalloc(sizeof(struct sockaddr_in)); + memcpy(agent_arg_ptr->slurm_addr, + &(node_ptr->slurm_addr), sizeof(struct sockaddr_in)); + agent_arg_ptr->node_names = xstrdup(node_ptr->name); + agent_arg_ptr->msg_type = REQUEST_BATCH_JOB_LAUNCH; + agent_arg_ptr->msg_args = (void *) launch_msg_ptr; /* Launch the RPC via agent */ - debug3 ("Spawning job launch agent for job_id %u", job_ptr -> job_id); - if (pthread_attr_init (&attr_agent)) - fatal ("pthread_attr_init error %m"); - if (pthread_attr_setdetachstate (&attr_agent, PTHREAD_CREATE_DETACHED)) - error ("pthread_attr_setdetachstate error %m"); + debug3("Spawning job launch agent for job_id %u", job_ptr->job_id); + if (pthread_attr_init(&attr_agent)) + fatal("pthread_attr_init error %m"); + if (pthread_attr_setdetachstate + (&attr_agent, PTHREAD_CREATE_DETACHED)) + error("pthread_attr_setdetachstate error %m"); #ifdef PTHREAD_SCOPE_SYSTEM - if (pthread_attr_setscope (&attr_agent, PTHREAD_SCOPE_SYSTEM)) - error ("pthread_attr_setscope error %m"); + if (pthread_attr_setscope(&attr_agent, PTHREAD_SCOPE_SYSTEM)) + error("pthread_attr_setscope error %m"); #endif - if (pthread_create (&thread_agent, &attr_agent, - agent, (void *)agent_arg_ptr)) { - error ("pthread_create error %m"); - sleep (1); /* sleep and try once more */ - if (pthread_create (&thread_agent, &attr_agent, - agent, (void *)agent_arg_ptr)) - fatal ("pthread_create error %m"); + if (pthread_create(&thread_agent, &attr_agent, + agent, (void *) agent_arg_ptr)) { + error("pthread_create error %m"); + sleep(1); /* sleep and try once more */ + if (pthread_create(&thread_agent, &attr_agent, + agent, (void *) agent_arg_ptr)) + fatal("pthread_create error %m"); } } diff --git a/src/slurmctld/locks.c b/src/slurmctld/locks.c index 9dfa8e5141c..465e589d15a 100644 --- a/src/slurmctld/locks.c +++ b/src/slurmctld/locks.c @@ -25,177 +25,174 @@ \*****************************************************************************/ #ifdef HAVE_CONFIG_H -# include <config.h> +# include "config.h" +#endif + +#ifdef WITH_PTHREADS +# include <pthread.h> #endif #include <errno.h> -#include <pthread.h> #include <string.h> #include <sys/types.h> -#include <src/slurmctld/locks.h> -#include <src/slurmctld/slurmctld.h> +#include "src/slurmctld/locks.h" +#include "src/slurmctld/slurmctld.h" -pthread_mutex_t locks_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t locks_cond = PTHREAD_COND_INITIALIZER; -pthread_mutex_t state_mutex = PTHREAD_MUTEX_INITIALIZER; -slurmctld_lock_flags_t slurmctld_locks; -int kill_thread = 0; +static pthread_mutex_t locks_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t locks_cond = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t state_mutex = PTHREAD_MUTEX_INITIALIZER; -void wr_rdlock (lock_datatype_t datatype); -void wr_rdunlock (lock_datatype_t datatype); -void 
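_launch_job() hands the RPC to a detached agent thread so the scheduler never blocks on a slurmd. The launch pattern, in self-contained form (compile with -pthread; the agent body here is a stub, and the PTHREAD_SCOPE_SYSTEM step is omitted):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static void *agent(void *arg)
{
	printf("agent running for %s\n", (char *) arg);
	return NULL;
}

static int spawn_agent(void *arg)
{
	pthread_t tid;
	pthread_attr_t attr;
	int rc = 0;

	pthread_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	if (pthread_create(&tid, &attr, agent, arg)) {
		sleep(1);	/* transient failure: try once more */
		if (pthread_create(&tid, &attr, agent, arg))
			rc = -1;
	}
	pthread_attr_destroy(&attr);
	return rc;
}

int main(void)
{
	spawn_agent("job 42");
	sleep(1);		/* let the detached thread print */
	return 0;
}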
wr_wrlock (lock_datatype_t datatype); -void wr_wrunlock (lock_datatype_t datatype); +static slurmctld_lock_flags_t slurmctld_locks; +static int kill_thread = 0; -/* init_locks - create locks used for slurmctld data structure access control */ -void -init_locks ( void ) +static void _wr_rdlock(lock_datatype_t datatype); +static void _wr_rdunlock(lock_datatype_t datatype); +static void _wr_wrlock(lock_datatype_t datatype); +static void _wr_wrunlock(lock_datatype_t datatype); + +/* init_locks - create locks used for slurmctld data structure access + * control */ +void init_locks(void) { /* just clear all semaphores */ - memset ((void *)&slurmctld_locks, 0, sizeof (slurmctld_locks) ); + memset((void *) &slurmctld_locks, 0, sizeof(slurmctld_locks)); } /* lock_slurmctld - Issue the required lock requests in a well defined order - * Returns 0 on success, -1 on failure */ -void -lock_slurmctld (slurmctld_lock_t lock_levels) + * RET 0 on success, -1 on failure */ +void lock_slurmctld(slurmctld_lock_t lock_levels) { if (lock_levels.config == READ_LOCK) - wr_rdlock (CONFIG_LOCK); + _wr_rdlock(CONFIG_LOCK); else if (lock_levels.config == WRITE_LOCK) - wr_wrlock (CONFIG_LOCK); + _wr_wrlock(CONFIG_LOCK); if (lock_levels.job == READ_LOCK) - wr_rdlock (JOB_LOCK); + _wr_rdlock(JOB_LOCK); else if (lock_levels.job == WRITE_LOCK) - wr_wrlock (JOB_LOCK); + _wr_wrlock(JOB_LOCK); if (lock_levels.node == READ_LOCK) - wr_rdlock (NODE_LOCK); + _wr_rdlock(NODE_LOCK); else if (lock_levels.node == WRITE_LOCK) - wr_wrlock (NODE_LOCK); + _wr_wrlock(NODE_LOCK); if (lock_levels.partition == READ_LOCK) - wr_rdlock (PART_LOCK); + _wr_rdlock(PART_LOCK); else if (lock_levels.partition == WRITE_LOCK) - wr_wrlock (PART_LOCK); + _wr_wrlock(PART_LOCK); } -/* unlock_slurmctld - Issue the required unlock requests in a well defined order */ -void -unlock_slurmctld (slurmctld_lock_t lock_levels) +/* unlock_slurmctld - Issue the required unlock requests in a well + * defined order */ +void unlock_slurmctld(slurmctld_lock_t lock_levels) { if (lock_levels.partition == READ_LOCK) - wr_rdunlock (PART_LOCK); + _wr_rdunlock(PART_LOCK); else if (lock_levels.partition == WRITE_LOCK) - wr_wrunlock (PART_LOCK); + _wr_wrunlock(PART_LOCK); if (lock_levels.node == READ_LOCK) - wr_rdunlock (NODE_LOCK); + _wr_rdunlock(NODE_LOCK); else if (lock_levels.node == WRITE_LOCK) - wr_wrunlock (NODE_LOCK); + _wr_wrunlock(NODE_LOCK); if (lock_levels.job == READ_LOCK) - wr_rdunlock (JOB_LOCK); + _wr_rdunlock(JOB_LOCK); else if (lock_levels.job == WRITE_LOCK) - wr_wrunlock (JOB_LOCK); + _wr_wrunlock(JOB_LOCK); if (lock_levels.config == READ_LOCK) - wr_rdunlock (CONFIG_LOCK); + _wr_rdunlock(CONFIG_LOCK); else if (lock_levels.config == WRITE_LOCK) - wr_wrunlock (CONFIG_LOCK); + _wr_wrunlock(CONFIG_LOCK); } -/* wr_rdlock - Issue a read lock on the specified data type */ -void -wr_rdlock (lock_datatype_t datatype) +/* _wr_rdlock - Issue a read lock on the specified data type */ +static void _wr_rdlock(lock_datatype_t datatype) { - pthread_mutex_lock (&locks_mutex); + slurm_mutex_lock(&locks_mutex); while (1) { - if ((slurmctld_locks.entity [write_wait_lock (datatype)] == 0) && - (slurmctld_locks.entity [write_lock (datatype)] == 0)) { - slurmctld_locks.entity [read_lock (datatype)]++; + if ((slurmctld_locks.entity[write_wait_lock(datatype)] == + 0) + && (slurmctld_locks.entity[write_lock(datatype)] == + 0)) { + slurmctld_locks.entity[read_lock(datatype)]++; break; - } - else { /* wait for state change and retry */ - pthread_cond_wait (&locks_cond, &locks_mutex); + } 
else { /* wait for state change and retry */ + pthread_cond_wait(&locks_cond, &locks_mutex); if (kill_thread) - pthread_exit (NULL); + pthread_exit(NULL); } } - pthread_mutex_unlock (&locks_mutex); + slurm_mutex_unlock(&locks_mutex); } -/* wr_rdunlock - Issue a read unlock on the specified data type */ -void -wr_rdunlock (lock_datatype_t datatype) +/* _wr_rdunlock - Issue a read unlock on the specified data type */ +static void _wr_rdunlock(lock_datatype_t datatype) { - pthread_mutex_lock (&locks_mutex); - slurmctld_locks.entity [read_lock (datatype)]--; - pthread_mutex_unlock (&locks_mutex); - pthread_cond_broadcast (&locks_cond); + slurm_mutex_lock(&locks_mutex); + slurmctld_locks.entity[read_lock(datatype)]--; + slurm_mutex_unlock(&locks_mutex); + pthread_cond_broadcast(&locks_cond); } -/* wr_wrlock - Issue a write lock on the specified data type */ -void -wr_wrlock (lock_datatype_t datatype) +/* _wr_wrlock - Issue a write lock on the specified data type */ +static void _wr_wrlock(lock_datatype_t datatype) { - pthread_mutex_lock (&locks_mutex); - slurmctld_locks.entity [write_wait_lock (datatype)]++; + slurm_mutex_lock(&locks_mutex); + slurmctld_locks.entity[write_wait_lock(datatype)]++; while (1) { - if ((slurmctld_locks.entity [read_lock (datatype)] == 0) && - (slurmctld_locks.entity [write_lock (datatype)] == 0)) { - slurmctld_locks.entity [write_lock (datatype)]++; - slurmctld_locks.entity [write_wait_lock (datatype)]--; + if ((slurmctld_locks.entity[read_lock(datatype)] == 0) && + (slurmctld_locks.entity[write_lock(datatype)] == 0)) { + slurmctld_locks.entity[write_lock(datatype)]++; + slurmctld_locks. + entity[write_wait_lock(datatype)]--; break; - } - else { /* wait for state change and retry */ - pthread_cond_wait (&locks_cond, &locks_mutex); + } else { /* wait for state change and retry */ + pthread_cond_wait(&locks_cond, &locks_mutex); if (kill_thread) - pthread_exit (NULL); + pthread_exit(NULL); } } - pthread_mutex_unlock (&locks_mutex); + slurm_mutex_unlock(&locks_mutex); } -/* wr_wrunlock - Issue a write unlock on the specified data type */ -void -wr_wrunlock (lock_datatype_t datatype) +/* _wr_wrunlock - Issue a write unlock on the specified data type */ +static void _wr_wrunlock(lock_datatype_t datatype) { - pthread_mutex_lock (&locks_mutex); - slurmctld_locks.entity [write_lock (datatype)]--; - pthread_mutex_unlock (&locks_mutex); - pthread_cond_broadcast (&locks_cond); + slurm_mutex_lock(&locks_mutex); + slurmctld_locks.entity[write_lock(datatype)]--; + slurm_mutex_unlock(&locks_mutex); + pthread_cond_broadcast(&locks_cond); } -/* get_lock_values - Get the current value of all locks */ -void -get_lock_values (slurmctld_lock_flags_t *lock_flags) +/* get_lock_values - Get the current value of all locks + * OUT lock_flags - a copy of the current lock values */ +void get_lock_values(slurmctld_lock_flags_t * lock_flags) { if (lock_flags == NULL) - fatal ("get_lock_values passed null pointer"); + fatal("get_lock_values passed null pointer"); - memcpy ((void *)lock_flags, (void *) &slurmctld_locks, sizeof (slurmctld_locks) ); + memcpy((void *) lock_flags, (void *) &slurmctld_locks, + sizeof(slurmctld_locks)); } /* kill_locked_threads - Kill all threads waiting on semaphores */ -void -kill_locked_threads ( void ) +void kill_locked_threads(void) { kill_thread = 1; - pthread_cond_broadcast (&locks_cond); + pthread_cond_broadcast(&locks_cond); } -/* locks used for saving state of slurmctld */ -void -lock_state_files ( void ) +/* un/lock semaphore used for saving state of slurmctld 
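The locking primitives above implement a counted reader/writer lock: each datatype carries reader, writer, and writer-wait counts, and new readers are held off whenever a writer holds or awaits the lock, which gives writers priority. A self-contained equivalent using one mutex and condition variable (compile with -pthread; the kill_thread escape hatch is omitted):

#include <pthread.h>

struct wr_lock {
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	int readers, writers, write_waiters;
};

static void rd_lock(struct wr_lock *l)
{
	pthread_mutex_lock(&l->mutex);
	while (l->writers || l->write_waiters)	/* writers go first */
		pthread_cond_wait(&l->cond, &l->mutex);
	l->readers++;
	pthread_mutex_unlock(&l->mutex);
}

static void rd_unlock(struct wr_lock *l)
{
	pthread_mutex_lock(&l->mutex);
	l->readers--;
	pthread_mutex_unlock(&l->mutex);
	pthread_cond_broadcast(&l->cond);	/* wake any waiting writer */
}

static void wr_lock(struct wr_lock *l)
{
	pthread_mutex_lock(&l->mutex);
	l->write_waiters++;			/* announce intent to write */
	while (l->readers || l->writers)
		pthread_cond_wait(&l->cond, &l->mutex);
	l->write_waiters--;
	l->writers++;
	pthread_mutex_unlock(&l->mutex);
}

static void wr_unlock(struct wr_lock *l)
{
	pthread_mutex_lock(&l->mutex);
	l->writers--;
	pthread_mutex_unlock(&l->mutex);
	pthread_cond_broadcast(&l->cond);
}

int main(void)
{
	struct wr_lock l = { PTHREAD_MUTEX_INITIALIZER,
			     PTHREAD_COND_INITIALIZER, 0, 0, 0 };
	rd_lock(&l); rd_unlock(&l);
	wr_lock(&l); wr_unlock(&l);
	return 0;
}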
*/ +void lock_state_files(void) { - pthread_mutex_lock (&state_mutex); + slurm_mutex_lock(&state_mutex); } -void -unlock_state_files ( void ) +void unlock_state_files(void) { - pthread_mutex_unlock (&state_mutex); + slurm_mutex_unlock(&state_mutex); } - diff --git a/src/slurmctld/locks.h b/src/slurmctld/locks.h index bf1e8171654..43701c1f670 100644 --- a/src/slurmctld/locks.h +++ b/src/slurmctld/locks.h @@ -110,11 +110,26 @@ typedef struct { } slurmctld_lock_flags_t; +/* get_lock_values - Get the current value of all locks + * OUT lock_flags - a copy of the current lock values */ extern void get_lock_values (slurmctld_lock_flags_t *lock_flags); + +/* init_locks - create locks used for slurmctld data structure access + * control */ extern void init_locks ( void ); + +/* kill_locked_threads - Kill all threads waiting on semaphores */ extern void kill_locked_threads ( void ); + +/* lock_slurmctld - Issue the required lock requests in a well defined order + * NOTE: blocks until all requested locks are obtained */ extern void lock_slurmctld (slurmctld_lock_t lock_levels); + +/* unlock_slurmctld - Issue the required unlock requests in a well + * defined order */ extern void unlock_slurmctld (slurmctld_lock_t lock_levels); + +/* un/lock semaphore used for saving state of slurmctld */ extern void inline lock_state_files ( void ); extern void inline unlock_state_files ( void ); diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 6e6c834da1c..64b7a47efde 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -28,7 +28,7 @@ \*****************************************************************************/ #ifdef HAVE_CONFIG_H -# include <config.h> +# include "config.h" #endif #include <ctype.h> @@ -41,44 +41,54 @@ #include <sys/stat.h> #include <fcntl.h> -#include <src/common/hostlist.h> -#include <src/common/pack.h> -#include <src/common/xstring.h> -#include <src/slurmctld/agent.h> -#include <src/slurmctld/locks.h> -#include <src/slurmctld/slurmctld.h> +#include "src/common/hostlist.h" +#include "src/common/pack.h" +#include "src/common/xstring.h" +#include "src/slurmctld/agent.h" +#include "src/slurmctld/locks.h" +#include "src/slurmctld/slurmctld.h" #define BUF_SIZE 4096 +/* Global variables */ List config_list = NULL; /* list of config_record entries */ -struct node_record *node_record_table_ptr = NULL; /* location of the node records */ -int *hash_table = NULL; /* table of hashed indicies into node_record */ +struct node_record *node_record_table_ptr = NULL; /* node records */ +int *hash_table = NULL; /* table of hashed indexes into + * node_record */ struct config_record default_config_record; struct node_record default_node_record; -time_t last_bitmap_update = (time_t) NULL; /* time of last node creation or deletion */ -time_t last_node_update = (time_t) NULL; /* time of last update to node records */ - +time_t last_bitmap_update = (time_t) NULL; /* time of last node creation + * or deletion */ +time_t last_node_update = (time_t) NULL; /* time of last update to + * node records */ bitstr_t *up_node_bitmap = NULL; /* bitmap of nodes are up */ bitstr_t *idle_node_bitmap = NULL; /* bitmap of nodes are idle */ -int delete_config_record (); -void dump_hash (); -void dump_node_state (struct node_record *dump_node_ptr, Buf buffer); -int hash_index (char *name); -void pack_node (struct node_record *dump_node_ptr, Buf buffer); -void split_node_name (char *name, char *prefix, char *suffix, int *index, int *digits); + +static int _delete_config_record (void); +static void _dump_node_state 
(struct node_record *dump_node_ptr, + Buf buffer); static int _hash_index (char *name); +static void _list_delete_config (void *config_entry); +static int _list_find_config (void *config_entry, void *key); +static void _pack_node (struct node_record *dump_node_ptr, Buf buffer); +static void _split_node_name (char *name, char *prefix, char *suffix, + int *index, int *digits); + +#if DEBUG_SYSTEM +static void _dump_hash (void); +#endif /* - * bitmap2node_name - given a bitmap, build a list of comma separated node names. - * names may include regular expressions (e.g. "lx[01-10]") - * input: bitmap - bitmap pointer - * output: returns pointer to node list or NULL on error + * bitmap2node_name - given a bitmap, build a list of comma separated node + * names. names may include regular expressions (e.g. "lx[01-10]") + * IN bitmap - bitmap pointer + * RET pointer to node list or NULL on error * globals: node_record_table_ptr - pointer to node table * NOTE: the caller must xfree the memory at node_list when no longer required */ -char * -bitmap2node_name (bitstr_t *bitmap) +char * bitmap2node_name (bitstr_t *bitmap) { char *node_list_ptr; int node_list_size, i; @@ -106,7 +116,7 @@ bitmap2node_name (bitstr_t *bitmap) } if (bit_test (bitmap, i) == 0) continue; - split_node_name (node_record_table_ptr[i].name, prefix, + _split_node_name (node_record_table_ptr[i].name, prefix, suffix, &index, &last_digits); if ((index == (last_index + 1)) && /* next in sequence */ (strcmp (last_prefix, prefix) == 0) && @@ -175,16 +185,16 @@ bitmap2node_name (bitstr_t *bitmap) /* - * create_config_record - create a config_record entry and set is values to the defaults. - * each config record corresponds to a line in the slurm.conf file and typically - * describes the configuration of a large number of nodes - * output: returns pointer to the config_record + * create_config_record - create a config_record entry and set its values to + * the defaults.
each config record corresponds to a line in the + * slurm.conf file and typically describes the configuration of a + * large number of nodes + * RET pointer to the config_record * global: default_config_record - default configuration values - * NOTE: memory allocated will remain in existence until delete_config_record() is called - * to deletet all configuration records + * NOTE: memory allocated will remain in existence until + * _delete_config_record() is called to delete all configuration records */ -struct config_record * -create_config_record (void) +struct config_record * create_config_record (void) { struct config_record *config_point; @@ -211,7 +221,7 @@ create_config_record (void) config_point->feature = (char *) NULL; if (list_append(config_list, config_point) == NULL) - fatal ("create_config_record: unable to allocate memory\n"); + fatal ("create_config_record: unable to allocate memory"); return config_point; } @@ -219,14 +229,14 @@ create_config_record (void) /* * create_node_record - create a node record and set its values to defaults - * input: config_point - pointer to node's configuration information - * node_name - name of the node - * output: returns a pointer to the record or NULL if error + * IN config_point - pointer to node's configuration information + * IN node_name - name of the node + * RET pointer to the record or NULL if error * global: default_node_record - default node values - * NOTE: the record's values are initialized to those of default_node_record, node_name and - * config_point's cpus, real_memory, and tmp_disk values - * NOTE: allocates memory at node_record_table_ptr that must be xfreed when the - * global node table is no longer required + * NOTE: the record's values are initialized to those of default_node_record, + * node_name and config_point's cpus, real_memory, and tmp_disk values + * NOTE: allocates memory at node_record_table_ptr that must be xfreed when + * the global node table is no longer required */ struct node_record * create_node_record (struct config_record *config_point, char *node_name) @@ -240,15 +250,20 @@ create_node_record (struct config_record *config_point, char *node_name) if (node_name == NULL) fatal ("create_node_record: node_name is NULL"); if (strlen (node_name) >= MAX_NAME_LEN) - fatal ("create_node_record: node_name too long: %s", node_name); + fatal ("create_node_record: node_name too long: %s", + node_name); /* round up the buffer size to reduce overhead of xrealloc */ old_buffer_size = (node_record_count) * sizeof (struct node_record); - old_buffer_size = ((int) ((old_buffer_size / BUF_SIZE) + 1)) * BUF_SIZE; - new_buffer_size = (node_record_count + 1) * sizeof (struct node_record); - new_buffer_size = ((int) ((new_buffer_size / BUF_SIZE) + 1)) * BUF_SIZE; + old_buffer_size = + ((int) ((old_buffer_size / BUF_SIZE) + 1)) * BUF_SIZE; + new_buffer_size = + (node_record_count + 1) * sizeof (struct node_record); + new_buffer_size = + ((int) ((new_buffer_size / BUF_SIZE) + 1)) * BUF_SIZE; if (node_record_count == 0) - node_record_table_ptr = (struct node_record *) xmalloc (new_buffer_size); + node_record_table_ptr = + (struct node_record *) xmalloc (new_buffer_size); else if (old_buffer_size != new_buffer_size) xrealloc (node_record_table_ptr, new_buffer_size); @@ -269,15 +284,14 @@ create_node_record (struct config_record *config_point, char *node_name) /* - * delete_config_record - delete all configuration records - * output: returns 0 if no error, errno otherwise + * _delete_config_record - delete all configuration 
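create_node_record() below rounds both the old and new table sizes up to whole BUF_SIZE blocks, so xrealloc() is only asked to move the node table when a block boundary is actually crossed. A small demonstration of the rounding arithmetic (the record size is a made-up value):

#include <stddef.h>
#include <stdio.h>

#define BUF_SIZE 4096

static size_t rounded(size_t bytes)
{
	return ((bytes / BUF_SIZE) + 1) * BUF_SIZE;	/* round up to block */
}

int main(void)
{
	size_t rec = 512;	/* pretend node record size */
	int count;
	for (count = 1; count <= 10; count++) {
		size_t cur = rounded((count - 1) * rec);
		size_t nxt = rounded(count * rec);
		if (cur != nxt)	/* only these counts trigger a realloc */
			printf("count %d grows buffer to %zu\n", count, nxt);
	}
	return 0;
}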
records + * RET 0 if no error, errno otherwise * global: config_list - list of all configuration records */ -int -delete_config_record () +static int _delete_config_record (void) { last_node_update = time (NULL); - (void) list_delete_all (config_list, &list_find_config, + (void) list_delete_all (config_list, &_list_find_config, "universal_key"); return SLURM_SUCCESS; } @@ -287,19 +301,19 @@ delete_config_record () * delete_node_record - delete the node record for a node with specified name * to avoid invalidating the bitmaps and hash table, we just clear the name * set its state to NODE_STATE_DOWN - * input: name - name of the desired node - * output: return SLURM_SUCCESS on success, errno otherwise + * IN name - name of the desired node + * RET 0 on success, errno otherwise * global: node_record_table_ptr - pointer to global node table */ -int -delete_node_record (char *name) +int delete_node_record (char *name) { struct node_record *node_record_point; /* pointer to node_record */ last_node_update = time (NULL); node_record_point = find_node_record (name); if (node_record_point == (struct node_record *) NULL) { - error ("delete_node_record: attempt to delete non-existent node %s", name); + error("delete_node_record: can't delete non-existent node %s", + name); return ENOENT; } @@ -315,36 +329,14 @@ delete_node_record (char *name) } -/* - * dump_hash - print the hash_table contents, used for debugging or analysis of hash technique - * global: node_record_table_ptr - pointer to global node table - * hash_table - table of hash indecies - */ -void -dump_hash () -{ - int i, inx; - - if (hash_table == NULL) - return; - for (i = 0; i < node_record_count; i++) { - inx = hash_table[i]; - if ((inx >= node_record_count) || - (strlen (node_record_table_ptr[inx].name) == 0)) - continue; - debug ("hash:%d:%s", i, node_record_table_ptr[inx].name); - } -} - - /* dump_all_node_state - save the state of all nodes to file */ -int -dump_all_node_state ( void ) +int dump_all_node_state ( void ) { int error_code = 0, inx, log_fd; char *old_file, *new_file, *reg_file; /* Locks: Read config and node */ - slurmctld_lock_t node_read_lock = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; + slurmctld_lock_t node_read_lock = { READ_LOCK, NO_LOCK, READ_LOCK, + NO_LOCK }; Buf buffer = init_buf(BUF_SIZE*16); /* write header: time */ @@ -354,10 +346,11 @@ dump_all_node_state ( void ) lock_slurmctld (node_read_lock); for (inx = 0; inx < node_record_count; inx++) { if ((node_record_table_ptr[inx].magic != NODE_MAGIC) || - (node_record_table_ptr[inx].config_ptr->magic != CONFIG_MAGIC)) + (node_record_table_ptr[inx].config_ptr->magic != + CONFIG_MAGIC)) fatal ("dump_all_node_state: data integrity is bad"); - dump_node_state (&node_record_table_ptr[inx], buffer); + _dump_node_state (&node_record_table_ptr[inx], buffer); } unlock_slurmctld (node_read_lock); @@ -371,13 +364,16 @@ dump_all_node_state ( void ) lock_state_files (); log_fd = creat (new_file, 0600); if (log_fd == 0) { - error ("Can't save state, error creating file %s %m", new_file); + error ("Can't save state, error creating file %s %m", + new_file); error_code = errno; } else { - if (write (log_fd, get_buf_data(buffer), get_buf_offset(buffer)) != + if (write (log_fd, get_buf_data(buffer), + get_buf_offset(buffer)) != get_buf_offset(buffer)) { - error ("Can't save state, error writing file %s %m", new_file); + error ("Can't save state, error writing file %s %m", + new_file); error_code = errno; } close (log_fd); @@ -401,12 +397,12 @@ dump_all_node_state ( void ) } /* - 
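dump_all_node_state() writes a complete image to a separate file before any rename, so a crash mid-write cannot corrupt the last good state. The old/reg/new rotation below is an assumption about the elided rename sequence, kept deliberately simple:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int save_state(const char *file, const char *data, size_t len)
{
	char new_file[256], old_file[256];
	int fd;

	snprintf(new_file, sizeof(new_file), "%s.new", file);
	snprintf(old_file, sizeof(old_file), "%s.old", file);

	fd = creat(new_file, 0600);
	if (fd < 0)
		return -1;
	if (write(fd, data, len) != (ssize_t) len) {	/* partial write: bail */
		close(fd);
		return -1;
	}
	close(fd);

	(void) unlink(old_file);	/* rotate: current image becomes .old */
	(void) rename(file, old_file);
	return rename(new_file, file);	/* publish the complete new image */
}

int main(void)
{
	const char msg[] = "node state image";
	return save_state("/tmp/node_state.sketch", msg, sizeof(msg) - 1);
}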
* dump_node_state - dump the state of a specific node to a buffer - * dump_node_ptr (I) - pointer to node for which information is requested - * buffer (I/O) - location to store data, pointers automatically advanced + * _dump_node_state - dump the state of a specific node to a buffer + * IN dump_node_ptr - pointer to node for which information is requested + * IN/OUT buffer - location to store data, pointers automatically advanced */ -void -dump_node_state (struct node_record *dump_node_ptr, Buf buffer) +static void +_dump_node_state (struct node_record *dump_node_ptr, Buf buffer) { packstr (dump_node_ptr->name, buffer); pack16 (dump_node_ptr->node_state, buffer); @@ -416,11 +412,11 @@ dump_node_state (struct node_record *dump_node_ptr, Buf buffer) } /* - * load_node_state - load the node state from file, recover from slurmctld restart. - * execute this after loading the configuration file data. + * load_node_state - load the node state from file, recover from slurmctld + * restart. execute this after loading the configuration file data. + * data goes into common storage */ -int -load_node_state ( void ) +int load_node_state ( void ) { char *node_name, *data = NULL, *state_file; int data_allocated, data_read = 0, error_code = 0; @@ -443,7 +439,9 @@ load_node_state ( void ) else { data_allocated = BUF_SIZE; data = xmalloc(data_allocated); - while ((data_read = read (state_fd, &data[data_size], BUF_SIZE)) == BUF_SIZE) { + while ((data_read = + read (state_fd, &data[data_size], BUF_SIZE)) == + BUF_SIZE) { data_size += data_read; data_allocated += BUF_SIZE; xrealloc(data, data_allocated); @@ -468,7 +466,8 @@ load_node_state ( void ) /* validity test as possible */ if ((cpus == 0) || - ((node_state & (~NODE_STATE_NO_RESPOND)) >= NODE_STATE_END)) { + ((node_state & (~NODE_STATE_NO_RESPOND)) >= + NODE_STATE_END)) { error ("Invalid data for node %s: cpus=%u, state=%u", node_name, cpus, node_state); error ("No more node data will be processed from the checkpoint file"); @@ -488,7 +487,8 @@ load_node_state ( void ) node_ptr->tmp_disk = tmp_disk; node_ptr->last_response = time (NULL); } else { - error ("Node %s has vanished from configuration", node_name); + error ("Node %s has vanished from configuration", + node_name); } if (node_name) xfree (node_name); @@ -498,7 +498,7 @@ load_node_state ( void ) return error_code; unpack_error: - error ("Incomplete node data checkpoint file. State not completely restored"); + error ("Incomplete node data checkpoint file. Incomplete restore."); free_buf (buffer); return EFAULT; } @@ -517,14 +517,16 @@ find_node_record (char *name) /* try to find in hash table first */ if (hash_table) { - i = hash_index (name); + i = _hash_index (name); if ( (i <= node_record_count) && ((inx = hash_table[i]) <= node_record_count ) && - (strncmp ((node_record_table_ptr[inx]).name, name, MAX_NAME_LEN) == 0) ) + (strncmp ((node_record_table_ptr[inx]).name, name, + MAX_NAME_LEN) == 0) ) return (node_record_table_ptr + inx); - debug ("find_node_record: hash table lookup failure for %s", name); + debug ("find_node_record: hash table lookup failure for %s", + name); #if DEBUG_SYSTEM - dump_hash (); + _dump_hash (); #endif } @@ -542,15 +544,13 @@ find_node_record (char *name) /* - * hash_index - return a hash table index for the given node name - * this code is optimized for names containing a base-ten suffix (e.g. 
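load_node_state() pulls the whole checkpoint into memory with a grow-as-you-read loop, extending the buffer one BUF_SIZE block at a time until read() comes up short. The same loop as a standalone helper (error handling abbreviated):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define BUF_SIZE 4096

static char *read_whole_file(const char *path, size_t *size)
{
	int fd = open(path, O_RDONLY);
	ssize_t n;
	size_t allocated = BUF_SIZE, used = 0;
	char *data;

	if (fd < 0)
		return NULL;
	data = malloc(allocated);
	while ((n = read(fd, data + used, BUF_SIZE)) == BUF_SIZE) {
		used += n;
		allocated += BUF_SIZE;	/* room for the next block */
		data = realloc(data, allocated);
	}
	if (n > 0)
		used += n;		/* final short read */
	close(fd);
	*size = used;
	return data;
}

int main(void)
{
	size_t size = 0;
	char *data = read_whole_file("/etc/hostname", &size);
	printf("read %zu bytes\n", size);
	free(data);
	return 0;
}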
"lx04") - * input: the node's name - * output: return code is the hash table index + * _hash_index - return a hash table index for the given node name + * IN name = the node's name + * RET the hash table index * global: hash_table - table of hash indexes * slurmctld_conf.hash_base - numbering base for sequence numbers */ -int -hash_index (char *name) +static int _hash_index (char *name) { int i, inx, tmp; @@ -564,7 +564,8 @@ hash_index (char *name) if (tmp == 0) break; /* end if string */ if ((tmp >= (int) '0') && (tmp <= (int) '9')) - inx = (inx * slurmctld_conf.hash_base) + (tmp - (int) '0'); + inx = (inx * slurmctld_conf.hash_base) + + (tmp - (int) '0'); } } @@ -574,7 +575,8 @@ hash_index (char *name) if (tmp == 0) break; /* end if string */ if ((tmp >= (int) '0') && (tmp <= (int) '7')) - inx = (inx * slurmctld_conf.hash_base) + (tmp - (int) '0'); + inx = (inx * slurmctld_conf.hash_base) + + (tmp - (int) '0'); } } @@ -583,14 +585,17 @@ hash_index (char *name) tmp = (int) name[i]; if (tmp == 0) break; /* end if string */ - if ((tmp >= (int) '0') && (tmp <= (int) '9')) { /* value 0-9 */ + if ((tmp >= (int) '0') && (tmp <= (int) '9')) { + /* value 0-9 */ tmp -= (int) '0'; } - else if ((tmp >= (int) 'a') && (tmp <= (int) 'z')) { /* value 10-35 */ + else if ((tmp >= (int) 'a') && (tmp <= (int) 'z')) { + /* value 10-35 */ tmp -= (int) 'a'; tmp += 10; } - else if ((tmp >= (int) 'a') && (tmp <= (int) 'z')) { /* value 10-35 */ + else if ((tmp >= (int) 'a') && (tmp <= (int) 'z')) { + /* value 10-35 */ tmp -= (int) 'a'; tmp += 10; } @@ -608,16 +613,16 @@ hash_index (char *name) /* * init_node_conf - initialize the node configuration tables and values. - * this should be called before creating any node or configuration entries. - * output: return value - 0 if no error, otherwise an error code + * this should be called before creating any node or configuration + * entries. 
+ * RET 0 if no error, otherwise an error code * global: node_record_table_ptr - pointer to global node table * default_node_record - default values for node records * default_config_record - default values for configuration records * hash_table - table of hash indecies * last_node_update - time of last node table update */ -int -init_node_conf () +int init_node_conf (void) { last_node_update = time (NULL); @@ -654,9 +659,9 @@ init_node_conf () default_config_record.node_bitmap = (bitstr_t *) NULL; if (config_list) /* delete defunct configuration entries */ - (void) delete_config_record (); + (void) _delete_config_record (); else - config_list = list_create (&list_delete_config); + config_list = list_create (&_list_delete_config); if (config_list == NULL) fatal ("init_node_conf: list_create can not allocate memory"); @@ -664,10 +669,9 @@ init_node_conf () } -/* list_compare_config - compare two entry from the config list based upon weight, - * see list.h for documentation */ -int -list_compare_config (void *config_entry1, void *config_entry2) +/* list_compare_config - compare two entries from the config list based upon + * weight, see common/list.h for documentation */ +int list_compare_config (void *config_entry1, void *config_entry2) { int weight1, weight2; weight1 = ((struct config_record *) config_entry1)->weight; @@ -676,11 +680,12 @@ list_compare_config (void *config_entry1, void *config_entry2) } -/* list_delete_config - delete an entry from the config list, see list.h for documentation */ -void -list_delete_config (void *config_entry) +/* _list_delete_config - delete an entry from the config list, + * see list.h for documentation */ +static void _list_delete_config (void *config_entry) { - struct config_record *config_record_point; /* pointer to config_record */ + struct config_record *config_record_point; + config_record_point = (struct config_record *) config_entry; if (config_record_point->feature) xfree (config_record_point->feature); @@ -692,10 +697,13 @@ list_delete_config (void *config_entry) } -/* list_find_config - find an entry in the config list, see list.h for documentation - * key is partition name or "universal_key" for all config */ -int -list_find_config (void *config_entry, void *key) +/* + * _list_find_config - find an entry in the config list, see list.h for + * documentation + * IN key - is "universal_key" for all config + * RET 1 if key == "universal_key", 0 otherwise + */ +static int _list_find_config (void *config_entry, void *key) { if (strcmp (key, "universal_key") == 0) return 1; @@ -704,16 +712,15 @@ } /* - * node_name2bitmap - given a node name regular expression, build a bitmap representation - * input: node_names - list of nodes - * bitmap - place to put bitmap pointer - * output: bitmap - set to bitmap or NULL on error - * returns 0 if no error, otherwise EINVAL or enomem + * node_name2bitmap - given a node name regular expression, build a bitmap + * representation + * IN node_names - list of nodes + * OUT bitmap - set to bitmap or NULL on error + * RET 0 if no error, otherwise EINVAL or ENOMEM * global: node_record_table_ptr - pointer to global node table * NOTE: the caller must xfree memory at bitmap when no longer required */ -int -node_name2bitmap (char *node_names, bitstr_t **bitmap) +int node_name2bitmap (char *node_names, bitstr_t **bitmap) { struct node_record *node_record_point; char *this_node_name; @@ -742,14 +749,16 @@ node_name2bitmap (char *node_names, bitstr_t **bitmap) while ( 
(this_node_name = hostlist_shift (host_list)) ) { node_record_point = find_node_record (this_node_name); if (node_record_point == NULL) { - error ("node_name2bitmap: invalid node specified %s",this_node_name); + error ("node_name2bitmap: invalid node specified %s", + this_node_name); hostlist_destroy (host_list); bit_free (my_bitmap); free (this_node_name); return EINVAL; } bit_set (my_bitmap, - (bitoff_t) (node_record_point - node_record_table_ptr)); + (bitoff_t) (node_record_point - + node_record_table_ptr)); free (this_node_name); } @@ -760,22 +769,18 @@ /* - * pack_all_node - dump all configuration and node information for all nodes in - * machine independent form (for network transmission) - * input: buffer_ptr - location into which a pointer to the data is to be stored. - * the calling function must xfree the storage. - * buffer_size - location into which the size of the created buffer is in bytes - * update_time - dump new data only if partition records updated since time - * specified, otherwise return empty buffer - * output: buffer_ptr - the pointer is set to the allocated buffer. - * buffer_size - set to size of the buffer in bytes - * update_time - set to time partition records last updated + * pack_all_node - dump all configuration and node information for all nodes + * in machine independent form (for network transmission) + * OUT buffer_ptr - pointer to the stored data + * OUT buffer_size - set to size of the buffer in bytes + * IN/OUT update_time - dump new data only if node records updated since + * time specified, otherwise return empty buffer, set to time node + * records last updated * global: node_record_table_ptr - pointer to global node table - * NOTE: the caller must xfree the buffer at *buffer_ptr when no longer required - * NOTE: change slurm_load_node() in api/node_info.c whenever the data format changes + * NOTE: the caller must xfree the buffer at *buffer_ptr + * NOTE: change slurm_load_node() in api/node_info.c when data format changes */ -void -pack_all_node (char **buffer_ptr, int *buffer_size, time_t * update_time) +void pack_all_node (char **buffer_ptr, int *buffer_size, time_t * update_time) { int inx; uint32_t nodes_packed, tmp_offset; @@ -796,10 +801,11 @@ pack_all_node (char **buffer_ptr, int *buffer_size, time_t * update_time) /* write node records */ for (inx = 0; inx < node_record_count; inx++) { if ((node_record_table_ptr[inx].magic != NODE_MAGIC) || - (node_record_table_ptr[inx].config_ptr->magic != CONFIG_MAGIC)) + (node_record_table_ptr[inx].config_ptr->magic != + CONFIG_MAGIC)) fatal ("pack_all_node: data integrity is bad"); - pack_node(&node_record_table_ptr[inx], buffer); + _pack_node(&node_record_table_ptr[inx], buffer); nodes_packed ++ ; } @@ -815,24 +821,24 @@ pack_all_node (char **buffer_ptr, int *buffer_size, time_t * update_time) /* - * pack_node - dump all configuration information about a specific node in + * _pack_node - dump all configuration information about a specific node in * machine independent form (for network transmission) - * dump_node_ptr (I) - pointer to node for which information is requested - * buffer (I/O) - buffer in which data is place, pointers automatically updated - * NOTE: if you make any changes here be sure to make the corresponding changes to - * load_node_config in api/node_info.c + * IN dump_node_ptr - pointer to node for which information is requested + * IN/OUT buffer - buffer where data is placed, pointers automatically updated + * NOTE: 
if you make any changes here be sure to make the corresponding + * changes to load_node_config in api/node_info.c */ -void -pack_node (struct node_record *dump_node_ptr, Buf buffer) +static void _pack_node (struct node_record *dump_node_ptr, Buf buffer) { packstr (dump_node_ptr->name, buffer); pack16 (dump_node_ptr->node_state, buffer); - if (slurmctld_conf.fast_schedule) { /* Only data from config_record used for scheduling */ + if (slurmctld_conf.fast_schedule) { + /* Only data from config_record used for scheduling */ pack32 (dump_node_ptr->config_ptr->cpus, buffer); pack32 (dump_node_ptr->config_ptr->real_memory, buffer); pack32 (dump_node_ptr->config_ptr->tmp_disk, buffer); - } - else { /* Individual node data used for scheduling */ + } else { + /* Individual node data used for scheduling */ pack32 (dump_node_ptr->cpus, buffer); pack32 (dump_node_ptr->real_memory, buffer); pack32 (dump_node_ptr->tmp_disk, buffer); @@ -847,18 +853,15 @@ /* - * rehash - build a hash table of the node_record entries. this is a large hash table - * to permit the immediate finding of a record based only upon its name without regards - * to the number. there should be no need for a search. the algorithm is optimized for - * node names with a base-ten sequence number suffix. if you have a large cluster and - * use a different naming convention, this function and/or the hash_index function - * should be re-written. + * rehash - build a hash table of the node_record entries. this is a large + * hash table to permit the immediate finding of a record based only + * upon its name without regard to its number. there should be no + * need for a search. * global: node_record_table_ptr - pointer to global node table * hash_table - table of hash indecies - * NOTE: allocates memory for hash_table + * NOTE: manages memory for hash_table */ -void -rehash () +void rehash (void) { int i, inx; @@ -868,7 +871,7 @@ rehash () for (i = 0; i < node_record_count; i++) { if (strlen (node_record_table_ptr[i].name) == 0) continue; - inx = hash_index (node_record_table_ptr[i].name); + inx = _hash_index (node_record_table_ptr[i].name); hash_table[inx] = i; } @@ -876,9 +879,9 @@ rehash () } -/* set_slurmd_addr - establish the slurm_addr for the slurmd on each node */ -void -set_slurmd_addr (void) +/* set_slurmd_addr - establish the slurm_addr for the slurmd on each node + * Uses common data structures. 
*/ +void set_slurmd_addr (void) { int i; @@ -909,16 +912,15 @@ /* - * split_node_name - split a node name into prefix, suffix, index value, and digit count - * input: name - the node name to parse - * prefix, suffix, index, digits - location into which to store node name's constituents - * output: prefix, suffix, index - the node name's constituents - * index - index, defaults to NO_VAL - * digits - number of digits in the index, defaults to NO_VAL + * _split_node_name - split a node name into prefix, suffix, index value, + * and digit count + * IN name - the node name to parse + * OUT prefix, suffix - the node name's constituents + * OUT index - index, defaults to NO_VAL + * OUT digits - number of digits in the index, defaults to NO_VAL */ -void -split_node_name (char *name, char *prefix, char *suffix, int *index, - int *digits) +static void _split_node_name (char *name, char *prefix, char *suffix, + int *index, int *digits) { int i; char tmp[2]; @@ -952,10 +954,11 @@ split_node_name (char *name, char *prefix, char *suffix, int *index, /* * update_node - update the configuration data for one or more nodes + * IN update_node_msg - update node request + * RET 0 or error code * global: node_record_table_ptr - pointer to global node table */ -int -update_node ( update_node_msg_t * update_node_msg ) +int update_node ( update_node_msg_t * update_node_msg ) { int error_code = 0, state_val, node_inx; char *this_node_name ; @@ -963,15 +966,17 @@ update_node ( update_node_msg_t * update_node_msg ) hostlist_t host_list; if (update_node_msg -> node_names == NULL ) { - error ("update_node: invalid node name %s\n", + error ("update_node: invalid node name %s", update_node_msg -> node_names ); return ESLURM_INVALID_NODE_NAME; } state_val = update_node_msg -> node_state ; - if ( (host_list = hostlist_create (update_node_msg -> node_names)) == NULL) { - error ("hostlist_create error on %s: %m", update_node_msg -> node_names); + if ( (host_list = hostlist_create (update_node_msg -> node_names)) + == NULL) { + error ("hostlist_create error on %s: %m", + update_node_msg -> node_names); return ESLURM_INVALID_NODE_NAME; } @@ -980,7 +985,7 @@ node_record_point = find_node_record (this_node_name); node_inx = node_record_point - node_record_table_ptr; if (node_record_point == NULL) { - error ("update_node: node name %s does not exist, can not be updated", + error ("update_node: node %s does not exist, can't be updated", this_node_name); error_code = ESLURM_INVALID_NODE_NAME; free (this_node_name); @@ -1005,7 +1010,8 @@ bit_clear (idle_node_bitmap, node_inx); } else if (state_val == NODE_STATE_DRAINED) { - if (bit_test (idle_node_bitmap, node_inx) == false) + if (bit_test (idle_node_bitmap, node_inx) == + false) state_val = NODE_STATE_DRAINING; bit_clear (up_node_bitmap, node_inx); } @@ -1017,7 +1023,8 @@ bit_clear (up_node_bitmap, node_inx); } else { - error ("Invalid node state specified %d", state_val); + error ("Invalid node state specified %d", + state_val); } node_record_point->node_state = state_val; @@ -1035,16 +1042,17 @@ /* * validate_node_specs - validate the node's specifications as valid, * if not set state to down, in any case update last_response - * input: node_name - name of the node - * cpus - number of cpus measured - * real_memory - mega_bytes of real_memory 
measured - * tmp_disk - mega_bytes of tmp_disk measured - * output: returns 0 if no error, ENOENT if no such node, EINVAL if values too low + * IN node_name - name of the node + * IN cpus - number of cpus measured + * IN real_memory - mega_bytes of real_memory measured + * IN tmp_disk - mega_bytes of tmp_disk measured + * RET 0 if no error, ENOENT if no such node, EINVAL if values too low * global: node_record_table_ptr - pointer to global node table */ int validate_node_specs (char *node_name, uint32_t cpus, - uint32_t real_memory, uint32_t tmp_disk, uint32_t job_count) { + uint32_t real_memory, uint32_t tmp_disk, + uint32_t job_count) { int error_code; struct config_record *config_ptr; struct node_record *node_ptr; @@ -1059,21 +1067,25 @@ validate_node_specs (char *node_name, uint32_t cpus, error_code = 0; if (cpus < config_ptr->cpus) { - error ("validate_node_specs: node %s has low cpu count %u", node_name, cpus); + error ("validate_node_specs: node %s has low cpu count %u", + node_name, cpus); error_code = EINVAL; } node_ptr->cpus = cpus; if ((config_ptr->cpus != cpus) && (node_ptr->partition_ptr)) - node_ptr->partition_ptr->total_cpus += (cpus - config_ptr->cpus); + node_ptr->partition_ptr->total_cpus += + (cpus - config_ptr->cpus); if (real_memory < config_ptr->real_memory) { - error ("validate_node_specs: node %s has low real_memory size %u", node_name, real_memory); + error ("validate_node_specs: node %s has low real_memory size %u", + node_name, real_memory); error_code = EINVAL; } node_ptr->real_memory = real_memory; if (tmp_disk < config_ptr->tmp_disk) { - error ("validate_node_specs: node %s has low tmp_disk size %u", node_name, tmp_disk); + error ("validate_node_specs: node %s has low tmp_disk size %u", + node_name, tmp_disk); error_code = EINVAL; } node_ptr->tmp_disk = tmp_disk; @@ -1086,12 +1098,14 @@ validate_node_specs (char *node_name, uint32_t cpus, } else { - info ("validate_node_specs: node %s has registered", node_name); + info ("validate_node_specs: node %s has registered", + node_name); node_ptr->cpus = cpus; node_ptr->real_memory = real_memory; node_ptr->tmp_disk = tmp_disk; #ifdef HAVE_LIBELAN3 - /* Every node in a given partition must have the same processor count at present */ + /* Every node in a given partition must have the same + * processor count at present */ if ((slurmctld_conf.fast_schedule == 0) && (node_ptr->config_ptr->cpus != cpus)) { error ("Node %s has processor count inconsistent with rest of partition", @@ -1118,27 +1132,31 @@ validate_node_specs (char *node_name, uint32_t cpus, node_ptr->node_state = NODE_STATE_ALLOCATED; else node_ptr->node_state = NODE_STATE_IDLE; - info ("validate_node_specs: node %s returned to service", node_name); + info ("validate_node_specs: node %s returned to service", + node_name); resp_state = 1; /* just started responding */ } if (node_ptr->node_state == NODE_STATE_IDLE) { - bit_set (idle_node_bitmap, (node_ptr - node_record_table_ptr)); + bit_set (idle_node_bitmap, + (node_ptr - node_record_table_ptr)); if (resp_state) { - /* Node just started responding, do all pending RPCs now */ + /* Node just started responding, + * do all pending RPCs now */ retry_pending (node_name); } } if (node_ptr->node_state != NODE_STATE_DOWN) - bit_set (up_node_bitmap, (node_ptr - node_record_table_ptr)); + bit_set (up_node_bitmap, + (node_ptr - node_record_table_ptr)); } return error_code; } -/* node_did_resp - record that the specified node is responding */ -void -node_did_resp (char *name) +/* node_did_resp - record that the 
specified node is responding + * IN name - name of the node */ +void node_did_resp (char *name) { struct node_record *node_ptr; int node_inx; @@ -1160,7 +1178,8 @@ node_did_resp (char *name) if (node_ptr->node_state == NODE_STATE_IDLE) { bit_set (idle_node_bitmap, node_inx); if (resp_state) { - /* Node just started responding, do all its pending RPCs now */ + /* Node just started responding, + * do all its pending RPCs now */ retry_pending (name); } } @@ -1169,9 +1188,9 @@ node_did_resp (char *name) return; } -/* node_not_resp - record that the specified node is not responding */ -void -node_not_resp (char *name) +/* node_not_resp - record that the specified node is not responding + * IN name - name of the node */ +void node_not_resp (char *name) { struct node_record *node_ptr; int i; @@ -1196,8 +1215,7 @@ node_not_resp (char *name) /* ping_nodes - check that all nodes and daemons are alive, * get nodes in UNKNOWN state to register */ -void -ping_nodes (void) +void ping_nodes (void) { int i, pos, age; time_t now; @@ -1239,21 +1257,26 @@ ping_nodes (void) bit_clear (up_node_bitmap, i); bit_clear (idle_node_bitmap, i); node_record_table_ptr[i].node_state = NODE_STATE_DOWN; - kill_running_job_by_node_name (node_record_table_ptr[i].name); + kill_running_job_by_node_name ( + node_record_table_ptr[i].name); continue; } if (base_state == NODE_STATE_UNKNOWN) { - debug3 ("attempt to register %s now", node_record_table_ptr[i].name); - if ((reg_agent_args->node_count+1) > reg_buf_rec_size) { + debug3 ("attempt to register %s now", + node_record_table_ptr[i].name); + if ((reg_agent_args->node_count+1) > + reg_buf_rec_size) { reg_buf_rec_size += 32; xrealloc ((reg_agent_args->slurm_addr), - (sizeof (struct sockaddr_in) * reg_buf_rec_size)); + (sizeof (struct sockaddr_in) * + reg_buf_rec_size)); xrealloc ((reg_agent_args->node_names), (MAX_NAME_LEN * reg_buf_rec_size)); } - reg_agent_args->slurm_addr[reg_agent_args->node_count] = - node_record_table_ptr[i].slurm_addr; + reg_agent_args->slurm_addr[ + reg_agent_args->node_count] = + node_record_table_ptr[i].slurm_addr; pos = MAX_NAME_LEN * reg_agent_args->node_count; strncpy (&reg_agent_args->node_names[pos], node_record_table_ptr[i].name, MAX_NAME_LEN); @@ -1265,12 +1288,13 @@ ping_nodes (void) if ((ping_agent_args->node_count+1) > ping_buf_rec_size) { ping_buf_rec_size += 32; xrealloc ((ping_agent_args->slurm_addr), - (sizeof (struct sockaddr_in) * ping_buf_rec_size)); + (sizeof (struct sockaddr_in) * + ping_buf_rec_size)); xrealloc ((ping_agent_args->node_names), (MAX_NAME_LEN * ping_buf_rec_size)); } ping_agent_args->slurm_addr[ping_agent_args->node_count] = - node_record_table_ptr[i].slurm_addr; + node_record_table_ptr[i].slurm_addr; pos = MAX_NAME_LEN * ping_agent_args->node_count; strncpy (&ping_agent_args->node_names[pos], node_record_table_ptr[i].name, MAX_NAME_LEN);
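The hunk below is the agent-spawn boilerplate that recurs throughout this patch (the ping agent here, the registration and revoke-credential agents elsewhere): create a detached thread running agent() and retry once if thread creation fails. Collapsed into a hedged helper sketch; the helper name is hypothetical, the daemon keeps this sequence inline:

	/* spawn agent() as a detached thread; retry once on failure */
	static void _spawn_agent_example (agent_arg_t *agent_arg_ptr)
	{
		pthread_attr_t attr;
		pthread_t tid;

		if (pthread_attr_init (&attr))
			fatal ("pthread_attr_init error %m");
		if (pthread_attr_setdetachstate (&attr,
						 PTHREAD_CREATE_DETACHED))
			error ("pthread_attr_setdetachstate error %m");
#ifdef PTHREAD_SCOPE_SYSTEM
		if (pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM))
			error ("pthread_attr_setscope error %m");
#endif
		if (pthread_create (&tid, &attr, agent,
				    (void *) agent_arg_ptr)) {
			error ("pthread_create error %m");
			sleep (1);	/* sleep and try once more */
			if (pthread_create (&tid, &attr, agent,
					    (void *) agent_arg_ptr))
				fatal ("pthread_create error %m");
		}
	}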
error %m"); sleep (1); /* sleep and try once more */ - if (pthread_create (&ping_thread_agent, &ping_attr_agent, - agent, (void *)ping_agent_args)) + if (pthread_create (&ping_thread_agent, + &ping_attr_agent, + agent, (void *)ping_agent_args)) fatal ("pthread_create error %m"); } } @@ -1306,25 +1333,31 @@ ping_nodes (void) debug ("Spawning node registration agent"); if (pthread_attr_init (®_attr_agent)) fatal ("pthread_attr_init error %m"); - if (pthread_attr_setdetachstate (®_attr_agent, PTHREAD_CREATE_DETACHED)) + if (pthread_attr_setdetachstate (®_attr_agent, + PTHREAD_CREATE_DETACHED)) error ("pthread_attr_setdetachstate error %m"); #ifdef PTHREAD_SCOPE_SYSTEM - if (pthread_attr_setscope (®_attr_agent, PTHREAD_SCOPE_SYSTEM)) + if (pthread_attr_setscope (®_attr_agent, + PTHREAD_SCOPE_SYSTEM)) error ("pthread_attr_setscope error %m"); #endif if (pthread_create (®_thread_agent, ®_attr_agent, agent, (void *)reg_agent_args)) { error ("pthread_create error %m"); sleep (1); /* sleep and try once more */ - if (pthread_create (®_thread_agent, ®_attr_agent, - agent, (void *)reg_agent_args)) + if (pthread_create (®_thread_agent, + ®_attr_agent, + agent, (void *)reg_agent_args)) fatal ("pthread_create error %m"); } } } -/* find_first_node_record - find a record for first node in the bitmap */ -extern struct node_record * +/* + * find_first_node_record - find a record for first node in the bitmap + * IN node_bitmap + */ +struct node_record * find_first_node_record (bitstr_t *node_bitmap) { int inx; @@ -1341,3 +1374,27 @@ find_first_node_record (bitstr_t *node_bitmap) return &node_record_table_ptr[inx]; } +#if DEBUG_SYSTEM +/* + * _dump_hash - print the hash_table contents, used for debugging or + * analysis of hash technique + * global: node_record_table_ptr - pointer to global node table + * hash_table - table of hash indecies + */ +static void _dump_hash (void) +{ + int i, inx; + + if (hash_table == NULL) + return; + for (i = 0; i < node_record_count; i++) { + inx = hash_table[i]; + if ((inx >= node_record_count) || + (strlen (node_record_table_ptr[inx].name) == 0)) + continue; + debug ("hash:%d:%s", i, node_record_table_ptr[inx].name); + } +} +#endif + + diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 1d817a3633b..dc705d5c392 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -26,7 +26,7 @@ \*****************************************************************************/ #ifdef HAVE_CONFIG_H -# include <config.h> +# include "config.h" #endif #include <errno.h> @@ -37,11 +37,11 @@ #include <syslog.h> #include <unistd.h> -#include <src/common/hostlist.h> -#include <src/common/slurm_errno.h> -#include <src/common/xmalloc.h> -#include <src/slurmctld/agent.h> -#include <src/slurmctld/slurmctld.h> +#include "src/common/hostlist.h" +#include "src/common/slurm_errno.h" +#include "src/common/xmalloc.h" +#include "src/slurmctld/agent.h" +#include "src/slurmctld/slurmctld.h" #define BUF_SIZE 1024 @@ -53,32 +53,35 @@ struct node_set { /* set of nodes with same configuration */ bitstr_t *my_bitmap; }; -int pick_best_quadrics (bitstr_t *bitmap, bitstr_t *req_bitmap, int req_nodes, - int req_cpus, int consecutive); -int pick_best_nodes (struct node_set *node_set_ptr, int node_set_size, - bitstr_t **req_bitmap, uint32_t req_cpus, uint32_t req_nodes, - int contiguous, int shared, uint32_t max_nodes); -void slurm_revoke_job_cred (struct node_record * node_ptr, revoke_credential_msg_t * revoke_job_cred_ptr); -int valid_features (char *requested, 
char *available); +static int _match_feature(char *seek, char *available); +static int _pick_best_quadrics(bitstr_t * bitmap, bitstr_t * req_bitmap, + int req_nodes, int req_cpus, + int consecutive); +static int _pick_best_nodes(struct node_set *node_set_ptr, + int node_set_size, bitstr_t ** req_bitmap, + uint32_t req_cpus, uint32_t req_nodes, + int contiguous, int shared, + uint32_t max_nodes); +static int _valid_features(char *requested, char *available); -/* allocate_nodes - for a given bitmap, change the state of specified nodes to NODE_STATE_ALLOCATED +/* allocate_nodes - change state of specified nodes to NODE_STATE_ALLOCATED + * IN bitmap - map of nodes to be allocated * globals: node_record_count - number of nodes in the system * node_record_table_ptr - pointer to global node table * last_node_update - last update time of node table */ -void -allocate_nodes (unsigned *bitmap) +void allocate_nodes(unsigned *bitmap) { int i; - last_node_update = time (NULL); + last_node_update = time(NULL); for (i = 0; i < node_record_count; i++) { - if (bit_test (bitmap, i) == 0) + if (bit_test(bitmap, i) == 0) continue; node_record_table_ptr[i].node_state = NODE_STATE_ALLOCATED; - bit_clear (idle_node_bitmap, i); + bit_clear(idle_node_bitmap, i); } return; } @@ -86,19 +89,18 @@ allocate_nodes (unsigned *bitmap) /* * count_cpus - report how many cpus are associated with the identified nodes - * input: bitmap - a node bitmap - * output: returns a cpu count + * IN bitmap - map of nodes to tally + * RET cpu count * globals: node_record_count - number of nodes configured * node_record_table_ptr - pointer to global node table */ -int -count_cpus (unsigned *bitmap) +int count_cpus(unsigned *bitmap) { int i, sum; sum = 0; - for (i = 0; i < node_record_count; i++) { - if (bit_test (bitmap, i) != 1) + for (i = 0; i < node_record_count; i++) { + if (bit_test(bitmap, i) != 1) continue; sum += node_record_table_ptr[i].cpus; } @@ -106,12 +108,14 @@ count_cpus (unsigned *bitmap) } -/* deallocate_nodes - for a given job, deallocate its nodes and make their state NODE_STATE_IDLE +/* + * deallocate_nodes - for a given job, deallocate its nodes and make + * their state NODE_STATE_IDLE + * IN job_ptr - pointer to terminating job * globals: node_record_count - number of nodes in the system * node_record_table_ptr - pointer to global node table */ -void -deallocate_nodes (struct job_record * job_ptr) +void deallocate_nodes(struct job_record *job_ptr) { int i; revoke_credential_msg_t *revoke_job_cred; @@ -121,149 +125,84 @@ deallocate_nodes (struct job_record * job_ptr) int buf_rec_size = 0; uint16_t no_resp_flag, base_state; - agent_args = xmalloc (sizeof (agent_arg_t)); + agent_args = xmalloc(sizeof(agent_arg_t)); agent_args->msg_type = REQUEST_REVOKE_JOB_CREDENTIAL; agent_args->retry = 1; - revoke_job_cred = xmalloc (sizeof (revoke_credential_msg_t)); - last_node_update = time (NULL); + revoke_job_cred = xmalloc(sizeof(revoke_credential_msg_t)); + last_node_update = time(NULL); revoke_job_cred->job_id = job_ptr->job_id; - revoke_job_cred->expiration_time = job_ptr->details->credential.expiration_time ; - memset ( (void *)revoke_job_cred->signature, 0, sizeof (revoke_job_cred->signature)); + revoke_job_cred->expiration_time = + job_ptr->details->credential.expiration_time; + memset((void *) revoke_job_cred->signature, 0, + sizeof(revoke_job_cred->signature)); for (i = 0; i < node_record_count; i++) { - if (bit_test (job_ptr->node_bitmap, i) == 0) + if (bit_test(job_ptr->node_bitmap, i) == 0) continue; - if 
((agent_args->node_count+1) > buf_rec_size) { + if ((agent_args->node_count + 1) > buf_rec_size) { buf_rec_size += 32; - xrealloc ((agent_args->slurm_addr), - (sizeof (struct sockaddr_in) * buf_rec_size)); - xrealloc ((agent_args->node_names), - (MAX_NAME_LEN * buf_rec_size)); + xrealloc((agent_args->slurm_addr), + (sizeof(struct sockaddr_in) * + buf_rec_size)); + xrealloc((agent_args->node_names), + (MAX_NAME_LEN * buf_rec_size)); } - agent_args->slurm_addr[agent_args->node_count] = - node_record_table_ptr[i].slurm_addr; - strncpy (&agent_args->node_names[MAX_NAME_LEN*agent_args->node_count], - node_record_table_ptr[i].name, MAX_NAME_LEN); + agent_args->slurm_addr[agent_args->node_count] = + node_record_table_ptr[i].slurm_addr; + strncpy(&agent_args-> + node_names[MAX_NAME_LEN * agent_args->node_count], + node_record_table_ptr[i].name, MAX_NAME_LEN); agent_args->node_count++; - base_state = node_record_table_ptr[i].node_state & (~NODE_STATE_NO_RESPOND); - no_resp_flag = node_record_table_ptr[i].node_state & NODE_STATE_NO_RESPOND; + base_state = + node_record_table_ptr[i]. + node_state & (~NODE_STATE_NO_RESPOND); + no_resp_flag = + node_record_table_ptr[i]. + node_state & NODE_STATE_NO_RESPOND; if (base_state == NODE_STATE_DRAINING) { - node_record_table_ptr[i].node_state = NODE_STATE_DRAINED; - bit_clear (idle_node_bitmap, i); - bit_clear (up_node_bitmap, i); - } - else { - node_record_table_ptr[i].node_state = NODE_STATE_IDLE | no_resp_flag; + node_record_table_ptr[i].node_state = + NODE_STATE_DRAINED; + bit_clear(idle_node_bitmap, i); + bit_clear(up_node_bitmap, i); + } else { + node_record_table_ptr[i].node_state = + NODE_STATE_IDLE | no_resp_flag; if (no_resp_flag == 0) - bit_set (idle_node_bitmap, i); + bit_set(idle_node_bitmap, i); } } agent_args->msg_args = revoke_job_cred; - debug ("Spawning revoke credential agent"); - if (pthread_attr_init (&attr_agent)) - fatal ("pthread_attr_init error %m"); - if (pthread_attr_setdetachstate (&attr_agent, PTHREAD_CREATE_DETACHED)) - error ("pthread_attr_setdetachstate error %m"); + debug("Spawning revoke credential agent"); + if (pthread_attr_init(&attr_agent)) + fatal("pthread_attr_init error %m"); + if (pthread_attr_setdetachstate + (&attr_agent, PTHREAD_CREATE_DETACHED)) + error("pthread_attr_setdetachstate error %m"); #ifdef PTHREAD_SCOPE_SYSTEM - if (pthread_attr_setscope (&attr_agent, PTHREAD_SCOPE_SYSTEM)) - error ("pthread_attr_setscope error %m"); + if (pthread_attr_setscope(&attr_agent, PTHREAD_SCOPE_SYSTEM)) + error("pthread_attr_setscope error %m"); #endif - if (pthread_create (&thread_agent, &attr_agent, agent, (void *)agent_args)) { - error ("pthread_create error %m"); - sleep (1); /* sleep and try once more */ - if (pthread_create (&thread_agent, &attr_agent, agent, (void *)agent_args)) - fatal ("pthread_create error %m"); - } - return; -} - -/* slurm_revoke_job_cred - send RPC for slurmd to revoke a credential */ -void -slurm_revoke_job_cred(struct node_record *node_ptr, - revoke_credential_msg_t *revoke_job_cred_ptr) -{ - int msg_size; - int rc; - slurm_fd sockfd; - slurm_msg_t request_msg; - slurm_msg_t response_msg; - return_code_msg_t * slurm_rc_msg; - - /* init message connection for message communication with slurmd */ - if ((sockfd = slurm_open_msg_conn(&node_ptr->slurm_addr)) < 0) { - error("revoke_job_cred: unable to connect to %s: %m", - node_ptr->name); - return; - } - - /* send request message */ - request_msg.msg_type = REQUEST_REVOKE_JOB_CREDENTIAL; - request_msg.data = revoke_job_cred_ptr; - if ((rc = 
slurm_send_node_msg(sockfd, &request_msg)) < 0) { - error ("revoke_job_cred: unable to send revoke msg to %s: %m", - node_ptr->name); - return; - } - - /* receive message */ - if ((msg_size = slurm_receive_msg(sockfd, &response_msg)) < 0) { - error ("revoke_job_cred: error in recv from %s: %m", - node_ptr->name); - return; - } - - /* shutdown message connection */ - if ((rc = slurm_shutdown_msg_conn(sockfd)) < 0) - error ("revoke_job_cred/shutdown_msg_conn error for %s", - node_ptr->name); - if (msg_size) - error ("revoke_job_cred/msg_size error %d for %s", - msg_size, node_ptr->name); - /* XXX: why was this here??? */ - /* return; */ - - switch ( response_msg . msg_type ) - { - case RESPONSE_SLURM_RC: - slurm_rc_msg = ( return_code_msg_t * ) response_msg . data ; - rc = slurm_rc_msg->return_code; - slurm_free_return_code_msg ( slurm_rc_msg ); - if (rc) - error ("slurm_revoke_job_cred/rc error %d for %s", rc, node_ptr->name); - break ; - default: - error ("slurm_revoke_job_cred/msg_type error %d for %s", - response_msg.msg_type, node_ptr->name); - break ; + if (pthread_create + (&thread_agent, &attr_agent, agent, (void *) agent_args)) { + error("pthread_create error %m"); + sleep(1); /* sleep and try once more */ + if (pthread_create + (&thread_agent, &attr_agent, agent, + (void *) agent_args)) + fatal("pthread_create error %m"); } return; } -/* - * is_key_valid - determine if supplied partition key is valid - * input: key - a slurm key acquired by user root - * output: returns 1 if key is valid, 0 otherwise - * NOTE: this is only a placeholder for a future function - * the format of the key is TBD - */ -int -is_key_valid (void * key) -{ - if (key) - return 1; - return 0; -} - /* - * match_feature - determine if the desired feature is one of those available - * input: seek - desired feature - * available - comma separated list of features - * output: returns 1 if found, 0 otherwise + * _match_feature - determine if the desired feature is one of those available + * IN seek - desired feature + * IN available - comma separated list of available features + * RET 1 if found, 0 otherwise */ -int -match_feature (char *seek, char *available) +static int _match_feature(char *seek, char *available) { char *tmp_available, *str_ptr3, *str_ptr4; int found; @@ -273,64 +212,72 @@ match_feature (char *seek, char *available) if (available == NULL) return SLURM_SUCCESS; /* nothing to find */ - tmp_available = xmalloc (strlen (available) + 1); - strcpy (tmp_available, available); + tmp_available = xmalloc(strlen(available) + 1); + strcpy(tmp_available, available); found = 0; - str_ptr3 = (char *) strtok_r (tmp_available, ",", &str_ptr4); + str_ptr3 = (char *) strtok_r(tmp_available, ",", &str_ptr4); while (str_ptr3) { - if (strcmp (seek, str_ptr3) == 0) { /* we have a match */ + if (strcmp(seek, str_ptr3) == 0) { /* we have a match */ found = 1; break; - } - str_ptr3 = (char *) strtok_r (NULL, ",", &str_ptr4); + } + str_ptr3 = (char *) strtok_r(NULL, ",", &str_ptr4); } - xfree (tmp_available); + xfree(tmp_available); return found; }
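_match_feature() is a plain strtok_r() walk over the comma separated feature list. A hedged illustration of the contract; the feature names are made up:

	int rc;

	rc = _match_feature ("gpfs", "gpfs,bigmem");	/* 1: found */
	rc = _match_feature ("elan3", "gpfs,bigmem");	/* 0: not in list */
	rc = _match_feature ("gpfs", NULL);		/* 0: nothing to find */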
* "best" is defined as either single set of consecutive nodes satisfying * the request and leaving the minimum number of unused nodes OR * the fewest number of consecutive node sets - * output: bitmap - nodes not required to satisfy the request are cleared, other left set - * returns zero on success, EINVAL otherwise + * IN/OUT bitmap - usable nodes are set on input, nodes not required to + * satisfy the request are cleared, other left set + * IN req_bitmap - map of required nodes + * IN req_nodes - count of required nodes + * IN req_cpus - count of required processors + * IN consecutive - allocated nodes must be consecutive if set + * RET zero on success, EINVAL otherwise * globals: node_record_count - count of nodes configured * node_record_table_ptr - pointer to global node table - * NOTE: bitmap must be a superset of req_nodes at the time that pick_best_quadrics is called + * NOTE: bitmap must be a superset of req_nodes at the time that + * _pick_best_quadrics is called */ -int -pick_best_quadrics (bitstr_t *bitmap, bitstr_t *req_bitmap, int req_nodes, - int req_cpus, int consecutive) +static int +_pick_best_quadrics(bitstr_t * bitmap, bitstr_t * req_bitmap, + int req_nodes, int req_cpus, int consecutive) { int i, index, error_code, sufficient; - int *consec_nodes; /* how many nodes we can add from this consecutive set of nodes */ - int *consec_cpus; /* how many nodes we can add from this consecutive set of nodes */ + int *consec_nodes; /* how many nodes we can add from this + * consecutive set of nodes */ + int *consec_cpus; /* how many nodes we can add from this + * consecutive set of nodes */ int *consec_start; /* where this consecutive set starts (index) */ int *consec_end; /* where this consecutive set ends (index) */ - int *consec_req; /* are nodes from this set required (in req_bitmap) */ + int *consec_req; /* are nodes from this set required + * (in req_bitmap) */ int consec_index, consec_size; int rem_cpus, rem_nodes; /* remaining resources required */ int best_fit_nodes, best_fit_cpus, best_fit_req; int best_fit_location = 0, best_fit_sufficient; if (bitmap == NULL) - fatal ("pick_best_quadrics: bitmap pointer is NULL\n"); + fatal("_pick_best_quadrics: bitmap pointer is NULL"); - error_code = EINVAL; /* default is no fit */ + error_code = EINVAL; /* default is no fit */ consec_index = 0; - consec_size = 50; /* start allocation for 50 sets of consecutive nodes */ - consec_cpus = xmalloc (sizeof (int) * consec_size); - consec_nodes = xmalloc (sizeof (int) * consec_size); - consec_start = xmalloc (sizeof (int) * consec_size); - consec_end = xmalloc (sizeof (int) * consec_size); - consec_req = xmalloc (sizeof (int) * consec_size); + consec_size = 50; /* start allocation for 50 sets of + * consecutive nodes */ + consec_cpus = xmalloc(sizeof(int) * consec_size); + consec_nodes = xmalloc(sizeof(int) * consec_size); + consec_start = xmalloc(sizeof(int) * consec_size); + consec_end = xmalloc(sizeof(int) * consec_size); + consec_req = xmalloc(sizeof(int) * consec_size); /* Build table with information about sets of consecutive nodes */ consec_cpus[consec_index] = consec_nodes[consec_index] = 0; @@ -338,65 +285,70 @@ pick_best_quadrics (bitstr_t *bitmap, bitstr_t *req_bitmap, int req_nodes, rem_cpus = req_cpus; rem_nodes = req_nodes; for (index = 0; index < node_record_count; index++) { - if (bit_test (bitmap, index)) { + if (bit_test(bitmap, index)) { if (consec_nodes[consec_index] == 0) consec_start[consec_index] = index; - if (slurmctld_conf.fast_schedule) /* don't bother checking 
each node */ - i = node_record_table_ptr[index].config_ptr->cpus; + if (slurmctld_conf.fast_schedule) + /* don't bother checking each node */ + i = node_record_table_ptr[index]. + config_ptr->cpus; else i = node_record_table_ptr[index].cpus; - if (req_bitmap && bit_test (req_bitmap, index)) { - if (consec_req[consec_index] == -1) + if (req_bitmap && bit_test(req_bitmap, index)) { + if (consec_req[consec_index] == -1) /* first required node in set */ - consec_req[consec_index] = index; + consec_req[consec_index] = index; rem_cpus -= i; rem_nodes--; - } - else { - bit_clear (bitmap, index); + } else { /* node not required (yet) */ + bit_clear(bitmap, index); consec_cpus[consec_index] += i; consec_nodes[consec_index]++; - } - } - else if (consec_nodes[consec_index] == 0) { - consec_req[consec_index] = -1; + } + } else if (consec_nodes[consec_index] == 0) { + consec_req[consec_index] = -1; /* already picked up any required nodes */ /* re-use this record */ - } - else { + } else { consec_end[consec_index] = index - 1; if (++consec_index >= consec_size) { consec_size *= 2; - xrealloc (consec_cpus, sizeof (int) * consec_size); - xrealloc (consec_nodes, sizeof (int) * consec_size); - xrealloc (consec_start, sizeof (int) * consec_size); - xrealloc (consec_end, sizeof (int) * consec_size); - xrealloc (consec_req, sizeof (int) * consec_size); - } + xrealloc(consec_cpus, + sizeof(int) * consec_size); + xrealloc(consec_nodes, + sizeof(int) * consec_size); + xrealloc(consec_start, + sizeof(int) * consec_size); + xrealloc(consec_end, + sizeof(int) * consec_size); + xrealloc(consec_req, + sizeof(int) * consec_size); + } consec_cpus[consec_index] = 0; consec_nodes[consec_index] = 0; consec_req[consec_index] = -1; - } + } } if (consec_nodes[consec_index] != 0) consec_end[consec_index++] = index - 1; #ifdef EXTREME_DEBUG /* don't compile this, slows things down too much */ - debug3 ("rem_cpus=%d, rem_nodes=%d", rem_cpus, rem_nodes); + debug3("rem_cpus=%d, rem_nodes=%d", rem_cpus, rem_nodes); for (i = 0; i < consec_index; i++) { if (consec_req[i] != -1) - debug3 ("start=%s, end=%s, nodes=%d, cpus=%d, req=%s", - node_record_table_ptr[consec_start[i]].name, - node_record_table_ptr[consec_end[i]].name, - consec_nodes[i], consec_cpus[i], - node_record_table_ptr[consec_req[i]].name); + debug3 + ("start=%s, end=%s, nodes=%d, cpus=%d, req=%s", + node_record_table_ptr[consec_start[i]].name, + node_record_table_ptr[consec_end[i]].name, + consec_nodes[i], consec_cpus[i], + node_record_table_ptr[consec_req[i]].name); else - debug3 ("start=%s, end=%s, nodes=%d, cpus=%d", - node_record_table_ptr[consec_start[i]].name, - node_record_table_ptr[consec_end[i]].name, - consec_nodes[i], consec_cpus[i]); - } + debug3("start=%s, end=%s, nodes=%d, cpus=%d", + node_record_table_ptr[consec_start[i]].name, + node_record_table_ptr[consec_end[i]].name, + consec_nodes[i], consec_cpus[i]); + } #endif /* accumulate nodes from these sets of consecutive nodes until */ @@ -409,30 +361,41 @@ pick_best_quadrics (bitstr_t *bitmap, bitstr_t *req_bitmap, int req_nodes, continue; sufficient = ((consec_nodes[i] >= rem_nodes) && (consec_cpus[i] >= rem_cpus)); - if ((best_fit_nodes == 0) || /* first possibility */ - ((best_fit_req == -1) && (consec_req[i] != -1)) || /* required nodes */ - (sufficient && (best_fit_sufficient == 0)) || /* first large enough */ - (sufficient && (consec_cpus[i] < best_fit_cpus)) || /* less waste option */ - ((sufficient == 0) && (consec_cpus[i] > best_fit_cpus))) { /* larger option */ + + /* if first possibility OR */ + 
/* contains required nodes OR */ + /* first set large enough for request OR */ + /* tightest fit (less resource waste) OR */ + /* nothing yet large enough, but this is biggest */ + if ((best_fit_nodes == 0) || + ((best_fit_req == -1) && (consec_req[i] != -1)) || + (sufficient && (best_fit_sufficient == 0)) || + (sufficient && (consec_cpus[i] < best_fit_cpus)) || + ((sufficient == 0) && + (consec_cpus[i] > best_fit_cpus))) { best_fit_cpus = consec_cpus[i]; best_fit_nodes = consec_nodes[i]; best_fit_location = i; best_fit_req = consec_req[i]; best_fit_sufficient = sufficient; - } + } } if (best_fit_nodes == 0) break; - if (consecutive && ((best_fit_nodes < rem_nodes) || (best_fit_cpus < rem_cpus))) + if (consecutive && ((best_fit_nodes < rem_nodes) + || (best_fit_cpus < rem_cpus))) break; /* no hole large enough */ - if (best_fit_req != -1) { /* work out from required nodes */ + if (best_fit_req != -1) { + /* This collection of nodes includes required ones; + * select nodes from this set, first working up + * then down from the required nodes */ for (i = best_fit_req; i <= consec_end[best_fit_location]; i++) { if ((rem_nodes <= 0) && (rem_cpus <= 0)) break; - if (bit_test (bitmap, i)) + if (bit_test(bitmap, i)) continue; - bit_set (bitmap, i); + bit_set(bitmap, i); rem_nodes--; rem_cpus -= node_record_table_ptr[i].cpus; } @@ -440,90 +403,95 @@ pick_best_quadrics (bitstr_t *bitmap, bitstr_t *req_bitmap, int req_nodes, i >= consec_start[best_fit_location]; i--) { if ((rem_nodes <= 0) && (rem_cpus <= 0)) break; - /* if (bit_test(bitmap, i)) continue; nothing set earlier */ - bit_set (bitmap, i); + /* if (bit_test(bitmap, i)) + continue; cleared above earlier */ + bit_set(bitmap, i); rem_nodes--; rem_cpus -= node_record_table_ptr[i].cpus; } - } - else { + } else { for (i = consec_start[best_fit_location]; i <= consec_end[best_fit_location]; i++) { if ((rem_nodes <= 0) && (rem_cpus <= 0)) break; - if (bit_test (bitmap, i)) + if (bit_test(bitmap, i)) continue; - bit_set (bitmap, i); + bit_set(bitmap, i); rem_nodes--; rem_cpus -= node_record_table_ptr[i].cpus; } - } + } if ((rem_nodes <= 0) && (rem_cpus <= 0)) { error_code = 0; break; - } + } consec_cpus[best_fit_location] = 0; consec_nodes[best_fit_location] = 0; - } + } if (consec_cpus) - xfree (consec_cpus); + xfree(consec_cpus); if (consec_nodes) - xfree (consec_nodes); + xfree(consec_nodes); if (consec_start) - xfree (consec_start); + xfree(consec_start); if (consec_end) - xfree (consec_end); + xfree(consec_end); if (consec_req) - xfree (consec_req); + xfree(consec_req); return error_code; }
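The five-way test above ranks the candidate node sets: first candidate, then any set holding required nodes, then the first sufficient set, then the sufficient set wasting the fewest cpus, and otherwise the biggest insufficient set (so later passes can extend it). A self-contained sketch of that ranking with made-up numbers (a request of 2 nodes/4 cpus against sets of 4/8, 2/4 and 1/2 nodes/cpus, none holding required nodes):

	#include <stdio.h>

	int main (void)
	{
		int consec_nodes[3] = { 4, 2, 1 };
		int consec_cpus[3]  = { 8, 4, 2 };
		int consec_req[3]   = { -1, -1, -1 };	/* no required nodes */
		int rem_nodes = 2, rem_cpus = 4;
		int i, sufficient, best_fit_nodes = 0, best_fit_cpus = 0;
		int best_fit_req = -1, best_fit_sufficient = 0;
		int best_fit_location = 0;

		for (i = 0; i < 3; i++) {
			sufficient = ((consec_nodes[i] >= rem_nodes) &&
				      (consec_cpus[i] >= rem_cpus));
			if ((best_fit_nodes == 0) ||
			    ((best_fit_req == -1) && (consec_req[i] != -1)) ||
			    (sufficient && (best_fit_sufficient == 0)) ||
			    (sufficient && (consec_cpus[i] < best_fit_cpus)) ||
			    ((sufficient == 0) &&
			     (consec_cpus[i] > best_fit_cpus))) {
				best_fit_cpus = consec_cpus[i];
				best_fit_nodes = consec_nodes[i];
				best_fit_location = i;
				best_fit_req = consec_req[i];
				best_fit_sufficient = sufficient;
			}
		}
		/* prints 1: the 2 node/4 cpu set displaces the 4 node/8 cpu
		 * set because both are sufficient and it wastes fewer cpus */
		printf ("best_fit_location=%d\n", best_fit_location);
		return 0;
	}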
/* - * pick_best_nodes - from a weigh order table of all nodes satisfying a job's specifications, - * select the "best" for use - * input: node_set_ptr - pointer to node specification information - * node_set_size - number of entries in records pointed to by node_set_ptr - * req_bitmap - pointer to bitmap of specific nodes required by the job, could be NULL - * req_cpus - count of cpus required by the job - * req_nodes - count of nodes required by the job - * contiguous - set to 1 if allocated nodes must be contiguous, 0 otherwise - * shared - set to 1 if nodes may be shared, 0 otherwise - * max_nodes - maximum number of nodes permitted for job, - * INFIITE for no limit (partition limit) - * output: req_bitmap - pointer to bitmap of selected nodes - * returns 0 on success, EAGAIN if request can not be satisfied now, - * EINVAL if request can never be satisfied (insufficient contiguous nodes) + * _pick_best_nodes - from a weight ordered table of all nodes 
satisfying a + * job's specifications, select the "best" for use + * IN node_set_ptr - pointer to node specification information + * IN node_set_size - number of entries in records pointed to by node_set_ptr + * IN/OUT req_bitmap - pointer to bitmap of specific nodes required by the + * job, could be NULL, returns bitmap of selected nodes, must xfree + * IN req_cpus - count of cpus required by the job + * IN req_nodes - count of nodes required by the job + * IN contiguous - 1 if allocated nodes must be contiguous, 0 otherwise + * IN shared - set to 1 if nodes may be shared, 0 otherwise + * IN max_nodes - maximum number of nodes permitted for job, + * INFINITE for no limit (partition limit) + * RET 0 on success, EAGAIN if request can not be satisfied now, EINVAL if + * request can never be satisfied (insufficient contiguous nodes) * NOTE: the caller must xfree memory pointed to by req_bitmap * Notes: The algorithm is - * 1) If required node list is specified, determine implicitly required processor and node count - * 2) Determine how many disjoint required "features" are represented (e.g. "FS1|FS2") - * 3) For each feature: find matching node table entries, identify nodes that are up and - * available (idle or shared) and add them to a bit map, call pick_best_quadrics() to - * select the "best" of those based upon topology - * 4) If request can't be satified now, execute pick_best_quadrics() against the list - * of nodes that exist in any state (perhaps down or busy) to determine if the - * request can every be satified. + * 1) If required node list is specified, determine implicitly required + * processor and node count + * 2) Determine how many disjoint required "features" are represented + * (e.g. "FS1|FS2") + * 3) For each feature: find matching node table entries, identify nodes + * that are up and available (idle or shared) and add them to a bit + * map, call _pick_best_quadrics() to select the "best" of those + * based upon topology + * 4) If request can't be satisfied now, execute _pick_best_quadrics() + * against the list of nodes that exist in any state (perhaps down + * or busy) to determine if the request can ever be satisfied. 
*/ -int -pick_best_nodes (struct node_set *node_set_ptr, int node_set_size, - bitstr_t **req_bitmap, uint32_t req_cpus, uint32_t req_nodes, - int contiguous, int shared, uint32_t max_nodes) +static int +_pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, + bitstr_t ** req_bitmap, uint32_t req_cpus, + uint32_t req_nodes, int contiguous, int shared, + uint32_t max_nodes) { int error_code, i, j, pick_code; - int total_nodes, total_cpus; /* total resources configured in partition */ + int total_nodes, total_cpus; /* total resources configured in + partition */ int avail_nodes, avail_cpus; /* resources available for use now */ bitstr_t *avail_bitmap, *total_bitmap; int max_feature, min_feature; int avail_set, total_set, runable; if (node_set_size == 0) { - info ("pick_best_nodes: empty node set for selection"); + info("_pick_best_nodes: empty node set for selection"); return EINVAL; } if ((max_nodes != INFINITE) && (req_nodes > max_nodes)) { - info ("pick_best_nodes: more nodes required than possible in partition"); + info("_pick_best_nodes: more nodes required than partition limit"); return EINVAL; } error_code = 0; @@ -531,26 +499,29 @@ pick_best_nodes (struct node_set *node_set_ptr, int node_set_size, avail_nodes = avail_cpus = 0; total_nodes = total_cpus = 0; if (req_bitmap[0]) { /* specific nodes required */ - /* NOTE: we have already confirmed that all of these nodes have a usable */ - /* configuration and are in the proper partition */ + /* we have already confirmed that all of these nodes have a + * usable configuration and are in the proper partition */ if (req_nodes != 0) - total_nodes = bit_set_count (req_bitmap[0]); + total_nodes = bit_set_count(req_bitmap[0]); if (req_cpus != 0) - total_cpus = count_cpus (req_bitmap[0]); + total_cpus = count_cpus(req_bitmap[0]); if (total_nodes > max_nodes) { - info ("pick_best_nodes: more nodes required than possible in partition"); + info("_pick_best_nodes: more nodes required than partition limit"); return EINVAL; } if ((req_nodes <= total_nodes) && (req_cpus <= total_cpus)) { - if (bit_super_set (req_bitmap[0], up_node_bitmap) != 1) + if (bit_super_set(req_bitmap[0], up_node_bitmap) != + 1) return EAGAIN; if ((shared != 1) && - (bit_super_set (req_bitmap[0], idle_node_bitmap) != 1)) + (bit_super_set(req_bitmap[0], idle_node_bitmap) + != 1)) return EAGAIN; - return SLURM_SUCCESS; /* user can have selected nodes, we're done! */ - } + return SLURM_SUCCESS; /* user can have selected + * nodes, we're done! */ + } total_nodes = total_cpus = 0; /* reinitialize */ - } + } /* identify how many feature sets we have (e.g. "[fs1|fs2|fs3|fs4]" */ max_feature = min_feature = node_set_ptr[0].feature; @@ -561,7 +532,7 @@ pick_best_nodes (struct node_set *node_set_ptr, int node_set_size, min_feature = node_set_ptr[i].feature; } - runable = 0; /* assume not runable until otherwise demonstrated */ + runable = 0; /* assume not runable until proven otherwise */ for (j = min_feature; j <= max_feature; j++) { avail_set = total_set = 0; for (i = 0; i < node_set_size; i++) { @@ -569,74 +540,97 @@ pick_best_nodes (struct node_set *node_set_ptr, int node_set_size, continue; if (runable == 0) { if (total_set) - bit_or (total_bitmap, node_set_ptr[i].my_bitmap); + bit_or(total_bitmap, + node_set_ptr[i].my_bitmap); else { - total_bitmap = bit_copy (node_set_ptr[i].my_bitmap); - if (total_bitmap == NULL) - fatal ("bit_copy failed to allocate memory"); + total_bitmap = + bit_copy(node_set_ptr[i]. 
+ my_bitmap); + if (total_bitmap == NULL) + fatal + ("bit_copy failed to allocate memory"); total_set = 1; - } + } total_nodes += node_set_ptr[i].nodes; - total_cpus += (node_set_ptr[i].nodes * node_set_ptr[i].cpus_per_node); - } - bit_and (node_set_ptr[i].my_bitmap, up_node_bitmap); + total_cpus += + (node_set_ptr[i].nodes * + node_set_ptr[i].cpus_per_node); + } + bit_and(node_set_ptr[i].my_bitmap, up_node_bitmap); if (shared != 1) - bit_and (node_set_ptr[i].my_bitmap, idle_node_bitmap); - node_set_ptr[i].nodes = bit_set_count (node_set_ptr[i].my_bitmap); + bit_and(node_set_ptr[i].my_bitmap, + idle_node_bitmap); + node_set_ptr[i].nodes = + bit_set_count(node_set_ptr[i].my_bitmap); if (avail_set) - bit_or (avail_bitmap, node_set_ptr[i].my_bitmap); + bit_or(avail_bitmap, + node_set_ptr[i].my_bitmap); else { - avail_bitmap = bit_copy (node_set_ptr[i].my_bitmap); - if (avail_bitmap == NULL) - fatal ("bit_copy memory allocation failure"); + avail_bitmap = + bit_copy(node_set_ptr[i].my_bitmap); + if (avail_bitmap == NULL) + fatal + ("bit_copy memory allocation failure"); avail_set = 1; - } + } avail_nodes += node_set_ptr[i].nodes; - avail_cpus += (node_set_ptr[i].nodes * node_set_ptr[i].cpus_per_node); - if ((req_bitmap[0]) && - (bit_super_set (req_bitmap[0], avail_bitmap) == 0)) + avail_cpus += + (node_set_ptr[i].nodes * + node_set_ptr[i].cpus_per_node); + if ((req_bitmap[0]) + && (bit_super_set(req_bitmap[0], avail_bitmap) + == 0)) continue; if (avail_nodes < req_nodes) continue; if (avail_cpus < req_cpus) continue; - pick_code = pick_best_quadrics (avail_bitmap, req_bitmap[0], req_nodes, req_cpus, contiguous); + pick_code = + _pick_best_quadrics(avail_bitmap, + req_bitmap[0], req_nodes, + req_cpus, contiguous); if ((pick_code == 0) && (max_nodes != INFINITE) - && (bit_set_count (avail_bitmap) > max_nodes)) { - info ("pick_best_nodes: too many nodes selected %u partition maximum is %u", - bit_set_count (avail_bitmap), max_nodes); + && (bit_set_count(avail_bitmap) > max_nodes)) { + info("_pick_best_nodes: too many nodes selected %u partition maximum is %u", + bit_set_count(avail_bitmap), max_nodes); error_code = EINVAL; break; - } + } if (pick_code == 0) { if (total_bitmap) - bit_free (total_bitmap); + bit_free(total_bitmap); if (req_bitmap[0]) - bit_free (req_bitmap[0]); + bit_free(req_bitmap[0]); req_bitmap[0] = avail_bitmap; return SLURM_SUCCESS; } } - /* determine if job could possibly run (if configured nodes all available) */ + /* determine if job could possibly run (if configured + * nodes all available) */ if ((error_code == 0) && (runable == 0) && - (total_nodes >= req_nodes) && (total_cpus >= req_cpus) && - ((req_bitmap[0] == NULL) || (bit_super_set (req_bitmap[0], total_bitmap) == 1)) && - ((max_nodes == INFINITE) || (req_nodes <= max_nodes))) { - pick_code = pick_best_quadrics (total_bitmap, req_bitmap[0], req_nodes, req_cpus, contiguous); + (total_nodes >= req_nodes) && (total_cpus >= req_cpus) + && ((req_bitmap[0] == NULL) + || (bit_super_set(req_bitmap[0], total_bitmap) == + 1)) && ((max_nodes == INFINITE) + || (req_nodes <= max_nodes))) { + pick_code = + _pick_best_quadrics(total_bitmap, + req_bitmap[0], req_nodes, + req_cpus, contiguous); if ((pick_code == 0) && (max_nodes != INFINITE) - && (bit_set_count (total_bitmap) > max_nodes)) { + && (bit_set_count(total_bitmap) > max_nodes)) { error_code = EINVAL; - info ("pick_best_nodes: %u nodes selected, max is %u", - bit_set_count (avail_bitmap), max_nodes); + info("_pick_best_nodes: %u nodes selected, max is %u", + 
bit_set_count(avail_bitmap), max_nodes); } if (pick_code == 0) runable = 1; - } + } if (avail_bitmap) - bit_free (avail_bitmap); + bit_free(avail_bitmap); if (total_bitmap) - bit_free (total_bitmap); + bit_free(total_bitmap); avail_bitmap = total_bitmap = NULL; if (error_code != 0) break; } @@ -644,7 +638,7 @@ if (runable == 0) { error_code = EINVAL; - info ("pick_best_nodes: job never runnable"); + info("_pick_best_nodes: job never runnable"); } if (error_code == 0) error_code = EAGAIN;
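_pick_best_nodes() deliberately separates "not now" (EAGAIN) from "not ever" (EINVAL), and callers must preserve that distinction. A hedged sketch of the expected handling (fragment only, declarations elided; select_nodes() below is the real caller and also manages the bitmaps):

	error_code = _pick_best_nodes (node_set_ptr, node_set_size,
				       &req_bitmap, req_cpus, req_nodes,
				       contiguous, shared, max_nodes);
	if (error_code == EAGAIN) {
		/* resources busy now: leave the job queued, retry later */
	} else if (error_code == EINVAL) {
		/* request can never be satisfied: reject the job */
	} else {
		allocate_nodes (req_bitmap);	/* SLURM_SUCCESS */
	}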
best-fit or other criterion) * 3) Call allocate_nodes() to perform the actual allocation */ -int -select_nodes (struct job_record *job_ptr, int test_only) +int select_nodes(struct job_record *job_ptr, int test_only) { int error_code, i, node_set_index, node_set_size = 0; bitstr_t *req_bitmap, *scratch_bitmap; @@ -692,130 +687,154 @@ select_nodes (struct job_record *job_ptr, int test_only) /* insure that partition exists and is up */ part_ptr = find_part_record(job_ptr->partition); if (part_ptr == NULL) - fatal("select_nodes: invalid partition name %s for job %u", - job_ptr->partition, job_ptr->job_id); + fatal("select_nodes: invalid partition name %s for job %u", + job_ptr->partition, job_ptr->job_id); if (part_ptr->state_up == 0) return ESLURM_NODES_BUSY; /* pick up nodes from the weight ordered configuration list */ node_set_index = 0; node_set_size = 0; - node_set_ptr = (struct node_set *) xmalloc (sizeof (struct node_set)); + node_set_ptr = + (struct node_set *) xmalloc(sizeof(struct node_set)); node_set_ptr[node_set_size++].my_bitmap = NULL; - if (job_ptr->details->req_node_bitmap) /* insure selected nodes in partition */ - req_bitmap = bit_copy (job_ptr->details->req_node_bitmap); + if (job_ptr->details->req_node_bitmap) /* insure selected nodes in + this partition */ + req_bitmap = bit_copy(job_ptr->details->req_node_bitmap); - config_record_iterator = list_iterator_create (config_list); + config_record_iterator = list_iterator_create(config_list); if (config_record_iterator == NULL) - fatal ("select_nodes: ListIterator_create unable to allocate memory"); + fatal + ("select_nodes: ListIterator_create unable to allocate memory"); - while ((config_record_point = - (struct config_record *) list_next (config_record_iterator))) { + while ((config_record_point = (struct config_record *) + list_next(config_record_iterator))) { - tmp_feature = valid_features (job_ptr->details->features, - config_record_point->feature); + tmp_feature = _valid_features(job_ptr->details->features, + config_record_point-> + feature); if (tmp_feature == 0) continue; /* since nodes can register with more resources than defined */ /* in the configuration, we want to use those higher values */ /* for scheduling, but only as needed */ - if (slurmctld_conf.fast_schedule) /* don't bother checking each node */ + if (slurmctld_conf.fast_schedule) check_node_config = 0; - else if ((job_ptr->details->min_procs > config_record_point->cpus) || - (job_ptr->details->min_memory > config_record_point->real_memory) || - (job_ptr->details->min_tmp_disk > config_record_point->tmp_disk)) { + else if ((job_ptr->details->min_procs > + config_record_point->cpus) + || (job_ptr->details->min_memory > + config_record_point->real_memory) + || (job_ptr->details->min_tmp_disk > + config_record_point->tmp_disk)) { check_node_config = 1; - } - else + } else check_node_config = 0; node_set_ptr[node_set_index].my_bitmap = - bit_copy (config_record_point->node_bitmap); + bit_copy(config_record_point->node_bitmap); if (node_set_ptr[node_set_index].my_bitmap == NULL) - fatal ("bit_copy memory allocation failure"); - bit_and (node_set_ptr[node_set_index].my_bitmap, - part_ptr->node_bitmap); + fatal("bit_copy memory allocation failure"); + bit_and(node_set_ptr[node_set_index].my_bitmap, + part_ptr->node_bitmap); node_set_ptr[node_set_index].nodes = - bit_set_count (node_set_ptr[node_set_index].my_bitmap); + bit_set_count(node_set_ptr[node_set_index].my_bitmap); /* check configuration of individual nodes only if the check */ /* of baseline values in 
the configuration file are too low. */ /* this will slow the scheduling for very large clusters. */ - if (check_node_config && (node_set_ptr[node_set_index].nodes != 0)) { + if (check_node_config + && (node_set_ptr[node_set_index].nodes != 0)) { for (i = 0; i < node_record_count; i++) { if (bit_test - (node_set_ptr[node_set_index].my_bitmap, i) == 0) + (node_set_ptr[node_set_index]. + my_bitmap, i) == 0) continue; - if ((job_ptr->details->min_procs <= - node_record_table_ptr[i].cpus) + if ((job_ptr->details->min_procs <= + node_record_table_ptr[i].cpus) && (job_ptr->details->min_memory <= - node_record_table_ptr[i].real_memory) + node_record_table_ptr[i]. + real_memory) && (job_ptr->details->min_tmp_disk <= node_record_table_ptr[i].tmp_disk)) continue; - bit_clear (node_set_ptr[node_set_index].my_bitmap, i); - if ((--node_set_ptr[node_set_index].nodes) == 0) + bit_clear(node_set_ptr[node_set_index]. + my_bitmap, i); + if ((--node_set_ptr[node_set_index]. + nodes) == 0) break; } - } + } if (node_set_ptr[node_set_index].nodes == 0) { - bit_free (node_set_ptr[node_set_index].my_bitmap); + bit_free(node_set_ptr[node_set_index].my_bitmap); node_set_ptr[node_set_index].my_bitmap = NULL; continue; - } + } if (req_bitmap) { if (scratch_bitmap) - bit_or (scratch_bitmap, - node_set_ptr[node_set_index].my_bitmap); + bit_or(scratch_bitmap, + node_set_ptr[node_set_index]. + my_bitmap); else { scratch_bitmap = - bit_copy (node_set_ptr[node_set_index].my_bitmap); + bit_copy(node_set_ptr[node_set_index]. + my_bitmap); if (scratch_bitmap == NULL) - fatal ("bit_copy memory allocation failure"); - } - } - node_set_ptr[node_set_index].cpus_per_node = config_record_point->cpus; - node_set_ptr[node_set_index].weight = config_record_point->weight; + fatal + ("bit_copy memory allocation failure"); + } + } + node_set_ptr[node_set_index].cpus_per_node = + config_record_point->cpus; + node_set_ptr[node_set_index].weight = + config_record_point->weight; node_set_ptr[node_set_index].feature = tmp_feature; - debug ("found %d usable nodes from configuration containing nodes %s", - node_set_ptr[node_set_index].nodes, - config_record_point->nodes); + debug + ("found %d usable nodes from configuration containing nodes %s", + node_set_ptr[node_set_index].nodes, + config_record_point->nodes); node_set_index++; - xrealloc (node_set_ptr, sizeof (struct node_set) * (node_set_index + 1)); + xrealloc(node_set_ptr, + sizeof(struct node_set) * (node_set_index + 1)); node_set_ptr[node_set_size++].my_bitmap = NULL; - } + } if (node_set_index == 0) { - info ("select_nodes: no node configurations satisfy requirements procs=%u:mem=%u:disk=%u:feature=%s", - job_ptr->details->min_procs, job_ptr->details->min_memory, - job_ptr->details->min_tmp_disk, job_ptr->details->features); + info("select_nodes: no node configurations satisfy requirements procs=%u:mem=%u:disk=%u:feature=%s", + job_ptr->details->min_procs, + job_ptr->details->min_memory, + job_ptr->details->min_tmp_disk, + job_ptr->details->features); error_code = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE; goto cleanup; } - /* eliminate last (incomplete) node_set record */ + /* eliminate last (incomplete) node_set record */ if (node_set_ptr[node_set_index].my_bitmap) - bit_free (node_set_ptr[node_set_index].my_bitmap); + bit_free(node_set_ptr[node_set_index].my_bitmap); node_set_ptr[node_set_index].my_bitmap = NULL; node_set_size = node_set_index; if (req_bitmap) { if ((scratch_bitmap == NULL) - || (bit_super_set (req_bitmap, scratch_bitmap) != 1)) { - info ("select_nodes: requested 
nodes do not satisfy configurations requirements procs=%u:mem=%u:disk=%u:feature=%s", - job_ptr->details->min_procs, job_ptr->details->min_memory, - job_ptr->details->min_tmp_disk, job_ptr->details->features); - error_code = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE; + || (bit_super_set(req_bitmap, scratch_bitmap) != 1)) { + info("select_nodes: requested nodes do not satisfy configuration requirements procs=%u:mem=%u:disk=%u:feature=%s", + job_ptr->details->min_procs, + job_ptr->details->min_memory, + job_ptr->details->min_tmp_disk, + job_ptr->details->features); + error_code = + ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE; goto cleanup; - } - } + } + } /* pick the nodes providing a best-fit */ - error_code = pick_best_nodes (node_set_ptr, node_set_size, - &req_bitmap, job_ptr->details->num_procs, + error_code = _pick_best_nodes(node_set_ptr, node_set_size, + &req_bitmap, + job_ptr->details->num_procs, job_ptr->details->num_nodes, - job_ptr->details->contiguous, + job_ptr->details->contiguous, job_ptr->details->shared, part_ptr->max_nodes); if (error_code == EAGAIN) { @@ -824,125 +843,134 @@ select_nodes (struct job_record *job_ptr, int test_only) } if (error_code == EINVAL) { error_code = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE; - info ("select_nodes: no nodes can satisfy job request"); + info("select_nodes: no nodes can satisfy job request"); goto cleanup; - } + } if (test_only) { error_code = SLURM_SUCCESS; goto cleanup; - } + } /* assign the nodes and stage_in the job */ - job_ptr->nodes = bitmap2node_name (req_bitmap); - allocate_nodes (req_bitmap); + job_ptr->nodes = bitmap2node_name(req_bitmap); + allocate_nodes(req_bitmap); job_ptr->node_bitmap = req_bitmap; - build_node_details (job_ptr); + build_node_details(job_ptr); req_bitmap = NULL; job_ptr->job_state = JOB_RUNNING; job_ptr->start_time = job_ptr->time_last_active = time(NULL); if (job_ptr->time_limit == INFINITE) job_ptr->end_time = INFINITE; else - job_ptr->end_time = job_ptr->start_time + (job_ptr->time_limit * 60); + job_ptr->end_time = + job_ptr->start_time + (job_ptr->time_limit * 60); cleanup: if (req_bitmap) - bit_free (req_bitmap); + bit_free(req_bitmap); if (scratch_bitmap) - bit_free (scratch_bitmap); + bit_free(scratch_bitmap); if (node_set_ptr) { for (i = 0; i < node_set_size; i++) { if (node_set_ptr[i].my_bitmap) - bit_free (node_set_ptr[i].my_bitmap); + bit_free(node_set_ptr[i].my_bitmap); } - xfree (node_set_ptr); - } + xfree(node_set_ptr); + } if (config_record_iterator) - list_iterator_destroy (config_record_iterator); + list_iterator_destroy(config_record_iterator); return error_code; } -/* build_node_details - set cpu counts and addresses for allocated nodes - * NOTE: the arrays cpus_per_node, cpu_count_reps and node_addr are allocated - * by build_node_details and must be xfreed by the caller +/* + * build_node_details - set cpu counts and addresses for allocated nodes + * IN job_ptr - pointer to a job record + * NOTE: the arrays cpus_per_node, cpu_count_reps and node_addr in the job + * details record are allocated by build_node_details and must be + * xfreed by the caller, preferably using delete_job_details */ -void -build_node_details (struct job_record *job_ptr) +void build_node_details(struct job_record *job_ptr) { hostlist_t host_list = NULL; struct node_record *node_ptr; char *this_node_name; int node_inx = 0, cpu_inx = -1; - if ((job_ptr->node_bitmap == NULL) || - (job_ptr->nodes == NULL)) { + if ((job_ptr->node_bitmap == NULL) || (job_ptr->nodes == NULL)) { /* No nodes allocated, we're done... 
*/ job_ptr->num_cpu_groups = 0; - job_ptr->node_cnt = 0; - job_ptr->cpus_per_node = NULL; + job_ptr->node_cnt = 0; + job_ptr->cpus_per_node = NULL; job_ptr->cpu_count_reps = NULL; - job_ptr->node_addr = NULL; + job_ptr->node_addr = NULL; return; } job_ptr->num_cpu_groups = 0; - job_ptr->node_cnt = bit_set_count (job_ptr->node_bitmap); - job_ptr->cpus_per_node = xmalloc (sizeof(uint32_t) * job_ptr->node_cnt); - job_ptr->cpu_count_reps = xmalloc (sizeof(uint32_t) * job_ptr->node_cnt); - job_ptr->node_addr = xmalloc (sizeof(slurm_addr) * job_ptr->node_cnt); + job_ptr->node_cnt = bit_set_count(job_ptr->node_bitmap); + job_ptr->cpus_per_node = + xmalloc(sizeof(uint32_t) * job_ptr->node_cnt); + job_ptr->cpu_count_reps = + xmalloc(sizeof(uint32_t) * job_ptr->node_cnt); + job_ptr->node_addr = + xmalloc(sizeof(slurm_addr) * job_ptr->node_cnt); /* Use hostlist here to insure ordering of info matches that of srun */ - if ( (host_list = hostlist_create (job_ptr->nodes)) == NULL) - fatal ("hostlist_create error for %s: %m", job_ptr->nodes); + if ((host_list = hostlist_create(job_ptr->nodes)) == NULL) + fatal("hostlist_create error for %s: %m", job_ptr->nodes); - while ( (this_node_name = hostlist_shift (host_list)) ) { - node_ptr = find_node_record (this_node_name); + while ((this_node_name = hostlist_shift(host_list))) { + node_ptr = find_node_record(this_node_name); if (node_ptr) { int usable_cpus; if (slurmctld_conf.fast_schedule) usable_cpus = node_ptr->config_ptr->cpus; else usable_cpus = node_ptr->cpus; - memcpy (&job_ptr->node_addr[node_inx++], - &node_ptr->slurm_addr, - sizeof (slurm_addr)); - if ((cpu_inx == -1) || - (job_ptr->cpus_per_node[cpu_inx] != usable_cpus)) { + memcpy(&job_ptr->node_addr[node_inx++], + &node_ptr->slurm_addr, sizeof(slurm_addr)); + if ((cpu_inx == -1) || + (job_ptr->cpus_per_node[cpu_inx] != + usable_cpus)) { cpu_inx++; - job_ptr->cpus_per_node[cpu_inx] = usable_cpus; + job_ptr->cpus_per_node[cpu_inx] = + usable_cpus; job_ptr->cpu_count_reps[cpu_inx] = 1; } else job_ptr->cpu_count_reps[cpu_inx]++; } else { - error ("Invalid node %s in job_id %u", - this_node_name, job_ptr->job_id); + error("Invalid node %s in job_id %u", + this_node_name, job_ptr->job_id); } - free (this_node_name); + free(this_node_name); } - hostlist_destroy (host_list); + hostlist_destroy(host_list); if (job_ptr->node_cnt != node_inx) { - error ("Node count mismatch for job_id %u", job_ptr->job_id); + error("Node count mismatch for job_id %u", + job_ptr->job_id); job_ptr->node_cnt = node_inx; } job_ptr->num_cpu_groups = cpu_inx + 1; - xrealloc (job_ptr->cpus_per_node, sizeof(uint32_t *) * job_ptr->num_cpu_groups); - xrealloc (job_ptr->cpu_count_reps, sizeof(uint32_t *) * job_ptr->num_cpu_groups); + xrealloc(job_ptr->cpus_per_node, + sizeof(uint32_t *) * job_ptr->num_cpu_groups); + xrealloc(job_ptr->cpu_count_reps, + sizeof(uint32_t *) * job_ptr->num_cpu_groups); } /* - * valid_features - determine if the requested features are satisfied by those available - * input: requested - requested features (by a job) - * available - available features (on a node) - * output: returns 0 if request is not satisfied, otherwise an integer indicating - * which mutually exclusive feature is satisfied. for example - * valid_features("[fs1|fs2|fs3|fs4]", "fs3") returns 3. see the - * slurm administrator and user guides for details. returns 1 if - * requirements are satisfied without mutually exclusive feature list. 
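/* Illustrative examples of the feature-expression convention described for _valid_features() below (an editorial sketch derived from the parsing code that follows, assuming _match_feature() tests a single requested name against the node's comma-delimited feature list): * _valid_features("fs1&fs2", "fs1,fs2") returns 1 (AND satisfied) * _valid_features("fs1|fs9", "fs1") returns 1 (OR satisfied) * _valid_features("[fs1|fs2|fs3|fs4]", "fs3") returns 3 (third mutually exclusive option) * _valid_features("[fs1|fs2]", "fs9") returns 0 (no option satisfied) */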
+ * _valid_features - determine if the requested features are satisfied by + * those available + * IN requested - requested features (by a job) + * IN available - available features (on a node) + * RET 0 if request is not satisfied, otherwise an integer indicating which + * mutually exclusive feature is satisfied. for example + * _valid_features("[fs1|fs2|fs3|fs4]", "fs3") returns 3. see the + * slurm administrator and user guides for details. returns 1 if + * requirements are satisfied without mutually exclusive feature list. */ -int -valid_features (char *requested, char *available) +static int _valid_features(char *requested, char *available) { char *tmp_requested, *str_ptr1; int bracket, found, i, option, position, result; @@ -954,32 +982,33 @@ valid_features (char *requested, char *available) if (available == NULL) return 0; /* no features */ - tmp_requested = xmalloc (strlen (requested) + 1); - strcpy (tmp_requested, requested); + tmp_requested = xmalloc(strlen(requested) + 1); + strcpy(tmp_requested, requested); bracket = option = position = 0; str_ptr1 = tmp_requested; /* start of feature name */ result = last_op = 1; /* assume good for now */ for (i = 0;; i++) { if (tmp_requested[i] == (char) NULL) { - if (strlen (str_ptr1) == 0) + if (strlen(str_ptr1) == 0) break; - found = match_feature (str_ptr1, available); + found = _match_feature(str_ptr1, available); if (last_op == 1) /* and */ result &= found; - else /* or */ + else /* or */ result |= found; break; - } + } if (tmp_requested[i] == '&') { if (bracket != 0) { - info ("valid_features: parsing failure 1 on %s", requested); + info("_valid_features: parsing failure 1 on %s", + requested); result = 0; break; - } + } tmp_requested[i] = (char) NULL; - found = match_feature (str_ptr1, available); + found = _match_feature(str_ptr1, available); if (last_op == 1) /* and */ result &= found; else /* or */ @@ -987,10 +1016,9 @@ valid_features (char *requested, char *available) str_ptr1 = &tmp_requested[i + 1]; last_op = 1; /* and */ - } - else if (tmp_requested[i] == '|') { + } else if (tmp_requested[i] == '|') { tmp_requested[i] = (char) NULL; - found = match_feature (str_ptr1, available); + found = _match_feature(str_ptr1, available); if (bracket != 0) { if (found) option = position; @@ -1003,8 +1031,7 @@ valid_features (char *requested, char *available) str_ptr1 = &tmp_requested[i + 1]; last_op = 0; /* or */ - } - else if (tmp_requested[i] == '[') { + } else if (tmp_requested[i] == '[') { bracket++; position = 1; save_op = last_op; @@ -1012,10 +1039,9 @@ valid_features (char *requested, char *available) last_op = result = 1; str_ptr1 = &tmp_requested[i + 1]; - } - else if (tmp_requested[i] == ']') { + } else if (tmp_requested[i] == ']') { tmp_requested[i] = (char) NULL; - found = match_feature (str_ptr1, available); + found = _match_feature(str_ptr1, available); if (found) option = position; result |= found; @@ -1023,31 +1049,30 @@ valid_features (char *requested, char *available) result &= save_result; else /* or */ result |= save_result; - if ((tmp_requested[i + 1] == '&') && (bracket == 1)) { + if ((tmp_requested[i + 1] == '&') + && (bracket == 1)) { last_op = 1; str_ptr1 = &tmp_requested[i + 2]; - } - else if ((tmp_requested[i + 1] == '|') - && (bracket == 1)) { + } else if ((tmp_requested[i + 1] == '|') + && (bracket == 1)) { last_op = 0; str_ptr1 = &tmp_requested[i + 2]; - } - else if ((tmp_requested[i + 1] == (char) NULL) - && (bracket == 1)) { + } else if ((tmp_requested[i + 1] == (char) NULL) + && (bracket == 1)) { break; - } - 
else { - error ("valid_features: parsing failure 2 on %s", - requested); + } else { + error + ("_valid_features: parsing failure 2 on %s", + requested); result = 0; break; - } + } bracket = 0; - } + } } if (position) result *= option; - xfree (tmp_requested); + xfree(tmp_requested); return result; } diff --git a/src/slurmctld/pack.c b/src/slurmctld/pack.c deleted file mode 100644 index b63b4b7c2fb..00000000000 --- a/src/slurmctld/pack.c +++ /dev/null @@ -1,143 +0,0 @@ -/*****************************************************************************\ - * pack.c - pack slurmctld structures into buffers understood by the - * slurm_protocol - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Moe Jette <jette1@llnl.gov>, Joseph Ekstrom (ekstrom1@llnl.gov) - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with SLURM; if not, write to the Free Software Foundation, Inc., - * 60 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
-\*****************************************************************************/ - -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif - -#include <assert.h> -#include <ctype.h> -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <src/common/bitstring.h> -#include <src/common/list.h> -#include <src/common/pack.h> -#include <src/common/slurm_protocol_pack.h> -#include <src/slurmctld/slurmctld.h> - -#define BUF_SIZE 1024 - -void -pack_ctld_job_step_info( struct step_record* step, Buf buffer) -{ - char *node_list; - - if (step->node_bitmap) - node_list = bitmap2node_name (step->node_bitmap); - else { - node_list = xmalloc(1); - node_list[0] = '\0'; - } - - pack_job_step_info_members( - step->job_ptr->job_id, - step->step_id, - step->job_ptr->user_id, - step->start_time, - step->job_ptr->partition , - node_list, - buffer - ); - xfree (node_list); -} - -/* pack_ctld_job_step_info_response_msg - packs the message - * IN - job_id and step_id - zero for all - * OUT - packed buffer and length NOTE- MUST free_buf buffer - * return - error code - */ -int -pack_ctld_job_step_info_response_msg ( uint32_t job_id, uint32_t step_id, Buf buffer ) -{ - ListIterator job_record_iterator; - ListIterator step_record_iterator; - int error_code = 0; - uint32_t steps_packed = 0, tmp_offset; - struct step_record* step_ptr; - struct job_record * job_ptr; - - pack_time( last_job_update, buffer ); - pack32( steps_packed , buffer ); /* steps_packed is placeholder for now */ - - if ( job_id == 0 ) - /* Return all steps for all jobs */ - { - job_record_iterator = list_iterator_create (job_list); - while ((job_ptr = (struct job_record *) list_next (job_record_iterator))) { - step_record_iterator = list_iterator_create (job_ptr->step_list); - while ((step_ptr = (struct step_record *) list_next (step_record_iterator))) { - pack_ctld_job_step_info( step_ptr, buffer ); - steps_packed++; - } - list_iterator_destroy (step_record_iterator); - } - list_iterator_destroy (job_record_iterator); - } - - else if ( step_id == 0 ) - /* Return all steps for specific job_id */ - { - job_ptr = find_job_record( job_id ); - if (job_ptr) { - step_record_iterator = list_iterator_create (job_ptr->step_list); - while ((step_ptr = (struct step_record *) list_next (step_record_iterator))) { - pack_ctld_job_step_info( step_ptr, buffer ); - steps_packed++; - } - list_iterator_destroy (step_record_iterator); - } - else - error_code = ESLURM_INVALID_JOB_ID; - } - - else - /* Return step with give step_id/job_id */ - { - job_ptr = find_job_record( job_id ); - step_ptr = find_step_record( job_ptr, step_id ); - if ( step_ptr == NULL ) - error_code = ESLURM_INVALID_JOB_ID; - else { - pack_ctld_job_step_info( step_ptr, buffer ); - steps_packed++; - } - } - - /* put the real record count in the message body header */ - tmp_offset = get_buf_offset (buffer); - set_buf_offset (buffer, 0); - pack_time (last_job_update, buffer); - pack32 (steps_packed, buffer); - set_buf_offset (buffer, tmp_offset); - - return error_code; -} - - diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c index 37ba9bda2cd..9483d24bf8b 100644 --- a/src/slurmctld/partition_mgr.c +++ b/src/slurmctld/partition_mgr.c @@ -27,7 +27,7 @@ \*****************************************************************************/ #ifdef HAVE_CONFIG_H -# include <config.h> +# include "config.h" #endif #include <ctype.h> @@ -42,44 +42,47 @@ #include <sys/stat.h> #include <fcntl.h> -#include <src/common/hostlist.h> -#include <src/common/list.h> 
-#include <src/common/pack.h> -#include <src/common/xstring.h> -#include <src/slurmctld/locks.h> -#include <src/slurmctld/slurmctld.h> +#include "src/common/hostlist.h" +#include "src/common/list.h" +#include "src/common/pack.h" +#include "src/common/xstring.h" +#include "src/slurmctld/locks.h" +#include "src/slurmctld/slurmctld.h" #define BUF_SIZE 1024 +/* Global variables */ struct part_record default_part; /* default configuration values */ List part_list = NULL; /* partition list */ char default_part_name[MAX_NAME_LEN]; /* name of default partition */ -struct part_record *default_part_loc = NULL; /* location of default partition */ -time_t last_part_update; /* time of last update to partition records */ +struct part_record *default_part_loc = NULL; /* default partition location */ +time_t last_part_update; /* time of last update to partition records */ -static int build_part_bitmap (struct part_record *part_record_point); -static void dump_part_state (struct part_record *part_record_point, Buf buffer); -static uid_t *get_groups_members (char *group_names); -static uid_t *get_group_members (char *group_name); -static time_t get_group_tlm (void); -static void list_delete_part (void *part_entry); -static int uid_list_size (uid_t *uid_list_ptr); +static int _build_part_bitmap(struct part_record *part_record_point); +static int _delete_part_record(char *name); +static void _dump_part_state(struct part_record *part_record_point, + Buf buffer); +static uid_t *_get_groups_members(char *group_names); +static uid_t *_get_group_members(char *group_name); +static time_t _get_group_tlm(void); +static void _list_delete_part(void *part_entry); +static int _uid_list_size(uid_t * uid_list_ptr); /* - * build_part_bitmap - update the total_cpus, total_nodes, and node_bitmap for the specified - * partition, also reset the partition pointers in the node back to this partition. - * input: part_record_point - pointer to the partition - * output: returns 0 if no error, errno otherwise + * _build_part_bitmap - update the total_cpus, total_nodes, and node_bitmap + * for the specified partition, also reset the partition pointers in + * the node back to this partition. + * IN part_record_point - pointer to the partition + * RET 0 if no error, errno otherwise * global: node_record_table_ptr - pointer to global node table - * NOTE: this does not report nodes defined in more than one partition. this is checked only - * upon reading the configuration file, not on an update + * NOTE: this does not report nodes defined in more than one partition. 
this + * is checked only upon reading the configuration file, not on an update */ -int -build_part_bitmap (struct part_record *part_record_point) +static int _build_part_bitmap(struct part_record *part_record_point) { int i, update_nodes; - char *this_node_name ; + char *this_node_name; bitstr_t *old_bitmap; struct node_record *node_record_point; /* pointer to node_record */ hostlist_t host_list; @@ -88,64 +91,70 @@ build_part_bitmap (struct part_record *part_record_point) part_record_point->total_nodes = 0; if (part_record_point->node_bitmap == NULL) { - part_record_point->node_bitmap = (bitstr_t *) bit_alloc (node_record_count); + part_record_point->node_bitmap = + (bitstr_t *) bit_alloc(node_record_count); if (part_record_point->node_bitmap == NULL) fatal("bit_alloc memory allocation failure"); old_bitmap = NULL; - } - else { - old_bitmap = bit_copy (part_record_point->node_bitmap); - bit_nclear (part_record_point->node_bitmap, 0, node_record_count-1); + } else { + old_bitmap = bit_copy(part_record_point->node_bitmap); + bit_nclear(part_record_point->node_bitmap, 0, + node_record_count - 1); } - if (part_record_point->nodes == NULL) { /* no nodes in partition */ - if (old_bitmap) /* leave with empty bitmap */ - bit_free (old_bitmap); + if (part_record_point->nodes == NULL) { /* no nodes in partition */ + if (old_bitmap) /* leave with empty bitmap */ + bit_free(old_bitmap); return 0; } - if ( (host_list = hostlist_create (part_record_point->nodes)) == NULL) { + if ((host_list = + hostlist_create(part_record_point->nodes)) == NULL) { if (old_bitmap) - bit_free (old_bitmap); - error ("hostlist_create error on %s, %m", part_record_point->nodes); + bit_free(old_bitmap); + error("hostlist_create error on %s, %m", + part_record_point->nodes); return ESLURM_INVALID_NODE_NAME; } - while ( (this_node_name = hostlist_shift (host_list)) ) { - node_record_point = find_node_record (this_node_name); + while ((this_node_name = hostlist_shift(host_list))) { + node_record_point = find_node_record(this_node_name); if (node_record_point == NULL) { - error ("build_part_bitmap: invalid node specified %s", this_node_name); - free (this_node_name); + error + ("_build_part_bitmap: invalid node specified %s", + this_node_name); + free(this_node_name); if (old_bitmap) - bit_free (old_bitmap); - hostlist_destroy (host_list); + bit_free(old_bitmap); + hostlist_destroy(host_list); return ESLURM_INVALID_NODE_NAME; - } + } part_record_point->total_nodes++; part_record_point->total_cpus += node_record_point->cpus; node_record_point->partition_ptr = part_record_point; - if (old_bitmap) - bit_clear (old_bitmap, - (int) (node_record_point - node_record_table_ptr)); - bit_set (part_record_point->node_bitmap, - (int) (node_record_point - node_record_table_ptr)); - free (this_node_name); + if (old_bitmap) + bit_clear(old_bitmap, + (int) (node_record_point - + node_record_table_ptr)); + bit_set(part_record_point->node_bitmap, + (int) (node_record_point - node_record_table_ptr)); + free(this_node_name); } - hostlist_destroy (host_list); + hostlist_destroy(host_list); /* unlink nodes removed from the partition */ if (old_bitmap) { update_nodes = 0; for (i = 0; i < node_record_count; i++) { - if (bit_test (old_bitmap, i) == 0) + if (bit_test(old_bitmap, i) == 0) continue; node_record_table_ptr[i].partition_ptr = NULL; update_nodes = 1; } - bit_free (old_bitmap); + bit_free(old_bitmap); if (update_nodes) - last_node_update = time (NULL); - } + last_node_update = time(NULL); + } return 0; } @@ -153,23 +162,22 @@ build_part_bitmap 
(struct part_record *part_record_point) /* * create_part_record - create a partition record - * output: returns a pointer to the record or NULL if error + * RET a pointer to the record or NULL if error * global: default_part - default partition parameters * part_list - global partition list * NOTE: the record's values are initialized to those of default_part - * NOTE: allocates memory that should be xfreed with delete_part_record + * NOTE: allocates memory that should be xfreed with _delete_part_record */ -struct part_record * -create_part_record (void) +struct part_record *create_part_record(void) { struct part_record *part_record_point; - last_part_update = time (NULL); + last_part_update = time(NULL); part_record_point = - (struct part_record *) xmalloc (sizeof (struct part_record)); + (struct part_record *) xmalloc(sizeof(struct part_record)); - strcpy (part_record_point->name, "DEFAULT"); + strcpy(part_record_point->name, "DEFAULT"); part_record_point->max_time = default_part.max_time; part_record_point->max_nodes = default_part.max_nodes; part_record_point->root_only = default_part.root_only; @@ -182,126 +190,128 @@ create_part_record (void) if (default_part.allow_groups) { part_record_point->allow_groups = - (char *) xmalloc (strlen (default_part.allow_groups) + 1); - strcpy (part_record_point->allow_groups, - default_part.allow_groups); - } - else + (char *) xmalloc(strlen(default_part.allow_groups) + 1); + strcpy(part_record_point->allow_groups, + default_part.allow_groups); + } else part_record_point->allow_groups = NULL; if (default_part.nodes) { part_record_point->nodes = - (char *) xmalloc (strlen (default_part.nodes) + 1); - strcpy (part_record_point->nodes, default_part.nodes); - } - else + (char *) xmalloc(strlen(default_part.nodes) + 1); + strcpy(part_record_point->nodes, default_part.nodes); + } else part_record_point->nodes = NULL; - if (list_append (part_list, part_record_point) == NULL) - fatal ("create_part_record: unable to allocate memory"); + if (list_append(part_list, part_record_point) == NULL) + fatal("create_part_record: unable to allocate memory"); return part_record_point; } /* - * delete_part_record - delete record for partition with specified name - * input: name - name of the desired node, delete all partitions if pointer is NULL - * output: return 0 on success, errno otherwise + * _delete_part_record - delete record for partition with specified name + * IN name - name of the desired partition, delete all partitions if NULL + * RET 0 on success, errno otherwise * global: part_list - global partition list */ -int -delete_part_record (char *name) +static int _delete_part_record(char *name) { int i; - last_part_update = time (NULL); + last_part_update = time(NULL); if (name == NULL) - i = list_delete_all (part_list, &list_find_part, - "universal_key"); + i = list_delete_all(part_list, &list_find_part, + "universal_key"); else - i = list_delete_all (part_list, &list_find_part, name); + i = list_delete_all(part_list, &list_find_part, name); if ((name == NULL) || (i != 0)) return 0; - error ("delete_part_record: attempt to delete non-existent partition %s", name); + error + ("_delete_part_record: attempt to delete non-existent partition %s", + name); return ENOENT; } /* dump_all_part_state - save the state of all partitions to file */ -int -dump_all_part_state ( void ) +int dump_all_part_state(void) { ListIterator part_record_iterator; struct part_record *part_record_point; int error_code = 0, log_fd; char *old_file, *new_file, *reg_file; /* Locks: Read partition */ 
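/* Editorial outline (a sketch, using the part_state, part_state.old and part_state.new names set up below): dump_all_part_state() first writes a complete image to part_state.new via creat()/write(), then rotates it into place with unlink(old_file); link(reg_file, old_file); unlink(reg_file); link(new_file, reg_file); unlink(new_file); so an interrupted save still leaves a complete state image reachable under one of the three names. */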
- slurmctld_lock_t part_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, READ_LOCK }; - Buf buffer = init_buf(BUF_SIZE*16); + slurmctld_lock_t part_read_lock = + { READ_LOCK, NO_LOCK, NO_LOCK, READ_LOCK }; + Buf buffer = init_buf(BUF_SIZE * 16); /* write header: time */ - pack_time (time (NULL), buffer); + pack_time(time(NULL), buffer); /* write partition records to buffer */ - lock_slurmctld (part_read_lock); - part_record_iterator = list_iterator_create (part_list); - while ((part_record_point = (struct part_record *) list_next (part_record_iterator))) { + lock_slurmctld(part_read_lock); + part_record_iterator = list_iterator_create(part_list); + while ((part_record_point = + (struct part_record *) list_next(part_record_iterator))) { if (part_record_point->magic != PART_MAGIC) - fatal ("dump_all_part_state: data integrity is bad"); - - dump_part_state (part_record_point, buffer); - } - list_iterator_destroy (part_record_iterator); - unlock_slurmctld (part_read_lock); + fatal("dump_all_part_state: data integrity is bad"); + _dump_part_state(part_record_point, buffer); + } + list_iterator_destroy(part_record_iterator); + unlock_slurmctld(part_read_lock); /* write the buffer to file */ - old_file = xstrdup (slurmctld_conf.state_save_location); - xstrcat (old_file, "/part_state.old"); - reg_file = xstrdup (slurmctld_conf.state_save_location); - xstrcat (reg_file, "/part_state"); - new_file = xstrdup (slurmctld_conf.state_save_location); - xstrcat (new_file, "/part_state.new"); - lock_state_files (); - log_fd = creat (new_file, 0600); + old_file = xstrdup(slurmctld_conf.state_save_location); + xstrcat(old_file, "/part_state.old"); + reg_file = xstrdup(slurmctld_conf.state_save_location); + xstrcat(reg_file, "/part_state"); + new_file = xstrdup(slurmctld_conf.state_save_location); + xstrcat(new_file, "/part_state.new"); + lock_state_files(); + log_fd = creat(new_file, 0600); if (log_fd == 0) { - error ("Can't save state, error creating file %s, %m", new_file); + error("Can't save state, error creating file %s, %m", + new_file); error_code = errno; - } - else { - if (write (log_fd, get_buf_data(buffer), get_buf_offset(buffer)) != - get_buf_offset(buffer)) { - error ("Can't save state, error writing file %s, %m", new_file); + } else { + if (write + (log_fd, get_buf_data(buffer), + get_buf_offset(buffer)) != get_buf_offset(buffer)) { + error + ("Can't save state, error writing file %s, %m", + new_file); error_code = errno; } - close (log_fd); + close(log_fd); } - if (error_code) - (void) unlink (new_file); - else { /* file shuffle */ - (void) unlink (old_file); - (void) link (reg_file, old_file); - (void) unlink (reg_file); - (void) link (new_file, reg_file); - (void) unlink (new_file); + if (error_code) + (void) unlink(new_file); + else { /* file shuffle */ + (void) unlink(old_file); + (void) link(reg_file, old_file); + (void) unlink(reg_file); + (void) link(new_file, reg_file); + (void) unlink(new_file); } - xfree (old_file); - xfree (reg_file); - xfree (new_file); - unlock_state_files (); + xfree(old_file); + xfree(reg_file); + xfree(new_file); + unlock_state_files(); - free_buf (buffer); + free_buf(buffer); return 0; } /* - * dump_part_state - dump the state of a specific partition to a buffer - * part_record_point (I) - pointer to partition for which information is requested - * buffer (I/O) - location to store data, pointers automatically advanced + * _dump_part_state - dump the state of a specific partition to a buffer + * IN part_record_point - pointer to partition for which information + * is 
requested + * IN/OUT buffer - location to store data, pointers automatically advanced */ -void -dump_part_state (struct part_record *part_record_point, Buf buffer) +static void _dump_part_state(struct part_record *part_record_point, Buf buffer) { uint16_t default_part_flag; @@ -310,25 +320,25 @@ dump_part_state (struct part_record *part_record_point, Buf buffer) else default_part_flag = 0; - packstr (part_record_point->name, buffer); - pack32 (part_record_point->max_time, buffer); - pack32 (part_record_point->max_nodes, buffer); + packstr(part_record_point->name, buffer); + pack32(part_record_point->max_time, buffer); + pack32(part_record_point->max_nodes, buffer); - pack16 (default_part_flag, buffer); - pack16 ((uint16_t)part_record_point->root_only, buffer); - pack16 ((uint16_t)part_record_point->shared, buffer); + pack16(default_part_flag, buffer); + pack16((uint16_t) part_record_point->root_only, buffer); + pack16((uint16_t) part_record_point->shared, buffer); - pack16 ((uint16_t)part_record_point->state_up, buffer); - packstr (part_record_point->allow_groups, buffer); - packstr (part_record_point->nodes, buffer); + pack16((uint16_t) part_record_point->state_up, buffer); + packstr(part_record_point->allow_groups, buffer); + packstr(part_record_point->nodes, buffer); } /* - * load_part_state - load the partition state from file, recover from slurmctld restart. - * execute this after loading the configuration file data. + * load_part_state - load the partition state from file, recover from + * slurmctld restart. execute this after loading the configuration + * file data. */ -int -load_part_state ( void ) +int load_part_state(void) { char *part_name, *allow_groups, *nodes, *state_file, *data = NULL; uint32_t max_time, max_nodes; @@ -341,121 +351,126 @@ load_part_state ( void ) Buf buffer; /* read the file */ - state_file = xstrdup (slurmctld_conf.state_save_location); - xstrcat (state_file, "/part_state"); - lock_state_files (); - state_fd = open (state_file, O_RDONLY); + state_file = xstrdup(slurmctld_conf.state_save_location); + xstrcat(state_file, "/part_state"); + lock_state_files(); + state_fd = open(state_file, O_RDONLY); if (state_fd < 0) { - info ("No partition state file (%s) to recover", state_file); + info("No partition state file (%s) to recover", + state_file); error_code = ENOENT; - } - else { + } else { data_allocated = BUF_SIZE; data = xmalloc(data_allocated); - while ((data_read = read (state_fd, &data[data_size], BUF_SIZE)) == BUF_SIZE) { + while ((data_read = + read(state_fd, &data[data_size], + BUF_SIZE)) == BUF_SIZE) { data_size += data_read; data_allocated += BUF_SIZE; xrealloc(data, data_allocated); } data_size += data_read; - close (state_fd); - if (data_read < 0) - error ("Error reading file %s: %m", state_file); + close(state_fd); + if (data_read < 0) + error("Error reading file %s: %m", state_file); } - xfree (state_file); - unlock_state_files (); - - buffer = create_buf (data, data_size); - safe_unpack_time (&time, buffer); - - while (remaining_buf (buffer) > 0) { - safe_unpackstr_xmalloc (&part_name, &name_len, buffer); - safe_unpack32 (&max_time, buffer); - safe_unpack32 (&max_nodes, buffer); - safe_unpack16 (&def_part_flag, buffer); - safe_unpack16 (&root_only, buffer); - safe_unpack16 (&shared, buffer); - safe_unpack16 (&state_up, buffer); - safe_unpackstr_xmalloc (&allow_groups, &name_len, buffer); - safe_unpackstr_xmalloc (&nodes, &name_len, buffer); + xfree(state_file); + unlock_state_files(); + + buffer = create_buf(data, data_size); + 
safe_unpack_time(&time, buffer); + + while (remaining_buf(buffer) > 0) { + safe_unpackstr_xmalloc(&part_name, &name_len, buffer); + safe_unpack32(&max_time, buffer); + safe_unpack32(&max_nodes, buffer); + safe_unpack16(&def_part_flag, buffer); + safe_unpack16(&root_only, buffer); + safe_unpack16(&shared, buffer); + safe_unpack16(&state_up, buffer); + safe_unpackstr_xmalloc(&allow_groups, &name_len, buffer); + safe_unpackstr_xmalloc(&nodes, &name_len, buffer); /* validity test as possible */ - if ((def_part_flag > 1) || - (root_only > 1) || - (shared > SHARED_FORCE) || - (state_up > 1)) { - error ("Invalid data for partition %s: def_part_flag=%u, root_only=%u, shared=%u, state_up=%u", - part_name, def_part_flag, root_only, shared, state_up); - error ("No more partition data will be processed from the checkpoint file"); + if ((def_part_flag > 1) || + (root_only > 1) || + (shared > SHARED_FORCE) || (state_up > 1)) { + error + ("Invalid data for partition %s: def_part_flag=%u, root_only=%u, shared=%u, state_up=%u", + part_name, def_part_flag, root_only, shared, + state_up); + error + ("No more partition data will be processed from the checkpoint file"); if (part_name) - xfree (part_name); + xfree(part_name); error_code = EINVAL; - break; + break; } /* find record and perform update */ - part_ptr = list_find_first (part_list, &list_find_part, part_name); + part_ptr = + list_find_first(part_list, &list_find_part, part_name); if (part_ptr) { part_ptr->max_time = max_time; part_ptr->max_nodes = max_nodes; if (def_part_flag) { - strcpy (default_part_name, part_name); - default_part_loc = part_ptr; + strcpy(default_part_name, part_name); + default_part_loc = part_ptr; } part_ptr->root_only = root_only; part_ptr->shared = shared; part_ptr->state_up = state_up; if (part_ptr->allow_groups) - xfree (part_ptr->allow_groups); + xfree(part_ptr->allow_groups); part_ptr->allow_groups = allow_groups; if (part_ptr->nodes) - xfree (part_ptr->nodes); + xfree(part_ptr->nodes); part_ptr->nodes = nodes; } else { - info ("load_part_state: partition %s removed from configuration file.", - part_name); + info("load_part_state: partition %s removed from configuration file", + part_name); } if (part_name) - xfree (part_name); + xfree(part_name); } - free_buf (buffer); + free_buf(buffer); return error_code; -unpack_error: - error ("Incomplete partition data checkpoint file. State not completely restored"); - free_buf (buffer); + unpack_error: + error + ("Incomplete partition data checkpoint file. State not completely restored"); + free_buf(buffer); return EFAULT; } /* * find_part_record - find a record for partition with specified name - * input: name - name of the desired partition - * output: return pointer to node partition or null if not found + * IN name - name of the desired partition + * RET pointer to node partition or NULL if not found * global: part_list - global partition list */ -struct part_record * -find_part_record (char *name){ - return list_find_first (part_list, &list_find_part, name); +struct part_record *find_part_record(char *name) +{ + return list_find_first(part_list, &list_find_part, name); } /* - * init_part_conf - initialize the default partition configuration values and create - * a (global) partition list. + * init_part_conf - initialize the default partition configuration values + * and create a (global) partition list. * this should be called before creating any partition entries. 
- * output: return value - 0 if no error, otherwise an error code + * RET 0 if no error, otherwise an error code * global: default_part - default partition values * part_list - global partition list */ -int -init_part_conf () +int init_part_conf(void) { - last_part_update = time (NULL); + last_part_update = time(NULL); - strcpy (default_part.name, "DEFAULT"); + strcpy(default_part.name, "DEFAULT"); default_part.max_time = INFINITE; default_part.max_nodes = INFINITE; default_part.root_only = 0; @@ -464,76 +479,78 @@ init_part_conf () default_part.total_nodes = 0; default_part.total_cpus = 0; if (default_part.nodes) - xfree (default_part.nodes); + xfree(default_part.nodes); default_part.nodes = (char *) NULL; if (default_part.allow_groups) - xfree (default_part.allow_groups); + xfree(default_part.allow_groups); default_part.allow_groups = (char *) NULL; if (default_part.allow_uids) - xfree (default_part.allow_uids); + xfree(default_part.allow_uids); default_part.allow_uids = (uid_t *) NULL; if (default_part.node_bitmap) - bit_free (default_part.node_bitmap); + bit_free(default_part.node_bitmap); default_part.node_bitmap = (bitstr_t *) NULL; if (part_list) /* delete defunct partitions */ - (void) delete_part_record (NULL); + (void) _delete_part_record(NULL); else - part_list = list_create (&list_delete_part); + part_list = list_create(&_list_delete_part); - if (part_list == NULL) - fatal ("init_part_conf: list_create can not allocate memory"); - + if (part_list == NULL) + fatal + ("init_part_conf: list_create can not allocate memory"); - strcpy (default_part_name, ""); + + strcpy(default_part_name, ""); default_part_loc = (struct part_record *) NULL; return 0; } /* - * list_delete_part - delete an entry from the global partition list, + * _list_delete_part - delete an entry from the global partition list, * see common/list.h for documentation * global: node_record_count - count of nodes in the system * node_record_table_ptr - pointer to global node table */ -void -list_delete_part (void *part_entry) +static void _list_delete_part(void *part_entry) { struct part_record *part_record_point; /* pointer to part_record */ int i; part_record_point = (struct part_record *) part_entry; for (i = 0; i < node_record_count; i++) { - if (node_record_table_ptr[i].partition_ptr != part_record_point) + if (node_record_table_ptr[i].partition_ptr != + part_record_point) continue; node_record_table_ptr[i].partition_ptr = NULL; - } + } if (part_record_point->allow_groups) - xfree (part_record_point->allow_groups); + xfree(part_record_point->allow_groups); if (part_record_point->allow_uids) - xfree (part_record_point->allow_uids); + xfree(part_record_point->allow_uids); if (part_record_point->nodes) - xfree (part_record_point->nodes); + xfree(part_record_point->nodes); if (part_record_point->node_bitmap) - bit_free (part_record_point->node_bitmap); - xfree (part_entry); + bit_free(part_record_point->node_bitmap); + xfree(part_entry); } /* - * list_find_part - find an entry in the partition list, see common/list.h for documentation, - * key is partition name or "universal_key" for all partitions + * list_find_part - find an entry in the partition list, see common/list.h + * for documentation + * IN key - partition name or "universal_key" for all partitions + * RET 1 if matches key, 0 otherwise * global- part_list - the global partition list */ -int -list_find_part (void *part_entry, void *key) +int list_find_part(void *part_entry, void *key) { - if (strcmp (key, "universal_key") == 0) + if (strcmp(key, 
"universal_key") == 0) return 1; - if (strncmp (((struct part_record *) part_entry)->name, - (char *) key, MAX_NAME_LEN) == 0) + if (strncmp(((struct part_record *) part_entry)->name, + (char *) key, MAX_NAME_LEN) == 0) return 1; return 0; @@ -543,21 +560,17 @@ list_find_part (void *part_entry, void *key) /* * pack_all_part - dump all partition information for all partitions in * machine independent form (for network transmission) - * input: buffer_ptr - location into which a pointer to the data is to be stored. - * the calling function must xfree the storage. - * buffer_size - location into which the size of the created buffer is in bytes - * update_time - dump new data only if partition records updated since time - * specified, otherwise return empty buffer - * output: buffer_ptr - the pointer is set to the allocated buffer. - * buffer_size - set to size of the buffer in bytes - * update_time - set to time partition records last updated + * OUT buffer_ptr - the pointer is set to the allocated buffer. + * OUT buffer_size - set to size of the buffer in bytes + * IN/OUT update_time - dump new data only if partition records updated , + * set to time partition records last updated + * since time specified, otherwise return empty buffer * global: part_list - global list of partition records * NOTE: the buffer at *buffer_ptr must be xfreed by the caller - * NOTE: change PART_STRUCT_VERSION in common/slurmlib.h whenever the format changes - * NOTE: change slurm_load_part() in api/part_info.c whenever the data format changes + * NOTE: change slurm_load_part() in api/part_info.c if data format changes */ -void -pack_all_part (char **buffer_ptr, int *buffer_size, time_t * update_time) +void +pack_all_part(char **buffer_ptr, int *buffer_size, time_t * update_time) { ListIterator part_record_iterator; struct part_record *part_record_point; @@ -569,49 +582,49 @@ pack_all_part (char **buffer_ptr, int *buffer_size, time_t * update_time) if (*update_time == last_part_update) return; - buffer = init_buf (BUF_SIZE*16); + buffer = init_buf(BUF_SIZE * 16); /* write haeader: version and time */ - parts_packed = 0 ; - pack32 ((uint32_t) parts_packed, buffer); - pack_time (last_part_update, buffer); + parts_packed = 0; + pack32((uint32_t) parts_packed, buffer); + pack_time(last_part_update, buffer); /* write individual partition records */ - part_record_iterator = list_iterator_create (part_list); - while ((part_record_point = - (struct part_record *) list_next (part_record_iterator))) { + part_record_iterator = list_iterator_create(part_list); + while ((part_record_point = + (struct part_record *) list_next(part_record_iterator))) { if (part_record_point->magic != PART_MAGIC) - fatal ("pack_all_part: data integrity is bad"); + fatal("pack_all_part: data integrity is bad"); pack_part(part_record_point, buffer); - parts_packed ++ ; - } + parts_packed++; + } - list_iterator_destroy (part_record_iterator); + list_iterator_destroy(part_record_iterator); - /* put the real record count in the message body header */ - tmp_offset = get_buf_offset (buffer); - set_buf_offset (buffer, 0); - pack32 ((uint32_t) parts_packed, buffer); - set_buf_offset (buffer, tmp_offset); + /* put the real record count in the message body header */ + tmp_offset = get_buf_offset(buffer); + set_buf_offset(buffer, 0); + pack32((uint32_t) parts_packed, buffer); + set_buf_offset(buffer, tmp_offset); *update_time = last_part_update; - *buffer_size = get_buf_offset (buffer); - buffer_ptr[0] = xfer_buf_data (buffer); + *buffer_size = 
get_buf_offset(buffer); + buffer_ptr[0] = xfer_buf_data(buffer); } /* - * pack_part - dump all configuration information about a specific partition in - * machine independent form (for network transmission) - * dump_part_ptr (I) - pointer to partition for which information is requested - * buffer (I/O) - buffer in which data is place, pointers automatically updated + * pack_part - dump all configuration information about a specific partition + * in machine independent form (for network transmission) + * IN dump_part_ptr - pointer to partition for which information is requested + * IN/OUT buffer - buffer in which data is placed, pointers automatically + * updated * global: default_part_loc - pointer to the default partition * NOTE: if you make any changes here be sure to make the corresponding * changes to load_part_config in api/partition_info.c */ -void -pack_part (struct part_record *part_record_point, Buf buffer) +void pack_part(struct part_record *part_record_point, Buf buffer) { uint16_t default_part_flag; char node_inx_ptr[BUF_SIZE]; @@ -621,146 +634,155 @@ pack_part (struct part_record *part_record_point, Buf buffer) else default_part_flag = 0; - packstr (part_record_point->name, buffer); - pack32 (part_record_point->max_time, buffer); - pack32 (part_record_point->max_nodes, buffer); - pack32 (part_record_point->total_nodes, buffer); + packstr(part_record_point->name, buffer); + pack32(part_record_point->max_time, buffer); + pack32(part_record_point->max_nodes, buffer); + pack32(part_record_point->total_nodes, buffer); - pack32 (part_record_point->total_cpus, buffer); - pack16 (default_part_flag, buffer); - pack16 ((uint16_t)part_record_point->root_only, buffer); - pack16 ((uint16_t)part_record_point->shared, buffer); + pack32(part_record_point->total_cpus, buffer); + pack16(default_part_flag, buffer); + pack16((uint16_t) part_record_point->root_only, buffer); + pack16((uint16_t) part_record_point->shared, buffer); - pack16 ((uint16_t)part_record_point->state_up, buffer); - packstr (part_record_point->allow_groups, buffer); - packstr (part_record_point->nodes, buffer); + pack16((uint16_t) part_record_point->state_up, buffer); + packstr(part_record_point->allow_groups, buffer); + packstr(part_record_point->nodes, buffer); if (part_record_point->node_bitmap) { - bit_fmt (node_inx_ptr, BUF_SIZE, part_record_point->node_bitmap); - packstr (node_inx_ptr, buffer); - } - else - packstr ("", buffer); + bit_fmt(node_inx_ptr, BUF_SIZE, + part_record_point->node_bitmap); + packstr(node_inx_ptr, buffer); + } else + packstr("", buffer); } /* * update_part - update a partition's configuration data + * IN part_desc - description of partition changes + * RET 0 or an error code * global: part_list - list of partition entries * last_part_update - update time of partition records */ -int -update_part (update_part_msg_t * part_desc ) +int update_part(update_part_msg_t * part_desc) { int error_code, i; struct part_record *part_ptr; - if ((part_desc -> name == NULL ) || - (strlen (part_desc->name ) >= MAX_NAME_LEN)) { - error ("update_part: invalid partition name %s", part_desc->name); - return ESLURM_INVALID_PARTITION_NAME ; - } + if ((part_desc->name == NULL) || + (strlen(part_desc->name) >= MAX_NAME_LEN)) { + error("update_part: invalid partition name %s", + part_desc->name); + return ESLURM_INVALID_PARTITION_NAME; + } error_code = 0; - part_ptr = list_find_first (part_list, &list_find_part, part_desc->name); + part_ptr = + list_find_first(part_list, &list_find_part, part_desc->name); if (part_ptr == 
NULL) { - error ("update_part: partition %s does not exist, being created.", - part_desc->name); - part_ptr = create_part_record (); - strcpy(part_ptr->name, part_desc->name ); - } + error + ("update_part: partition %s does not exist, being created", + part_desc->name); + part_ptr = create_part_record(); + strcpy(part_ptr->name, part_desc->name); + } - last_part_update = time (NULL); + last_part_update = time(NULL); if (part_desc->max_time != NO_VAL) { - info ("update_part: setting max_time to %d for partition %s", - part_desc->max_time, part_desc->name); + info("update_part: setting max_time to %d for partition %s", + part_desc->max_time, part_desc->name); part_ptr->max_time = part_desc->max_time; - } + } if (part_desc->max_nodes != NO_VAL) { - info ("update_part: setting max_nodes to %d for partition %s", - part_desc->max_nodes, part_desc->name); + info("update_part: setting max_nodes to %d for partition %s", + part_desc->max_nodes, part_desc->name); part_ptr->max_nodes = part_desc->max_nodes; - } + } if (part_desc->root_only != (uint16_t) NO_VAL) { - info ("update_part: setting root_only to %d for partition %s", - part_desc->root_only, part_desc->name); + info("update_part: setting root_only to %d for partition %s", + part_desc->root_only, part_desc->name); part_ptr->root_only = part_desc->root_only; - } + } if (part_desc->state_up != (uint16_t) NO_VAL) { - info ("update_part: setting state_up to %d for partition %s", - part_desc->state_up, part_desc->name); + info("update_part: setting state_up to %d for partition %s", + part_desc->state_up, part_desc->name); part_ptr->state_up = part_desc->state_up; - } + } if (part_desc->shared != (uint16_t) NO_VAL) { - info ("update_part: setting shared to %d for partition %s", - part_desc->shared, part_desc->name); + info("update_part: setting shared to %d for partition %s", + part_desc->shared, part_desc->name); part_ptr->shared = part_desc->shared; - } + } - if ((part_desc->default_part == 1) && - (strcmp(default_part_name, part_desc->name) != 0)) { - info ("update_part: changing default partition from %s to %s", - default_part_name, part_desc->name); - strcpy (default_part_name, part_desc->name); + if ((part_desc->default_part == 1) && + (strcmp(default_part_name, part_desc->name) != 0)) { + info("update_part: changing default partition from %s to %s", + default_part_name, part_desc->name); + strcpy(default_part_name, part_desc->name); default_part_loc = part_ptr; - } + } if (part_desc->allow_groups != NULL) { if (part_ptr->allow_groups) - xfree (part_ptr->allow_groups); + xfree(part_ptr->allow_groups); i = strlen(part_desc->allow_groups) + 1; part_ptr->allow_groups = xmalloc(i); - strcpy ( part_ptr->allow_groups , part_desc->allow_groups ) ; - info ("update_part: setting allow_groups to %s for partition %s", - part_desc->allow_groups, part_desc->name); + strcpy(part_ptr->allow_groups, part_desc->allow_groups); + info("update_part: setting allow_groups to %s for partition %s", + part_desc->allow_groups, part_desc->name); if (part_ptr->allow_uids) - xfree (part_ptr->allow_uids); - part_ptr->allow_uids = get_groups_members (part_desc->allow_groups); - } + xfree(part_ptr->allow_uids); + part_ptr->allow_uids = + _get_groups_members(part_desc->allow_groups); + } if (part_desc->nodes != NULL) { char *backup_node_list; backup_node_list = part_ptr->nodes; i = strlen(part_desc->nodes) + 1; part_ptr->nodes = xmalloc(i); - strcpy ( part_ptr->nodes , part_desc->nodes ) ; + strcpy(part_ptr->nodes, part_desc->nodes); - error_code = build_part_bitmap 
+		error_code = _build_part_bitmap(part_ptr);
 		if (error_code) {
 			if (part_ptr->nodes)
-				xfree (part_ptr->nodes);
+				xfree(part_ptr->nodes);
 			part_ptr->nodes = backup_node_list;
-		}
-		else {
-			info ("update_part: setting nodes to %s for partition %s",
-				part_desc->nodes, part_desc->name);
+		} else {
+			info("update_part: setting nodes to %s for partition %s",
+			     part_desc->nodes, part_desc->name);
 			if (backup_node_list)
 				xfree(backup_node_list);
 		}
 	}
-	
+
 	return error_code;
 }
 
 
-/* validate_group - validate that the submit uid is authorized to run in this partition */
-int
-validate_group (struct part_record *part_ptr, uid_t submit_uid)
+/*
+ * validate_group - validate that the submit uid is authorized to run in
+ *	this partition
+ * IN part_ptr - pointer to a partition
+ * IN submit_uid - user submitting the job
+ * RET 1 if permitted to run, 0 otherwise
+ */
+int validate_group(struct part_record *part_ptr, uid_t submit_uid)
 {
 	int i;
 
 	if (part_ptr->allow_groups == NULL)
 		return 1;	/* all users allowed */
-	if ( (submit_uid == 0) || (submit_uid = getuid ()) )
+	if ((submit_uid == 0) || (submit_uid == getuid()))
 		return 1;	/* super-user can run anywhere */
 	if (part_ptr->allow_uids == NULL)
 		return 0;	/* no non-super-users in the list */
 
-	for (i=0; part_ptr->allow_uids[i]; i++) {
+	for (i = 0; part_ptr->allow_uids[i]; i++) {
 		if (part_ptr->allow_uids[i] == submit_uid)
 			return 1;
 	}
@@ -768,39 +790,45 @@ validate_group (struct part_record *part_ptr, uid_t submit_uid)
 }
 
 
-/* load_part_uid_allow_list - for every partition reload the allow_uid list if "force"
- *	is true or the GROUP_FILE has changed */
-void
-load_part_uid_allow_list ( int force )
+/*
+ * load_part_uid_allow_list - reload the allow_uid list of partitions
+ *	if required (updated group file or force set)
+ * IN force - if set then always reload the allow_uid list
+ */
+void load_part_uid_allow_list(int force)
 {
 	static time_t last_update_time;
 	time_t temp_time;
 	ListIterator part_record_iterator;
 	struct part_record *part_record_point;
 
-	temp_time = get_group_tlm();
-	if ( (force == 0) && (temp_time == last_update_time) )
+	temp_time = _get_group_tlm();
+	if ((force == 0) && (temp_time == last_update_time))
 		return;
 
-	debug ("Updating partition uid access list");
+	debug("Updating partition uid access list");
 	last_update_time = temp_time;
-	last_part_update = time (NULL);
+	last_part_update = time(NULL);
 
-	part_record_iterator = list_iterator_create (part_list);
-	while ((part_record_point = (struct part_record *) list_next (part_record_iterator))) {
+	part_record_iterator = list_iterator_create(part_list);
+	while ((part_record_point =
+		(struct part_record *) list_next(part_record_iterator))) {
 		if (part_record_point->allow_uids)
-			xfree (part_record_point->allow_uids);
-		part_record_point->allow_uids = get_groups_members (part_record_point->allow_groups);
+			xfree(part_record_point->allow_uids);
+		part_record_point->allow_uids =
+		    _get_groups_members(part_record_point->allow_groups);
 	}
-	list_iterator_destroy (part_record_iterator);
+	list_iterator_destroy(part_record_iterator);
 }
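
load_part_uid_allow_list makes the periodic group check cheap: unless force is set, it reloads nothing until the modification time returned by _get_group_tlm() differs from the value cached on the previous call. The same stat-and-compare gate, reduced to a stand-alone sketch; needs_reload and cached_mtime are illustrative names, not slurmctld symbols:

	#include <sys/stat.h>
	#include <time.h>

	static time_t cached_mtime;	/* mtime seen on the previous call */

	/* Return 1 when 'path' changed since we last looked (or when the
	 * caller forces it), mirroring the GROUP_FILE gate above. */
	static int needs_reload(const char *path, int force)
	{
		struct stat stat_buf;

		if (stat(path, &stat_buf))
			return force;	/* cannot stat; reload only if forced */
		if (force || (stat_buf.st_mtime != cached_mtime)) {
			cached_mtime = stat_buf.st_mtime;
			return 1;
		}
		return 0;
	}
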
 
 
-/* get_groups_members - indentify the users in a comma delimited list of group names
- *	Returns a zero terminated list of its UIDs or NULL on error
- * NOTE: User root has implicitly access to every group (the zero terminating uid)
- * NOTE: The caller must xfree non-NULL return values */
-uid_t *
-get_groups_members (char *group_names)
+/*
+ * _get_groups_members - identify the users in a list of group names
+ * IN group_names - a comma delimited list of group names
+ * RET a zero terminated list of its UIDs or NULL on error
+ * NOTE: User root has implicit access to every group
+ * NOTE: The caller must xfree non-NULL return values
+ */
+uid_t *_get_groups_members(char *group_names)
 {
 	int *group_uids = NULL;
 	int *temp_uids = NULL;
@@ -811,36 +839,37 @@ get_groups_members (char *group_names)
 		return NULL;
 
 	i = strlen(group_names) + 1;
-	tmp_names = xmalloc (i);
-	strcpy (tmp_names, group_names);
+	tmp_names = xmalloc(i);
+	strcpy(tmp_names, group_names);
 
-	one_group_name = strtok_r (tmp_names, ",", &name_ptr);
-	while ( one_group_name ) {
-		temp_uids = get_group_members (one_group_name);
+	one_group_name = strtok_r(tmp_names, ",", &name_ptr);
+	while (one_group_name) {
+		temp_uids = _get_group_members(one_group_name);
 		if (group_uids) {
 			/* concatenate the uid_lists and free the new one */
-			i = uid_list_size (group_uids);
-			j = uid_list_size (temp_uids);
-			xrealloc (group_uids, sizeof (uid_t) * (i+j+1));
-			for (k=0; k<=j; k++)
-				group_uids[i+k] = temp_uids[k];
-			xfree (temp_uids);
-		}
-		else
+			i = _uid_list_size(group_uids);
+			j = _uid_list_size(temp_uids);
+			xrealloc(group_uids, sizeof(uid_t) * (i + j + 1));
+			for (k = 0; k <= j; k++)
+				group_uids[i + k] = temp_uids[k];
+			xfree(temp_uids);
+		} else
			group_uids = temp_uids;
-		one_group_name = strtok_r (NULL, ",", &name_ptr);
+		one_group_name = strtok_r(NULL, ",", &name_ptr);
 	}
-	xfree (tmp_names);
+	xfree(tmp_names);
 	return group_uids;
 }
 
 
-/* get_group_members - indentify the users in a given group name
- *	Returns a zero terminated list of its UIDs or NULL on error
- * NOTE: User root has implicitly access to every group (the zero terminating uid)
- * NOTE: The caller must xfree non-NULL return values */
-uid_t *
-get_group_members (char *group_name)
+/*
+ * _get_group_members - identify the users in a given group name
+ * IN group_name - a single group name
+ * RET a zero terminated list of its UIDs or NULL on error
+ * NOTE: User root has implicit access to every group
+ * NOTE: The caller must xfree non-NULL return values
+ */
+uid_t *_get_group_members(char *group_name)
 {
 	struct group *group_struct_ptr;
 	struct passwd *user_pw_ptr;
@@ -848,77 +877,75 @@ get_group_members (char *group_name)
 	int *group_uids = NULL;
 	int uid_cnt = 0;
 
-	group_struct_ptr = getgrnam(group_name); /* Note: static memory, do not free */
+	group_struct_ptr = getgrnam(group_name);	/* Note: static memory,
							 * do not free */
 	if (group_struct_ptr == NULL) {
-		error ("Could not find configured group %s\n", group_name);
-		setgrent ();
+		error("Could not find configured group %s", group_name);
+		setgrent();
 		return NULL;
 	}
 
-	for (i=0; ; i++) {
+	for (i = 0;; i++) {
 		if (group_struct_ptr->gr_mem[i] == NULL)
 			break;
 	}
 	uid_cnt = i;
 
-	group_uids = (int *) xmalloc (sizeof (uid_t) * (uid_cnt + 1));
-	memset (group_uids, 0, (sizeof (uid_t) * (uid_cnt + 1)));
+	group_uids = (int *) xmalloc(sizeof(uid_t) * (uid_cnt + 1));
+	memset(group_uids, 0, (sizeof(uid_t) * (uid_cnt + 1)));
 
 	j = 0;
-	for (i=0; i<uid_cnt; i++) {
-		user_pw_ptr = getpwnam (group_struct_ptr->gr_mem[i]);
+	for (i = 0; i < uid_cnt; i++) {
+		user_pw_ptr = getpwnam(group_struct_ptr->gr_mem[i]);
 		if (user_pw_ptr) {
 			if (user_pw_ptr->pw_uid)
 				group_uids[j++] = user_pw_ptr->pw_uid;
-		}
-		else
-			error ("Could not find user %s in configured group %s\n",
-				group_struct_ptr->gr_mem[i], group_name);
-		setpwent ();
+		} else
+			error
+			    ("Could not find user %s in configured group %s",
+			     group_struct_ptr->gr_mem[i], group_name);
+		setpwent();
 	}
-	setgrent ();
+	setgrent();
 
 	return group_uids;
 }
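
The returned array carries no length; the zero uid doubles as the terminator, which is also why root (uid 0) never needs to appear in it. A hypothetical caller, stand-alone for illustration; the fixed sample list stands in for a real _get_groups_members() result, which would have to be xfreed:

	#include <stdio.h>
	#include <sys/types.h>

	int main(void)
	{
		/* fabricated allow list for illustration only */
		uid_t allow_uids[] = { 500, 501, 502, 0 };
		int i;

		for (i = 0; allow_uids[i]; i++)	/* stop at the 0 terminator */
			printf("uid %u may use this partition\n",
			       (unsigned int) allow_uids[i]);
		return 0;
	}
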
 
 
-/* get_group_tlm - return the time of last modification for the GROUP_FILE (/etc/group) */
-time_t
-get_group_tlm (void)
+/* _get_group_tlm - return the time of last modification for the GROUP_FILE */
+time_t _get_group_tlm(void)
 {
 	struct stat stat_buf;
 
-	if (stat (GROUP_FILE, &stat_buf)) {
-		error ("Can't stat file %s %m", GROUP_FILE);
+	if (stat(GROUP_FILE, &stat_buf)) {
+		error("Can't stat file %s %m", GROUP_FILE);
 		return (time_t) 0;
 	}
 	return stat_buf.st_mtime;
 }
 
 #if EXTREME_LOGGING
-/* print_group_members - print the members of a uid list */
-void
-print_group_members (uid_t *uid_list)
+/* _print_group_members - print the members of a uid list */
+static void _print_group_members(uid_t * uid_list)
 {
 	int i;
 
 	if (uid_list) {
-		for (i=0; uid_list[i]; i++) {
-			debug3 ("%u", (unsigned int) uid_list[i]);
+		for (i = 0; uid_list[i]; i++) {
+			debug3("%u", (unsigned int) uid_list[i]);
 		}
 	}
-	printf ("\n\n");
+	printf("\n\n");
 }
 #endif
 
-/* uid_list_size - return the count of uid's in a zero terminated list */
-int
-uid_list_size (uid_t *uid_list_ptr)
+/* _uid_list_size - return the count of uids in a zero terminated list */
+static int _uid_list_size(uid_t * uid_list_ptr)
 {
 	int i;
 
 	if (uid_list_ptr == NULL)
 		return 0;
 
-	for (i=0; ; i++) {
+	for (i = 0;; i++) {
 		if (uid_list_ptr[i] == 0)
 			break;
 	}
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index 8c30da6fadf..633c83ea42b 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -25,7 +25,7 @@
 \*****************************************************************************/
 
 #ifdef HAVE_CONFIG_H
-# include <config.h>
+# include "config.h"
 #endif
 
 #include <ctype.h>
@@ -38,23 +38,25 @@
 #include <time.h>
 #include <unistd.h>
 
-#include <src/common/hostlist.h>
-#include <src/common/list.h>
-#include <src/common/macros.h>
-#include <src/common/parse_spec.h>
-#include <src/common/read_config.h>
-#include <src/common/xstring.h>
-#include <src/slurmctld/locks.h>
-#include <src/slurmctld/slurmctld.h>
+#include "src/common/hostlist.h"
+#include "src/common/list.h"
+#include "src/slurmctld/locks.h"
+#include "src/common/macros.h"
+#include "src/common/parse_spec.h"
+#include "src/common/read_config.h"
+#include "src/common/xstring.h"
+#include "src/slurmctld/slurmctld.h"
 
 #define BUF_SIZE 1024
 
-static int	init_all_slurm_conf (void);
-static int	parse_node_spec (char *in_line);
-static int	parse_part_spec (char *in_line);
-static void	set_config_defaults (slurm_ctl_conf_t *ctl_conf_ptr);
+static int  _build_bitmaps(void);
+static int  _init_all_slurm_conf(void);
+static int  _parse_node_spec(char *in_line);
+static int  _parse_part_spec(char *in_line);
+static void _set_config_defaults(slurm_ctl_conf_t * ctl_conf_ptr);
+static int  _sync_nodes_to_jobs(void);
 #ifdef HAVE_LIBELAN3
-static void	validate_node_proc_count (void);
+static void _validate_node_proc_count(void);
 #endif
 
 static char highest_node_name[MAX_NAME_LEN] = "";
@@ -62,150 +64,172 @@ int node_record_count = 0;
 
 
 /*
- * build_bitmaps - build node bitmaps to define which nodes are in which
+ * _build_bitmaps - build node bitmaps to define which nodes are in which
 *	1) partition  2) configuration record  3) up state  4) idle state
 *	also sets values of total_nodes and total_cpus for every partition.
- * output: return - 0 if no error, errno otherwise + * RET 0 if no error, errno otherwise + * Note: Operates on common variables, no arguments * global: idle_node_bitmap - bitmap record of idle nodes * up_node_bitmap - bitmap records of up nodes * node_record_count - number of nodes in the system * node_record_table_ptr - pointer to global node table * part_list - pointer to global partition list */ -int -build_bitmaps () +static int _build_bitmaps(void) { int i, j, error_code; - char *this_node_name; - ListIterator config_record_iterator; /* for iterating through config_record */ - ListIterator part_record_iterator; /* for iterating through part_record_list */ - struct config_record *config_record_point; /* pointer to config_record */ - struct part_record *part_record_point; /* pointer to part_record */ - struct node_record *node_record_point; /* pointer to node_record */ + char *this_node_name; + ListIterator config_record_iterator; + ListIterator part_record_iterator; + struct config_record *config_record_point; + struct part_record *part_record_point; + struct node_record *node_record_point; bitstr_t *all_part_node_bitmap; hostlist_t host_list; error_code = 0; - last_node_update = time (NULL); - last_part_update = time (NULL); + last_node_update = time(NULL); + last_part_update = time(NULL); /* initialize the idle and up bitmaps */ if (idle_node_bitmap) - bit_free (idle_node_bitmap); + bit_free(idle_node_bitmap); if (up_node_bitmap) - bit_free (up_node_bitmap); - idle_node_bitmap = (bitstr_t *) bit_alloc (node_record_count); - up_node_bitmap = (bitstr_t *) bit_alloc (node_record_count); + bit_free(up_node_bitmap); + idle_node_bitmap = (bitstr_t *) bit_alloc(node_record_count); + up_node_bitmap = (bitstr_t *) bit_alloc(node_record_count); if ((idle_node_bitmap == NULL) || (up_node_bitmap == NULL)) - fatal ("bit_alloc memory allocation failure"); + fatal("bit_alloc memory allocation failure"); /* initialize the configuration bitmaps */ - config_record_iterator = list_iterator_create (config_list); + config_record_iterator = list_iterator_create(config_list); if (config_record_iterator == NULL) - fatal ("build_bitmaps: list_iterator_create unable to allocate memory"); - - while ((config_record_point = (struct config_record *) list_next (config_record_iterator))) { + fatal + ("_build_bitmaps: list_iterator_create unable to allocate memory"); + + while ((config_record_point = + (struct config_record *) + list_next(config_record_iterator))) { if (config_record_point->node_bitmap) - bit_free (config_record_point->node_bitmap); + bit_free(config_record_point->node_bitmap); - config_record_point->node_bitmap = (bitstr_t *) bit_alloc (node_record_count); + config_record_point->node_bitmap = + (bitstr_t *) bit_alloc(node_record_count); if (config_record_point->node_bitmap == NULL) - fatal ("bit_alloc memory allocation failure"); - } - list_iterator_destroy (config_record_iterator); + fatal("bit_alloc memory allocation failure"); + } + list_iterator_destroy(config_record_iterator); - /* scan all nodes and identify which are up and idle and their configuration */ + /* scan all nodes and identify which are up, idle and + * their configuration */ for (i = 0; i < node_record_count; i++) { uint16_t base_state, no_resp_flag; if (node_record_table_ptr[i].name[0] == '\0') continue; /* defunct */ - base_state = node_record_table_ptr[i].node_state & (~NODE_STATE_NO_RESPOND); - no_resp_flag = node_record_table_ptr[i].node_state & NODE_STATE_NO_RESPOND; + base_state = + node_record_table_ptr[i]. 
+ node_state & (~NODE_STATE_NO_RESPOND); + no_resp_flag = + node_record_table_ptr[i]. + node_state & NODE_STATE_NO_RESPOND; if (base_state == NODE_STATE_IDLE) - bit_set (idle_node_bitmap, i); + bit_set(idle_node_bitmap, i); if ((base_state != NODE_STATE_DOWN) && (base_state != NODE_STATE_UNKNOWN) && (base_state != NODE_STATE_DRAINED) && (no_resp_flag == 0)) - bit_set (up_node_bitmap, i); + bit_set(up_node_bitmap, i); if (node_record_table_ptr[i].config_ptr) - bit_set (node_record_table_ptr[i].config_ptr->node_bitmap, i); - } + bit_set(node_record_table_ptr[i].config_ptr-> + node_bitmap, i); + } /* scan partition table and identify nodes in each */ - all_part_node_bitmap = (bitstr_t *) bit_alloc (node_record_count); + all_part_node_bitmap = (bitstr_t *) bit_alloc(node_record_count); if (all_part_node_bitmap == NULL) - fatal ("bit_alloc memory allocation failure"); - part_record_iterator = list_iterator_create (part_list); + fatal("bit_alloc memory allocation failure"); + part_record_iterator = list_iterator_create(part_list); if (part_record_iterator == NULL) - fatal ("build_bitmaps: list_iterator_create unable to allocate memory"); + fatal + ("_build_bitmaps: list_iterator_create unable to allocate memory"); - while ((part_record_point = (struct part_record *) list_next (part_record_iterator))) { + while ((part_record_point = + (struct part_record *) list_next(part_record_iterator))) { if (part_record_point->node_bitmap) - bit_free (part_record_point->node_bitmap); - part_record_point->node_bitmap = (bitstr_t *) bit_alloc (node_record_count); + bit_free(part_record_point->node_bitmap); + part_record_point->node_bitmap = + (bitstr_t *) bit_alloc(node_record_count); if (part_record_point->node_bitmap == NULL) - fatal ("bit_alloc memory allocation failure"); + fatal("bit_alloc memory allocation failure"); /* check for each node in the partition */ if ((part_record_point->nodes == NULL) || (part_record_point->nodes[0] == '\0')) continue; - if ( (host_list = hostlist_create (part_record_point->nodes)) == NULL) { - error ("hostlist_create error for %s, %m", part_record_point->nodes); + if ((host_list = + hostlist_create(part_record_point->nodes)) == NULL) { + error("hostlist_create error for %s, %m", + part_record_point->nodes); continue; } - while ( (this_node_name = hostlist_shift (host_list)) ) { - node_record_point = find_node_record (this_node_name); + while ((this_node_name = hostlist_shift(host_list))) { + node_record_point = + find_node_record(this_node_name); if (node_record_point == NULL) { - error ("build_bitmaps: invalid node name specified %s", - this_node_name); - free (this_node_name); + error + ("_build_bitmaps: invalid node name specified %s", + this_node_name); + free(this_node_name); continue; - } - j = node_record_point - node_record_table_ptr; - if (bit_test (all_part_node_bitmap, j) == 1) { - error ("build_bitmaps: node %s defined in more than one partition", - this_node_name); - error ("build_bitmaps: only the first specification is honored"); } - else { - bit_set (part_record_point->node_bitmap, j); - bit_set (all_part_node_bitmap, j); + j = node_record_point - node_record_table_ptr; + if (bit_test(all_part_node_bitmap, j) == 1) { + error + ("_build_bitmaps: node %s defined in more than one partition", + this_node_name); + error + ("_build_bitmaps: only the first specification is honored"); + } else { + bit_set(part_record_point->node_bitmap, j); + bit_set(all_part_node_bitmap, j); part_record_point->total_nodes++; - part_record_point->total_cpus += 
node_record_point->cpus; - node_record_point->partition_ptr = part_record_point; + part_record_point->total_cpus += + node_record_point->cpus; + node_record_point->partition_ptr = + part_record_point; } - free (this_node_name); - } - hostlist_destroy (host_list); + free(this_node_name); + } + hostlist_destroy(host_list); } - list_iterator_destroy (part_record_iterator); - bit_free (all_part_node_bitmap); + list_iterator_destroy(part_record_iterator); + bit_free(all_part_node_bitmap); return error_code; } /* - * init_all_slurm_conf - initialize or re-initialize the slurm configuration values. - * output: return value - 0 if no error, otherwise an error code + * _init_all_slurm_conf - initialize or re-initialize the slurm + * configuration values. + * RET 0 if no error, otherwise an error code + * Note: Operates on common variables, no arguments */ -static int -init_all_slurm_conf () { +static int _init_all_slurm_conf(void) +{ int error_code; - init_slurm_conf (&slurmctld_conf); + init_slurm_conf(&slurmctld_conf); - if ((error_code = init_node_conf ())) + if ((error_code = init_node_conf())) return error_code; - if ((error_code = init_part_conf ())) + if ((error_code = init_part_conf())) return error_code; - if ((error_code = init_job_conf ())) + if ((error_code = init_job_conf())) return error_code; strcpy(highest_node_name, ""); @@ -214,18 +238,20 @@ init_all_slurm_conf () { /* - * parse_node_spec - parse the node specification (per the configuration file format), - * build table and set values - * input: in_line line from the configuration file - * output: in_line parsed keywords and values replaced by blanks - * return - 0 if no error, error code otherwise - * global: default_config_record - default configuration values for group of nodes + * _parse_node_spec - parse the node specification (per the configuration + * file format), build table and set values + * IN/OUT in_line - line from the configuration file, parsed keywords + * and values replaced by blanks + * RET 0 if no error, error code otherwise + * Note: Operates on common variables + * global: default_config_record - default configuration values for + * group of nodes * default_node_record - default node configuration values */ -static int -parse_node_spec (char *in_line) { +static int _parse_node_spec(char *in_line) +{ char *node_addr, *node_name, *state, *feature; - char *this_node_addr , *this_node_name; + char *this_node_addr, *this_node_name; int error_code, first, i; int state_val, cpus_val, real_memory_val, tmp_disk_val, weight_val; struct node_record *node_record_point; @@ -235,20 +261,19 @@ parse_node_spec (char *in_line) { node_addr = node_name = state = feature = (char *) NULL; cpus_val = real_memory_val = state_val = NO_VAL; tmp_disk_val = weight_val = NO_VAL; - if ((error_code = load_string (&node_name, "NodeName=", in_line))) + if ((error_code = load_string(&node_name, "NodeName=", in_line))) return error_code; if (node_name == NULL) return 0; /* no node info */ error_code = slurm_parser(in_line, - "Feature=", 's', &feature, - "NodeAddr=", 's', &node_addr, - "Procs=", 'd', &cpus_val, - "RealMemory=", 'd', &real_memory_val, - "State=", 's', &state, - "TmpDisk=", 'd', &tmp_disk_val, - "Weight=", 'd', &weight_val, - "END"); + "Feature=", 's', &feature, + "NodeAddr=", 's', &node_addr, + "Procs=", 'd', &cpus_val, + "RealMemory=", 'd', &real_memory_val, + "State=", 's', &state, + "TmpDisk=", 'd', &tmp_disk_val, + "Weight=", 'd', &weight_val, "END"); if (error_code) goto cleanup; @@ -256,125 +281,135 @@ parse_node_spec 
(char *in_line) { if (state != NULL) { state_val = NO_VAL; for (i = 0; i <= NODE_STATE_END; i++) { - if (strcasecmp (node_state_string(i), "END") == 0) + if (strcasecmp(node_state_string(i), "END") == 0) break; - if (strcasecmp (node_state_string(i), state) == 0) { + if (strcasecmp(node_state_string(i), state) == 0) { state_val = i; break; - } - } + } + } if (state_val == NO_VAL) { - error ("parse_node_spec: invalid state %s for node_name %s", - state, node_name); + error + ("_parse_node_spec: invalid state %s for node_name %s", + state, node_name); error_code = EINVAL; goto cleanup; - } - } + } + } - if ( node_addr && - ((addr_list = hostlist_create (node_addr)) == NULL)) { - error ("hostlist_create error for %s: %m", node_addr); + if (node_addr && + ((addr_list = hostlist_create(node_addr)) == NULL)) { + error("hostlist_create error for %s: %m", node_addr); error_code = errno; goto cleanup; } - if ( (host_list = hostlist_create (node_name)) == NULL) { - error ("hostlist_create error for %s: %m", node_name); + if ((host_list = hostlist_create(node_name)) == NULL) { + error("hostlist_create error for %s: %m", node_name); error_code = errno; goto cleanup; } first = 1; - while ( (this_node_name = hostlist_shift (host_list)) ) { - if (strcmp (this_node_name, "localhost") == 0) { - free (this_node_name); - this_node_name = malloc (128); + while ((this_node_name = hostlist_shift(host_list))) { + if (strcmp(this_node_name, "localhost") == 0) { + free(this_node_name); + this_node_name = malloc(128); if (this_node_name == NULL) - fatal ("memory allocation failure"); - getnodename (this_node_name, 128); + fatal("memory allocation failure"); + getnodename(this_node_name, 128); } - if (strcasecmp (this_node_name, "DEFAULT") == 0) { + if (strcasecmp(this_node_name, "DEFAULT") == 0) { xfree(node_name); node_name = NULL; if (cpus_val != NO_VAL) default_config_record.cpus = cpus_val; if (real_memory_val != NO_VAL) - default_config_record.real_memory = real_memory_val; + default_config_record.real_memory = + real_memory_val; if (tmp_disk_val != NO_VAL) - default_config_record.tmp_disk = tmp_disk_val; + default_config_record.tmp_disk = + tmp_disk_val; if (weight_val != NO_VAL) default_config_record.weight = weight_val; if (state_val != NO_VAL) default_node_record.node_state = state_val; if (feature) { if (default_config_record.feature) - xfree (default_config_record.feature); + xfree(default_config_record. 
+ feature); default_config_record.feature = feature; } - free (this_node_name); + free(this_node_name); break; } if (first == 1) { first = 0; - config_point = create_config_record (); + config_point = create_config_record(); if (config_point->nodes) - free(config_point->nodes); + free(config_point->nodes); config_point->nodes = node_name; if (cpus_val != NO_VAL) config_point->cpus = cpus_val; if (real_memory_val != NO_VAL) - config_point->real_memory = real_memory_val; + config_point->real_memory = + real_memory_val; if (tmp_disk_val != NO_VAL) config_point->tmp_disk = tmp_disk_val; if (weight_val != NO_VAL) config_point->weight = weight_val; if (feature) { if (config_point->feature) - xfree (config_point->feature); + xfree(config_point->feature); config_point->feature = feature; - } - } + } + } - if (strcmp (this_node_name, highest_node_name) <= 0) - node_record_point = find_node_record (this_node_name); + if (strcmp(this_node_name, highest_node_name) <= 0) + node_record_point = + find_node_record(this_node_name); else { - strncpy (highest_node_name, this_node_name, MAX_NAME_LEN); + strncpy(highest_node_name, this_node_name, + MAX_NAME_LEN); node_record_point = NULL; } if (node_record_point == NULL) { - node_record_point = create_node_record (config_point, this_node_name); - if ((state_val != NO_VAL) && - (state_val != NODE_STATE_UNKNOWN)) + node_record_point = + create_node_record(config_point, + this_node_name); + if ((state_val != NO_VAL) + && (state_val != NODE_STATE_UNKNOWN)) node_record_point->node_state = state_val; - node_record_point->last_response = time (NULL); + node_record_point->last_response = time(NULL); if (node_addr) - this_node_addr = hostlist_shift (addr_list); + this_node_addr = hostlist_shift(addr_list); else this_node_addr = NULL; if (this_node_addr) { - strncpy (node_record_point->comm_name, - this_node_addr, MAX_NAME_LEN); - free (this_node_addr); + strncpy(node_record_point->comm_name, + this_node_addr, MAX_NAME_LEN); + free(this_node_addr); } else { - strncpy (node_record_point->comm_name, - node_record_point->name, MAX_NAME_LEN); + strncpy(node_record_point->comm_name, + node_record_point->name, + MAX_NAME_LEN); } + } else { + error + ("_parse_node_spec: reconfiguration for node %s ignored.", + this_node_name); } - else { - error ("parse_node_spec: reconfiguration for node %s ignored.", - this_node_name); - } - free (this_node_name); + free(this_node_name); } /* xfree allocated storage */ if (state) xfree(state); if (addr_list) - hostlist_destroy (addr_list); - hostlist_destroy (host_list); + hostlist_destroy(addr_list); + hostlist_destroy(host_list); return error_code; cleanup: @@ -389,13 +424,17 @@ parse_node_spec (char *in_line) { /* - * parse_part_spec - parse the partition specification, build table and set values - * output: 0 if no error, error code otherwise + * _parse_part_spec - parse the partition specification, build table and + * set values + * IN/OUT in_line - line from the configuration file, parsed keywords + * and values replaced by blanks + * RET 0 if no error, error code otherwise + * Note: Operates on common variables * global: part_list - global partition list pointer * default_part - default parameters for a partition */ -static int -parse_part_spec (char *in_line) { +static int _parse_part_spec(char *in_line) +{ char *allow_groups, *nodes, *partition_name; char *default_str, *root_str, *shared_str, *state_str; int max_time_val, max_nodes_val, root_val, default_val; @@ -405,99 +444,105 @@ parse_part_spec (char *in_line) { partition_name = 
(char *) NULL; default_str = shared_str = state_str = (char *) NULL; - max_time_val = max_nodes_val = root_val = default_val = state_val = shared_val = NO_VAL; + max_time_val = max_nodes_val = root_val = default_val = state_val = + shared_val = NO_VAL; - if ((error_code = load_string (&partition_name, "PartitionName=", in_line))) + if ((error_code = + load_string(&partition_name, "PartitionName=", in_line))) return error_code; if (partition_name == NULL) return 0; /* no partition info */ - if (strlen (partition_name) >= MAX_NAME_LEN) { - error ("parse_part_spec: partition name %s too long\n", partition_name); - xfree (partition_name); + if (strlen(partition_name) >= MAX_NAME_LEN) { + error("_parse_part_spec: partition name %s too long", + partition_name); + xfree(partition_name); return EINVAL; - } + } allow_groups = default_str = root_str = nodes = NULL; shared_str = state_str = NULL; error_code = slurm_parser(in_line, - "AllowGroups=", 's', &allow_groups, - "Default=", 's', &default_str, - "RootOnly=", 's', &root_str, - "MaxTime=", 'd', &max_time_val, - "MaxNodes=", 'd', &max_nodes_val, - "Nodes=", 's', &nodes, - "Shared=", 's', &shared_str, - "State=", 's', &state_str, - "END"); - - if (error_code) + "AllowGroups=", 's', &allow_groups, + "Default=", 's', &default_str, + "RootOnly=", 's', &root_str, + "MaxTime=", 'd', &max_time_val, + "MaxNodes=", 'd', &max_nodes_val, + "Nodes=", 's', &nodes, + "Shared=", 's', &shared_str, + "State=", 's', &state_str, "END"); + + if (error_code) goto cleanup; if (default_str) { - if (strcasecmp (default_str, "YES") == 0) + if (strcasecmp(default_str, "YES") == 0) default_val = 1; - else if (strcasecmp (default_str, "NO") == 0) + else if (strcasecmp(default_str, "NO") == 0) default_val = 0; else { - error ("update_part: ignored partition %s update, bad state %s", - partition_name, default_str); + error + ("update_part: ignored partition %s update, bad state %s", + partition_name, default_str); error_code = EINVAL; goto cleanup; } - xfree (default_str); + xfree(default_str); default_str = NULL; } if (root_str) { - if (strcasecmp (root_str, "YES") == 0) + if (strcasecmp(root_str, "YES") == 0) root_val = 1; - else if (strcasecmp (root_str, "NO") == 0) + else if (strcasecmp(root_str, "NO") == 0) root_val = 0; else { - error ("update_part: ignored partition %s update, bad key %s", - partition_name, root_str); + error + ("update_part: ignored partition %s update, bad key %s", + partition_name, root_str); error_code = EINVAL; goto cleanup; } - xfree (root_str); + xfree(root_str); root_str = NULL; } if (shared_str) { - if (strcasecmp (shared_str, "YES") == 0) + if (strcasecmp(shared_str, "YES") == 0) shared_val = SHARED_YES; - else if (strcasecmp (shared_str, "NO") == 0) + else if (strcasecmp(shared_str, "NO") == 0) shared_val = SHARED_NO; - else if (strcasecmp (shared_str, "FORCE") == 0) + else if (strcasecmp(shared_str, "FORCE") == 0) shared_val = SHARED_FORCE; else { - error ("update_part: ignored partition %s update, bad shared %s", - partition_name, shared_str); + error + ("update_part: ignored partition %s update, bad shared %s", + partition_name, shared_str); error_code = EINVAL; goto cleanup; } - xfree (shared_str); + xfree(shared_str); shared_str = NULL; } if (state_str) { - if (strcasecmp (state_str, "UP") == 0) + if (strcasecmp(state_str, "UP") == 0) state_val = 1; - else if (strcasecmp (state_str, "DOWN") == 0) + else if (strcasecmp(state_str, "DOWN") == 0) state_val = 0; else { - error ("update_part: ignored partition %s update, bad state %s", - 
partition_name, state_str);
+			error
+			    ("update_part: ignored partition %s update, bad state %s",
+			     partition_name, state_str);
 			error_code = EINVAL;
 			goto cleanup;
 		}
-		xfree (state_str);
+		xfree(state_str);
 		state_str = NULL;
 	}
 
-	if (strcasecmp (partition_name, "DEFAULT") == 0) {
-		xfree (partition_name);
+	if (strcasecmp(partition_name, "DEFAULT") == 0) {
+		xfree(partition_name);
 		if (max_time_val != NO_VAL)
 			default_part.max_time = max_time_val;
 		if (max_nodes_val != NO_VAL)
@@ -510,32 +555,34 @@ parse_part_spec (char *in_line) {
 			default_part.shared = shared_val;
 		if (allow_groups) {
 			if (default_part.allow_groups)
-				xfree (default_part.allow_groups);
+				xfree(default_part.allow_groups);
 			default_part.allow_groups = allow_groups;
-		}
+		}
 		if (nodes) {
 			if (default_part.nodes)
-				xfree (default_part.nodes);
+				xfree(default_part.nodes);
 			default_part.nodes = nodes;
-		}
+		}
 		return 0;
-	}
+	}
 
-	part_record_point = list_find_first (part_list, &list_find_part, partition_name);
+	part_record_point =
+	    list_find_first(part_list, &list_find_part, partition_name);
 	if (part_record_point == NULL) {
-		part_record_point = create_part_record ();
-		strcpy (part_record_point->name, partition_name);
+		part_record_point = create_part_record();
+		strcpy(part_record_point->name, partition_name);
+	} else {
+		info("_parse_part_spec: duplicate entry for partition %s",
+		     partition_name);
 	}
-	else {
-		info ("parse_node_spec: duplicate entry for partition %s", partition_name);
-	}
 
 	if (default_val == 1) {
-		if (strlen (default_part_name) > 0)
-			info ("parse_part_spec: changing default partition from %s to %s",
-				default_part_name, partition_name);
-		strcpy (default_part_name, partition_name);
+		if (strlen(default_part_name) > 0)
+			info(
+			   "_parse_part_spec: changing default partition from %s to %s",
+			   default_part_name, partition_name);
+		strcpy(default_part_name, partition_name);
 		default_part_loc = part_record_point;
-	}
+	}
 
 	if (max_time_val != NO_VAL)
 		part_record_point->max_time = max_time_val;
 	if (max_nodes_val != NO_VAL)
@@ -548,22 +595,22 @@ parse_part_spec (char *in_line) {
 		part_record_point->shared = shared_val;
 	if (allow_groups) {
 		if (part_record_point->allow_groups)
-			xfree (part_record_point->allow_groups);
+			xfree(part_record_point->allow_groups);
 		part_record_point->allow_groups = allow_groups;
-	}
+	}
 	if (nodes) {
 		if (part_record_point->nodes)
-			xfree (part_record_point->nodes);
-		if (strcmp (nodes, "localhost") == 0) {
-			xfree (nodes);
-			nodes = xmalloc (128);
+			xfree(part_record_point->nodes);
+		if (strcmp(nodes, "localhost") == 0) {
+			xfree(nodes);
+			nodes = xmalloc(128);
 			if (nodes == NULL)
-				fatal ("memory allocation failure");
-			getnodename (nodes, 128);
+				fatal("memory allocation failure");
+			getnodename(nodes, 128);
 		}
 		part_record_point->nodes = nodes;
-	}
-	xfree (partition_name);
+	}
+	xfree(partition_name);
 	return 0;
 
      cleanup:
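
Default=, RootOnly=, and State= all reduce to the same case-insensitive YES/NO (or UP/DOWN) test with an EINVAL fallback, and Shared= merely adds FORCE as a third value. The shared shape, pulled out as a stand-alone sketch; parse_bool_flag is an illustrative helper, not a function in this patch:

	#include <strings.h>

	/* Map "YES"/"NO" (any case) to 1/0; -1 tells the caller to log
	 * an error and bail out with EINVAL, as the blocks above do. */
	static int parse_bool_flag(const char *value)
	{
		if (strcasecmp(value, "YES") == 0)
			return 1;
		if (strcasecmp(value, "NO") == 0)
			return 0;
		return -1;
	}
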
@@ -588,11 +635,12 @@ parse_part_spec (char *in_line) {
 /*
  * read_slurm_conf - load the slurm configuration from the configured file.
 *	read_slurm_conf can be called more than once if so desired.
- * input: recover - set to use state saved from last slurmctld shutdown
- * output: return - 0 if no error, otherwise an error code
+ * IN recover - set to use state saved from last slurmctld shutdown
+ * RET 0 if no error, otherwise an error code
+ * Note: Operates on common variables only
 */
-int
-read_slurm_conf (int recover) {
+int read_slurm_conf(int recover)
+{
 	clock_t start_time;
 	FILE *slurm_spec_file;	/* pointer to input data file */
 	int line_num;		/* line number in input file */
@@ -603,187 +651,156 @@ read_slurm_conf (int recover) {
 	struct node_record *node_record_point;
 
 	/* initialization */
-	start_time = clock ();
+	start_time = clock();
 	old_node_record_count = node_record_count;
-	old_node_table_ptr = node_record_table_ptr;	/* save node states for reconfig RPC */
+	old_node_table_ptr =
+	    node_record_table_ptr;	/* save node states for reconfig RPC */
 	node_record_table_ptr = NULL;
-	if ( (error_code = init_all_slurm_conf ()) ) {
+	if ((error_code = _init_all_slurm_conf())) {
 		node_record_table_ptr = old_node_table_ptr;
 		return error_code;
 	}
 
-	slurm_spec_file = fopen (slurmctld_conf.slurm_conf, "r");
+	slurm_spec_file = fopen(slurmctld_conf.slurm_conf, "r");
 	if (slurm_spec_file == NULL)
-		fatal ("read_slurm_conf error opening file %s, %m",
-			slurmctld_conf.slurm_conf);
+		fatal("read_slurm_conf error opening file %s, %m",
+		      slurmctld_conf.slurm_conf);
 
-	info ("read_slurm_conf: loading configuration from %s", slurmctld_conf.slurm_conf);
+	info("read_slurm_conf: loading configuration from %s",
+	     slurmctld_conf.slurm_conf);
 
 	/* process the data file */
 	line_num = 0;
-	while (fgets (in_line, BUF_SIZE, slurm_spec_file) != NULL) {
+	while (fgets(in_line, BUF_SIZE, slurm_spec_file) != NULL) {
 		line_num++;
-		if (strlen (in_line) >= (BUF_SIZE - 1)) {
-			error ("read_slurm_conf line %d, of input file %s too long\n",
-				line_num, slurmctld_conf.slurm_conf);
+		if (strlen(in_line) >= (BUF_SIZE - 1)) {
+			error
+			    ("read_slurm_conf line %d, of input file %s too long",
+			     line_num, slurmctld_conf.slurm_conf);
 			if (old_node_table_ptr)
-				xfree (old_node_table_ptr);
-			fclose (slurm_spec_file);
+				xfree(old_node_table_ptr);
+			fclose(slurm_spec_file);
 			return E2BIG;
 			break;
-		}
+		}
 
 		/* everything after a non-escaped "#" is a comment */
 		/* replace comment flag "#" with an end of string (NULL) */
+		/* escape sequence "\#" translated to "#" */
 		for (i = 0; i < BUF_SIZE; i++) {
 			if (in_line[i] == (char) NULL)
 				break;
 			if (in_line[i] != '#')
 				continue;
-			if ((i > 0) && (in_line[i - 1] == '\\')) {	/* escaped "#" */
+			if ((i > 0) && (in_line[i - 1] == '\\')) {
 				for (j = i; j < BUF_SIZE; j++) {
 					in_line[j - 1] = in_line[j];
-				}
+				}
 				continue;
-			}
+			}
 			in_line[i] = (char) NULL;
 			break;
-		}
+		}
+
+		/* parse what is left, non-comments */
 
-		/* parse what is left */
-		/* overall configuration parameters */
-		if ((error_code = parse_config_spec (in_line, &slurmctld_conf))) {
-			fclose (slurm_spec_file);
+		/* overall configuration parameters */
+		if ((error_code =
+		     parse_config_spec(in_line, &slurmctld_conf))) {
+			fclose(slurm_spec_file);
 			if (old_node_table_ptr)
-				xfree (old_node_table_ptr);
+				xfree(old_node_table_ptr);
 			return error_code;
 		}
 
 		/* node configuration parameters */
-		if ((error_code = parse_node_spec (in_line))) {
-			fclose (slurm_spec_file);
+		if ((error_code = _parse_node_spec(in_line))) {
+			fclose(slurm_spec_file);
 			if (old_node_table_ptr)
-				xfree (old_node_table_ptr);
+				xfree(old_node_table_ptr);
 			return error_code;
-		}
+		}
 
 		/* partition configuration parameters */
-		if ((error_code = parse_part_spec (in_line))) {
-			fclose (slurm_spec_file);
+		if ((error_code =
_parse_part_spec(in_line))) { + fclose(slurm_spec_file); if (old_node_table_ptr) - xfree (old_node_table_ptr); + xfree(old_node_table_ptr); return error_code; - } + } /* report any leftover strings on input line */ - report_leftover (in_line, line_num); - } - fclose (slurm_spec_file); + report_leftover(in_line, line_num); + } + fclose(slurm_spec_file); - validate_config (&slurmctld_conf); - set_config_defaults (&slurmctld_conf); + validate_config(&slurmctld_conf); + _set_config_defaults(&slurmctld_conf); if (default_part_loc == NULL) { - error ("read_slurm_conf: default partition not set."); + error("read_slurm_conf: default partition not set."); if (old_node_table_ptr) - xfree (old_node_table_ptr); + xfree(old_node_table_ptr); return EINVAL; - } + } if (node_record_count < 1) { - error ("read_slurm_conf: no nodes configured."); + error("read_slurm_conf: no nodes configured."); if (old_node_table_ptr) - xfree (old_node_table_ptr); + xfree(old_node_table_ptr); return EINVAL; - } - - rehash (); + } + + rehash(); if (old_node_table_ptr) { - info ("restoring original state of nodes"); - for (i=0; i<old_node_record_count; i++) { - node_record_point = find_node_record (old_node_table_ptr[i].name); + info("restoring original state of nodes"); + for (i = 0; i < old_node_record_count; i++) { + node_record_point = + find_node_record(old_node_table_ptr[i].name); if (node_record_point) - node_record_point->node_state = old_node_table_ptr[i].node_state; + node_record_point->node_state = + old_node_table_ptr[i].node_state; } - xfree (old_node_table_ptr); + xfree(old_node_table_ptr); } - set_slurmd_addr (); + set_slurmd_addr(); if (recover) { - (void) load_node_state (); - (void) load_part_state (); - (void) load_job_state (); + (void) load_node_state(); + (void) load_part_state(); + (void) load_job_state(); } - if ((error_code = build_bitmaps ())) + if ((error_code = _build_bitmaps())) return error_code; #ifdef HAVE_LIBELAN3 - validate_node_proc_count (); + _validate_node_proc_count(); #endif if (recover) { - (void) sync_nodes_to_jobs (); + (void) _sync_nodes_to_jobs(); } - load_part_uid_allow_list ( 1 ); + load_part_uid_allow_list(1); /* sort config_list by weight for scheduling */ - list_sort (config_list, &list_compare_config); + list_sort(config_list, &list_compare_config); - slurmctld_conf.last_update = time (NULL) ; - info ("read_slurm_conf: finished loading configuration, time=%ld", - (long) (clock () - start_time)); + slurmctld_conf.last_update = time(NULL); + info("read_slurm_conf: finished loading configuration, time=%ld", + (long) (clock() - start_time)); return SLURM_SUCCESS; } -/* - * sync_nodes_to_jobs - sync the node state to job states on slurmctld restart. - * we perform "lazy" updates on node states due to their number (assumes - * number of jobs is much smaller than the number of nodes). 
This routine - * marks nodes allocated to a job as busy no matter what the node's last - * saved state - * output: returns count of nodes having state changed - */ -int -sync_nodes_to_jobs (void) -{ - struct job_record *job_ptr; - ListIterator job_record_iterator; - int i, update_cnt = 0; - - job_record_iterator = list_iterator_create (job_list); - while ((job_ptr = (struct job_record *) list_next (job_record_iterator))) { - if ((job_ptr->job_state == JOB_PENDING) || - (job_ptr->job_state == JOB_COMPLETE) || - (job_ptr->job_state == JOB_FAILED) || - (job_ptr->job_state == JOB_TIMEOUT)) - continue; - if (job_ptr->node_bitmap == NULL) - continue; - for (i = 0; i < node_record_count; i++) { - if (bit_test (job_ptr->node_bitmap, i) == 0) - continue; - if (node_record_table_ptr[i].node_state == NODE_STATE_ALLOCATED) - continue; /* already in proper state */ - update_cnt++; - if (node_record_table_ptr[i].node_state & NODE_STATE_NO_RESPOND) - node_record_table_ptr[i].node_state = NODE_STATE_ALLOCATED | - NODE_STATE_NO_RESPOND; - else - node_record_table_ptr[i].node_state = NODE_STATE_ALLOCATED; - } - } - if (update_cnt) - info ("sync_nodes_to_jobs updated state of %d nodes", update_cnt); - return update_cnt; -} -static void -set_config_defaults (slurm_ctl_conf_t *ctl_conf_ptr) +/* Set configuration parameters to default values if not initialized + * by the configuration file + */ +static void _set_config_defaults(slurm_ctl_conf_t * ctl_conf_ptr) { if (ctl_conf_ptr->backup_controller == NULL) - info ("read_slurm_conf: backup_controller value not specified."); + info( + "read_slurm_conf: backup_controller value not specified."); if (ctl_conf_ptr->fast_schedule == (uint16_t) NO_VAL) ctl_conf_ptr->fast_schedule = 1; @@ -798,7 +815,7 @@ set_config_defaults (slurm_ctl_conf_t *ctl_conf_ptr) ctl_conf_ptr->heartbeat_interval = 60; if (ctl_conf_ptr->inactive_limit == (uint16_t) NO_VAL) - ctl_conf_ptr->inactive_limit = 0; /* unlimited */ + ctl_conf_ptr->inactive_limit = 0; /* unlimited */ if (ctl_conf_ptr->kill_wait == (uint16_t) NO_VAL) ctl_conf_ptr->kill_wait = 30; @@ -813,42 +830,97 @@ set_config_defaults (slurm_ctl_conf_t *ctl_conf_ptr) ctl_conf_ptr->slurmd_timeout = 300; if (ctl_conf_ptr->state_save_location == NULL) - ctl_conf_ptr->state_save_location = xstrdup (DEFAULT_TMP_FS); + ctl_conf_ptr->state_save_location = + xstrdup(DEFAULT_TMP_FS); if (ctl_conf_ptr->tmp_fs == NULL) - ctl_conf_ptr->tmp_fs = xstrdup (DEFAULT_TMP_FS); + ctl_conf_ptr->tmp_fs = xstrdup(DEFAULT_TMP_FS); } +/* + * _sync_nodes_to_jobs - sync node state to job states on slurmctld restart. + * we perform "lazy" updates on node states due to their number (assumes + * number of jobs is much smaller than the number of nodes). 
This
+ *	routine marks nodes allocated to a job as busy no matter what the
+ *	node's last saved state
+ * RET count of nodes having state changed
+ * Note: Operates on common variables, no arguments
+ */
+static int _sync_nodes_to_jobs(void)
+{
+	struct job_record *job_ptr;
+	ListIterator job_record_iterator;
+	int i, update_cnt = 0;
+
+	job_record_iterator = list_iterator_create(job_list);
+	while ((job_ptr =
+		(struct job_record *) list_next(job_record_iterator))) {
+		if ((job_ptr->job_state == JOB_PENDING)
+		    || (job_ptr->job_state == JOB_COMPLETE)
+		    || (job_ptr->job_state == JOB_FAILED)
+		    || (job_ptr->job_state == JOB_TIMEOUT))
+			continue;
+		if (job_ptr->node_bitmap == NULL)
+			continue;
+		for (i = 0; i < node_record_count; i++) {
+			if (bit_test(job_ptr->node_bitmap, i) == 0)
+				continue;
+			if (node_record_table_ptr[i].node_state ==
+			    NODE_STATE_ALLOCATED)
+				continue;	/* already in proper state */
+			update_cnt++;
+			if (node_record_table_ptr[i].
+			    node_state & NODE_STATE_NO_RESPOND)
+				node_record_table_ptr[i].node_state =
+				    NODE_STATE_ALLOCATED |
+				    NODE_STATE_NO_RESPOND;
+			else
+				node_record_table_ptr[i].node_state =
+				    NODE_STATE_ALLOCATED;
+		}
+	}
+	if (update_cnt)
+		info("_sync_nodes_to_jobs updated state of %d nodes",
+		     update_cnt);
+	return update_cnt;
+}
+
 
 #ifdef HAVE_LIBELAN3
-/* Every node in a given partition must have the same processor count at present */
-void validate_node_proc_count (void)
+/* Every node in a given partition must have the same processor count
+ * at present; this function ensures it */
+static void _validate_node_proc_count(void)
 {
 	ListIterator part_record_iterator;
 	struct part_record *part_record_point;
 	int first_bit, last_bit, i, node_size, part_size;
 
-	part_record_iterator = list_iterator_create (part_list);
-	while ((part_record_point = (struct part_record *) list_next (part_record_iterator))) {
-		first_bit = bit_ffs (part_record_point->node_bitmap);
-		last_bit = bit_fls (part_record_point->node_bitmap);
+	part_record_iterator = list_iterator_create(part_list);
+	while ((part_record_point =
+		(struct part_record *) list_next(part_record_iterator))) {
+		first_bit = bit_ffs(part_record_point->node_bitmap);
+		last_bit = bit_fls(part_record_point->node_bitmap);
 		part_size = -1;
-		for (i=first_bit; i<=last_bit; i++) {
-			if (bit_test (part_record_point->node_bitmap, i) == 0)
+		for (i = first_bit; i <= last_bit; i++) {
+			if (bit_test(part_record_point->node_bitmap, i) ==
+			    0)
 				continue;
 			if (slurmctld_conf.fast_schedule)
-				node_size = node_record_table_ptr[i].config_ptr->cpus;
-			else
+				node_size =
+				    node_record_table_ptr[i].config_ptr->
+				    cpus;
+			else
 				node_size = node_record_table_ptr[i].cpus;
 			if (part_size == -1)
 				part_size = node_size;
 			else if (part_size != node_size)
-				fatal ("Partition %s has inconsisent processor count",
-					part_record_point->name);
+				fatal
+				    ("Partition %s has inconsistent processor count",
+				     part_record_point->name);
 		}
-	}
-	list_iterator_destroy (part_record_iterator);
+	}
+	list_iterator_destroy(part_record_iterator);
 }
 #endif
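
_validate_node_proc_count brackets its scan with bit_ffs/bit_fls so only the populated span of the bitmap is walked, not the whole node table. The same idiom in isolation, assuming the bitstring API from src/common/bitstring.h (bit_ffs, bit_fls, bit_test); count_set_bits is an illustrative name:

	#include "src/common/bitstring.h"

	/* Count set bits by walking only [first_bit, last_bit], the
	 * span _validate_node_proc_count iterates over. */
	static int count_set_bits(bitstr_t *bitmap)
	{
		int first_bit = bit_ffs(bitmap);	/* -1 when empty */
		int last_bit, i, count = 0;

		if (first_bit < 0)
			return 0;
		last_bit = bit_fls(bitmap);
		for (i = first_bit; i <= last_bit; i++) {
			if (bit_test(bitmap, i))
				count++;
		}
		return count;
	}
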
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index dac3f8b1fbc..3864066ac12 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -29,7 +29,7 @@
 
 #if HAVE_CONFIG_H
-# include <config.h>
+# include "config.h"
 #  if HAVE_INTTYPES_H
 #    include <inttypes.h>
 #  else
@@ -62,14 +62,18 @@
 #include "src/common/slurm_protocol_api.h"
 #include "src/common/xmalloc.h"
 
+/*****************************************************************************\
+ *  GENERAL CONFIGURATION parameters and data structures
+\*****************************************************************************/
 
 /* Perform full slurmctld's state every PERIODIC_CHECKPOINT seconds */
 #define	PERIODIC_CHECKPOINT	300
 
 /* Retry an incomplete RPC agent request every RPC_RETRY_INTERVAL seconds */
 #define	RPC_RETRY_INTERVAL	60
 
-/* Attempt to schedule jobs every PERIODIC_SCHEDULE seconds despite any RPC activity
- * This will catch any state transisions that may have otherwise been missed */
+/* Attempt to schedule jobs every PERIODIC_SCHEDULE seconds despite
+ * any RPC activity.  This will catch any state transitions that may
+ * have otherwise been missed */
 #define	PERIODIC_SCHEDULE	30
 
 /* Check for jobs reaching their time limit every PERIODIC_TIMEOUT seconds */
@@ -79,56 +83,76 @@
 #define GROUP_FILE	"/etc/group"
 
 /* Check for updates to GROUP_FILE every PERIODIC_GROUP_CHECK seconds,
- * Update the group uid_t access list as needed */
+ * Update the group uid_t access list as needed */
 #define	PERIODIC_GROUP_CHECK	600
 
 /* Default temporary storage for slurm state and user files */
 #define DEFAULT_TMP_FS	"/tmp"
 
+/* Don't accept more jobs once there are MAX_JOB_COUNT in the system
+ * This should prevent exhausting memory */
+#define MAX_JOB_COUNT 2000
+
+/* Purge OK for jobs over MIN_JOB_AGE seconds old (since completion)
+ * This should prevent exhausting memory */
+#define MIN_JOB_AGE 300
+
 extern slurm_ctl_conf_t slurmctld_conf;
 
+/*****************************************************************************\
+ *  NODE parameters and data structures
+\*****************************************************************************/
 #define MAX_NAME_LEN	32
 #define CONFIG_MAGIC	0xc065eded
 #define NODE_MAGIC	0x0de575ed
+
 struct config_record {
 	uint32_t magic;		/* magic cookie to test data integrity */
 	uint32_t cpus;		/* count of cpus running on the node */
-	uint32_t real_memory;	/* megabytes of real memory on the node */
-	uint32_t tmp_disk;	/* megabytes of total storage in TMP_FS file system */
-	uint32_t weight;	/* arbitrary priority of node for scheduling work on */
-	char *feature;		/* arbitrary list of features associated with a node */
-	char *nodes;		/* names of nodes in partition configuration record */
-	bitstr_t *node_bitmap;	/* bitmap of nodes in configuration record */
+	uint32_t real_memory;	/* MB real memory on the node */
+	uint32_t tmp_disk;	/* MB total storage in TMP_FS file system */
+	uint32_t weight;	/* arbitrary priority of node for
+				   scheduling work on */
+	char *feature;		/* arbitrary list of features associated */
+	char *nodes;		/* name of nodes with this configuration */
+	bitstr_t *node_bitmap;	/* bitmap of nodes with this configuration */
 };
+
 extern List config_list;	/* list of config_record entries */
-extern time_t last_bitmap_update;	/* time of last node creation or deletion */
-extern time_t last_node_update;	/* time of last update to node records */
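
Every record type in this header carries a magic cookie (CONFIG_MAGIC, NODE_MAGIC, and the PART/JOB/STEP values below) stamped at creation, so a stale or mis-cast pointer fails loudly instead of silently corrupting state. A hypothetical check in that spirit; the record type, field set, and use of assert() are illustrative only, not how slurmctld validates its records:

	#include <assert.h>
	#include <stdint.h>

	#define EXAMPLE_MAGIC 0xc065eded	/* stamped at creation */

	struct example_record {
		uint32_t magic;		/* must hold EXAMPLE_MAGIC */
		uint32_t cpus;
	};

	/* Fail fast on a record that was never initialized or was
	 * already recycled; the magic-cookie pattern sketched here. */
	static void check_record(struct example_record *ptr)
	{
		assert(ptr->magic == EXAMPLE_MAGIC);
	}
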
 
 struct node_record {
-	uint32_t magic;			/* magic cookie to test data integrity */
-	char name[MAX_NAME_LEN];	/* name of the node. a null name indicates defunct node */
+	uint32_t magic;			/* magic cookie for data integrity */
+	char name[MAX_NAME_LEN];	/* name of the node. NULL==defunct */
 	uint16_t node_state;		/* enum node_states, ORed with
-					   NODE_STATE_NO_RESPOND if not responding */
+					   NODE_STATE_NO_RESPOND if not
+					   responding */
 	time_t last_response;		/* last response from the node */
-	uint32_t cpus;			/* actual count of cpus running on the node */
-	uint32_t real_memory;		/* actual megabytes of real memory on the node */
-	uint32_t tmp_disk;		/* actual megabytes of total disk in TMP_FS */
-	struct config_record *config_ptr;	/* configuration specification for this node */
-	struct part_record *partition_ptr;	/* partition for this node */
-	char comm_name[MAX_NAME_LEN];	/* communications path name of the node */
+	uint32_t cpus;			/* count of cpus on the node */
+	uint32_t real_memory;		/* MB real memory on the node */
+	uint32_t tmp_disk;		/* MB total disk in TMP_FS */
+	struct config_record *config_ptr;	/* configuration spec ptr */
+	struct part_record *partition_ptr;	/* partition for this node */
+	char comm_name[MAX_NAME_LEN];	/* communications path name to node */
 	slurm_addr slurm_addr;		/* network address */
 };
-extern struct node_record *node_record_table_ptr;	/* location of the node records */
-extern int node_record_count;	/* count of records in the node record table */
-extern int *hash_table;		/* table of hashed indicies into node_record */
+
+extern struct node_record *node_record_table_ptr;	/* ptr to node records */
+extern time_t last_bitmap_update;	/* time of last node creation or
+					   deletion */
+extern time_t last_node_update;		/* time of last node record update */
+extern int node_record_count;		/* count in node_record_table_ptr */
+extern int *hash_table;			/* table of hashed indices into
+					   node_record_table_ptr */
 extern bitstr_t *up_node_bitmap;	/* bitmap of nodes are up */
 extern bitstr_t *idle_node_bitmap;	/* bitmap of nodes are idle */
 
 extern struct config_record default_config_record;
 extern struct node_record default_node_record;
 
-/* NOTE: change PART_STRUCT_VERSION value whenever the contents of PART_STRUCT_FORMAT change */
+/*****************************************************************************\
+ *  PARTITION parameters and data structures
+\*****************************************************************************/
 #define PART_MAGIC 0xaefe8495
-extern time_t last_part_update;	/* time of last update to part records */
+
 struct part_record {
 	uint32_t magic;		/* magic cookie to test data integrity */
 	char name[MAX_NAME_LEN];/* name of the partition */
@@ -136,40 +160,39 @@ struct part_record {
 	uint32_t max_nodes;	/* per job or INFINITE */
 	uint32_t total_nodes;	/* total number of nodes in the partition */
 	uint32_t total_cpus;	/* total number of cpus in the partition */
-	uint16_t root_only;	/* 1 if allocate/submit RPC can only be issued by user root */
-	uint16_t shared;	/* 1 if >1 job can share a node, 2 if required */
+	uint16_t root_only;	/* 1 if allocate/submit RPC can only be
+				   issued by user root */
+	uint16_t shared;	/* 1 if >1 job can share a node,
+				   2 if sharing required */
 	uint16_t state_up;	/* 1 if state is up, 0 if down */
-	char *nodes;		/* comma delimited list names of nodes in partition */
-	char *allow_groups;	/* comma delimited list of groups, null indicates all */
+	char *nodes;		/* comma delimited list names of nodes */
+	char *allow_groups;	/* comma delimited list of groups,
+				 * NULL indicates all */
 	uid_t *allow_uids;	/* zero terminated list of allowed users */
 	bitstr_t *node_bitmap;	/* bitmap of nodes in partition */
 };
-extern List part_list;	/* list of part_record entries */
+
+extern List part_list;		/* list of part_record entries */
+extern time_t last_part_update;	/* time of last part_list update */
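
part_list is searched by name through the generic list API: list_find_first() drives a caller-supplied predicate (list_find_part in update_part and _parse_part_spec above). A sketch of such a predicate; example_part and example_find_part are illustrative stand-ins, not the real types:

	#include <string.h>

	#define MAX_NAME_LEN 32

	struct example_part {
		char name[MAX_NAME_LEN];
	};

	/* Predicate in the style used with list_find_first(): return
	 * non-zero when this entry's name matches the search key. */
	static int example_find_part(void *part_entry, void *key)
	{
		struct example_part *part_ptr = part_entry;
		return (strcmp(part_ptr->name, (char *) key) == 0);
	}
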
 extern struct part_record default_part;	/* default configuration values */
 extern char default_part_name[MAX_NAME_LEN];	/* name of default partition */
-extern struct part_record *default_part_loc;	/* location of default partition */
+extern struct part_record *default_part_loc;	/* default partition ptr */
 
-/* NOTE: change JOB_STRUCT_VERSION value whenever the contents of JOB_STRUCT_FORMAT change */
+/*****************************************************************************\
+ *  JOB parameters and data structures
+\*****************************************************************************/
 extern time_t last_job_update;	/* time of last update to part records */
-/*
-  FIXME: this should be taken out.
-  Maybe there should be an update for the step_list in every job.
-extern time_t last_step_update;	*/	/* time of last update to job steps */
 
-/* Don't accept more jobs once there are MAX_JOB_COUNT in the system */
-/* Purge OK for jobs over MIN_JOB_AGE seconds old (since completion) */
-/* This should prevent exhausting memory */
 #define DETAILS_MAGIC	0xdea84e7
 #define JOB_MAGIC	0xf0b7392c
-#define MAX_JOB_COUNT	2000
-#define MIN_JOB_AGE	300
 #define STEP_MAGIC	0xce593bc1
 
 extern int job_count;		/* number of jobs in the system */
 
-/* job_details - specification of a job's constraints, can be purged upon resource allocation */
+/* job_details - specification of a job's constraints,
+   can be purged after initiation */
 struct job_details {
-	uint32_t magic;		/* magic cookie to test data integrity */
+	uint32_t magic;		/* magic cookie for data integrity */
 	uint16_t batch_flag;	/* 1 if batch job (with script) */
 	uint32_t num_procs;	/* minimum number of processors */
 	uint32_t num_nodes;	/* minimum number of nodes */
@@ -177,95 +200,142 @@ struct job_details {
 	bitstr_t *req_node_bitmap;	/* bitmap of required nodes */
 	slurm_job_credential_t credential;	/* job credential */
 	char *features;		/* required features */
-	uint16_t shared;	/* 1 if more than one job can execute on a node */
-	uint16_t contiguous;	/* requires contiguous nodes, 1=true, 0=false */
+	uint16_t shared;	/* set if node can be shared */
+	uint16_t contiguous;	/* set if requires contiguous nodes */
 	uint32_t min_procs;	/* minimum processors per node, MB */
 	uint32_t min_memory;	/* minimum memory per node, MB */
-	uint32_t min_tmp_disk;	/* minimum temporary disk per node, MB */
+	uint32_t min_tmp_disk;	/* minimum temp disk per node, MB */
 	char *err;		/* pathname of job's stderr file */
 	char *in;		/* pathname of job's stdin file */
 	char *out;		/* pathname of job's stdout file */
-	uint32_t total_procs;	/* total number of allocated processors, for accounting */
+	uint32_t total_procs;	/* number of allocated processors,
+				   for accounting */
 	time_t submit_time;	/* time of submission */
-	char *work_dir;		/* pathname of job's working directory */
+	char *work_dir;		/* pathname of working directory */
 };
 
 struct job_record {
 	uint32_t job_id;	/* job ID */
-	uint32_t magic;		/* magic cookie to test data integrity */
+	uint32_t magic;		/* magic cookie for data integrity */
 	char name[MAX_NAME_LEN];	/* name of the job */
 	char partition[MAX_NAME_LEN];	/* name of the partition */
 	struct part_record *part_ptr;	/* pointer to the partition record */
 	uint32_t user_id;	/* user the job runs as */
 	enum job_states job_state;	/* state of the job */
-	uint16_t kill_on_node_fail;	/* 1 if job should be killed on on failure */
-	char *nodes;		/* comma delimited list of nodes allocated to
job */ - bitstr_t *node_bitmap; /* bitmap of nodes in allocated to job */ - uint32_t time_limit; /* maximum run time in minutes or INFINITE */ - time_t start_time; /* time execution begins, actual or expected */ - time_t end_time; /* time of termination, actual or expected */ + uint16_t kill_on_node_fail; /* 1 if job should be killed on + node failure */ + char *nodes; /* list of nodes allocated to job */ + bitstr_t *node_bitmap; /* bitmap of nodes allocated to job */ + uint32_t time_limit; /* time_limit minutes or INFINITE */ + time_t start_time; /* time execution begins, + actual or expected */ + time_t end_time; /* time of termination, + actual or expected */ time_t time_last_active; /* time of last job activity */ uint32_t priority; /* relative priority of the job */ - struct job_details *details; /* job details (set until job terminates) */ - uint16_t num_cpu_groups; /* element count in arrays cpus_per_node and cpu_count_reps */ + struct job_details *details; /* job details */ + uint16_t num_cpu_groups; /* record count in cpus_per_node and + cpu_count_reps */ uint32_t *cpus_per_node; /* array of cpus per node allocated */ - uint32_t *cpu_count_reps; /* array of consecutive nodes with same cpu count */ + uint32_t *cpu_count_reps; /* array of consecutive nodes with + same cpu count */ uint16_t next_step_id; /* next step id to be used */ - uint16_t node_cnt; /* count of nodes allocated to this job */ - slurm_addr *node_addr; /* addresses of the nodes allocated to this job */ + uint16_t node_cnt; /* count of nodes allocated to job */ + slurm_addr *node_addr; /* addresses of the nodes allocated to + job */ List step_list; /* list of job's steps */ }; struct step_record { struct job_record* job_ptr; /* ptr to the job that owns the step */ uint16_t step_id; /* step number */ - uint16_t cyclic_alloc; /* set for cyclic task allocation to nodes */ + uint16_t cyclic_alloc; /* set for cyclic task allocation + across nodes */ time_t start_time; /* step allocation time */ - bitstr_t *node_bitmap; /* bitmap of nodes in allocated to job step */ + bitstr_t *node_bitmap; /* bitmap of nodes allocated to job + step */ #ifdef HAVE_LIBELAN3 - qsw_jobinfo_t qsw_job; /* Elan3 switch context, opaque data structure */ + qsw_jobinfo_t qsw_job; /* Elan3 switch context, opaque */ #endif }; typedef struct job_step_specs step_specs; - extern List job_list; /* list of job_record entries */ -/* allocate_nodes - for a given bitmap, change the state of specified nodes to stage_in - * this is a simple prototype for testing +/*****************************************************************************\ + * Global slurmctld functions +\*****************************************************************************/ + +/* allocate_nodes - change state of specified nodes to NODE_STATE_ALLOCATED + * IN bitmap - map of nodes to be allocated + * globals: node_record_count - number of nodes in the system + * node_record_table_ptr - pointer to global node table + * last_node_update - last update time of node table */ extern void allocate_nodes (unsigned *bitmap); /* - * bitmap2node_name - given a bitmap, build a list of comma separated node names. - * names may include regular expressions (e.g. "lx[01-10]") + * bitmap2node_name - given a bitmap, build a list of comma separated node + * names. names may include regular expressions (e.g. 
"lx[01-10]") + * IN bitmap - bitmap pointer + * RET pointer to node list or NULL on error + * globals: node_record_table_ptr - pointer to node table * NOTE: the caller must xfree the memory at node_list when no longer required */ extern char * bitmap2node_name (bitstr_t *bitmap) ; -/* build_node_details - set cpu counts and addresses for allocated nodes */ +/* + * build_node_details - set cpu counts and addresses for allocated nodes + * IN job_ptr - pointer to a job record + * NOTE: the arrays cpus_per_node, cpu_count_reps and node_addr in the job + * details record are allocated by build_node_details and must be + * xfreed by the caller, preferably using delete_job_details + */ extern void build_node_details (struct job_record *job_ptr); -/* count_cpus - report how many cpus are associated with the identified nodes */ +/* + * count_cpus - report how many cpus are associated with the identified nodes + * IN bitmap - map of nodes to tally + * RET cpu count + * globals: node_record_count - number of nodes configured + * node_record_table_ptr - pointer to global node table + */ extern int count_cpus (unsigned *bitmap); /* - * create_config_record - create a config_record entry and set is values to the defaults. - * NOTE: memory allocated will remain in existence until delete_config_record() is called - * to deletet all configuration records + * create_config_record - create a config_record entry and set is values to + * the defaults. each config record corresponds to a line in the + * slurm.conf file and typically describes the configuration of a + * large number of nodes + * RET pointer to the config_record + * global: default_config_record - default configuration values + * NOTE: memory allocated will remain in existence until + * _delete_config_record() is called to delete all configuration records */ extern struct config_record *create_config_record (void); /* * create_job_record - create an empty job_record including job_details. 
* load its values with defaults (zeros, nulls, and magic cookie) - * NOTE: allocates memory that should be xfreed with list_delete_job + * IN/OUT error_code - set to zero if no error, errno otherwise + * RET pointer to the record or NULL if error + * global: job_list - global job list + * job_count - number of jobs in the system + * last_job_update - time of last job table update + * NOTE: allocates memory that should be xfreed with _list_delete_job */ extern struct job_record * create_job_record (int *error_code); /* - * create_node_record - create a node record - * NOTE: allocates memory that should be freed with delete_part_record + * create_node_record - create a node record and set its values to defaults + * IN config_point - pointer to node's configuration information + * IN node_name - name of the node + * RET pointer to the record or NULL if error + * global: default_node_record - default node values + * NOTE: the record's values are initialized to those of default_node_record, + * node_name and config_point's cpus, real_memory, and tmp_disk values + * NOTE: allocates memory at node_record_table_ptr that must be xfreed when + * the global node table is no longer required */ extern struct node_record *create_node_record (struct config_record *config_point, @@ -273,35 +343,66 @@ extern struct node_record *create_node_record (struct config_record /* * create_part_record - create a partition record + * RET a pointer to the record or NULL if error + * global: default_part - default partition parameters + * part_list - global partition list + * NOTE: the record's values are initialized to those of default_part * NOTE: allocates memory that should be xfreed with delete_part_record */ extern struct part_record *create_part_record (void); /* * create_step_record - create an empty step_record for the specified job. 
+ * IN job_ptr - pointer to job table entry to have step record added
+ * RET a pointer to the record or NULL if error
 * NOTE: allocates memory that should be xfreed with delete_step_record
 */
 extern struct step_record * create_step_record (struct job_record *job_ptr);
 
-/* deallocate_nodes - for a given job, deallocate its nodes and make their state IDLE */
+/*
+ * deallocate_nodes - for a given job, deallocate its nodes and make
+ *	their state NODE_STATE_IDLE
+ * IN job_ptr - pointer to terminating job
+ * globals: node_record_count - number of nodes in the system
+ *	node_record_table_ptr - pointer to global node table
+ */
 extern void deallocate_nodes (struct job_record * job_ptr);
 
-/* delete_all_step_records - delete all step record for specified job_ptr */
+/*
+ * delete_all_step_records - delete all step records for specified job_ptr
+ * IN job_ptr - pointer to job table entry to have step records removed
+ */
 extern void delete_all_step_records (struct job_record *job_ptr);
 
-/* delete_job_details - delete a job's detail record and clear it's pointer */
+/*
+ * delete_job_details - delete a job's detail record and clear its pointer
+ *	this information can be deleted as soon as the job is allocated
+ *	resources and running (could need to restart batch job)
+ * IN job_entry - pointer to job_record to clear the record of
+ */
 extern void delete_job_details (struct job_record *job_entry);
 
-/* delete_node_record - delete record for node with specified name */
+/*
+ * delete_node_record - delete the node record for a node with specified name
+ *	to avoid invalidating the bitmaps and hash table, we just clear the
+ *	name and set its state to NODE_STATE_DOWN
+ * IN name - name of the desired node
+ * RET 0 on success, errno otherwise
+ * global: node_record_table_ptr - pointer to global node table
+ */
 extern int delete_node_record (char *name);
 
-/* delete_part_record - delete record for partition with specified name */
-extern int delete_part_record (char *name);
-
-/* delete_step_record - delete record for job step for specified job_ptr and step_id */
+/*
+ * delete_step_record - delete record for job step for specified job_ptr
+ *	and step_id
+ * IN job_ptr - pointer to job table entry to have step record removed
+ * IN step_id - id of the desired job step
+ * RET 0 on success, errno otherwise
+ */
 extern int delete_step_record (struct job_record *job_ptr, uint32_t step_id);
 
-/* dump_all_job_state - save the state of all jobs to file */
+/* dump_all_job_state - save the state of all jobs to file
+ * RET 0 or error code */
 extern int dump_all_job_state ( void );
 
 /* dump_all_node_state - save the state of all nodes to file */
@@ -310,69 +411,163 @@ extern int dump_all_node_state ( void );
 /* dump_all_part_state - save the state of all partitions to file */
 extern int dump_all_part_state ( void );
 
-/* dump_job_desc - dump the incoming job submit request message */
+/*
+ * dump_job_desc - dump the incoming job submit request message
+ * IN job_specs - job specification from RPC
+ */
 extern void dump_job_desc(job_desc_msg_t * job_specs);
 
-/* dump_step_desc - dump the incoming step initiate request message */
+/*
+ * dump_step_desc - dump the incoming step initiate request message
+ * IN step_spec - job step request specification from RPC
+ */
 extern void dump_step_desc(step_specs *step_spec);
 
-/* find_first_node_record - find a record for first node in the bitmap */
-extern struct node_record *find_first_node_record (bitstr_t *node_bitmap);
-
-/* find_job_record - return a pointer to the job record with the given job_id */
+/*
+ * find_job_record - return a pointer to the job record with the given job_id
+ * IN job_id - requested job's id
+ * RET pointer to the job's record, NULL on error
+ * global: job_list - global job list pointer
+ *	job_hash, job_hash_over, max_hash_over - hash table into job records
+ */
 extern struct job_record *find_job_record (uint32_t job_id);
 
+/*
+ * find_first_node_record - find a record for first node in the bitmap
+ * IN node_bitmap - bitmap of nodes to search
+ */
+extern struct node_record *find_first_node_record (bitstr_t *node_bitmap);
+
 /* find_node_record - find a record for node with specified name */
 extern struct node_record *find_node_record (char *name);
 
-/* find_part_record - find a record for partition with specified name */
+/*
+ * find_part_record - find a record for partition with specified name
+ * IN name - name of the desired partition
+ * RET pointer to node partition or NULL if not found
+ * global: part_list - global partition list
+ */
 extern struct part_record *find_part_record (char *name);
 
-/* find_running_job_by_node_name - Given a node name, return a pointer to any
- *	job currently running on that node */
+/*
+ * find_running_job_by_node_name - Given a node name, return a pointer to any
+ *	job currently running on that node
+ * IN node_name - name of a node
+ * RET pointer to the job's record, NULL if no job on node found
+ */
 extern struct job_record *find_running_job_by_node_name (char *node_name);
 
-/* get_job_env - return the environment variables and their count for a given job */
+/*
+ * get_job_env - return the environment variables and their count for a
+ *	given job
+ * IN job_ptr - pointer to job for which data is required
+ * OUT env_size - number of elements to read
+ * RET pointer to array of string pointers containing environment variables
+ */
 extern char **get_job_env (struct job_record *job_ptr, uint16_t *env_size);
 
-/* get_job_script - return the script for a given job */
+/*
+ * get_job_script - return the script for a given job
+ * IN job_ptr - pointer to job for which data is required
+ * RET pointer to string containing the job script
+ */
 extern char *get_job_script (struct job_record *job_ptr);
 
-/* find_step_record - return a pointer to the step record with the given job_id and step_id */
-extern struct step_record * find_step_record(struct job_record *job_ptr, uint16_t step_id);
+/*
+ * find_step_record - return a pointer to the step record with the given
+ *	job_id and step_id
+ * IN job_ptr - pointer to the job's record
+ * IN step_id - id of the desired job step
+ * RET pointer to the job step's record, NULL on error
+ */
+extern struct step_record * find_step_record(struct job_record *job_ptr,
+					     uint16_t step_id);
 
 /*
 * init_job_conf - initialize the job configuration tables and values.
 *	this should be called after creating node information, but
 *	before creating any job entries.
+ * RET 0 if no error, otherwise an error code
+ * global: last_job_update - time of last job table update
+ *	job_list - pointer to global job list
 */
-extern int init_job_conf ();
+extern int init_job_conf (void);
 
 /*
- * init_node_conf - initialize the node configuration values.
- *	this should be called before creating any node or configuration entries.
+ * init_node_conf - initialize the node configuration tables and values.
+ *	this should be called before creating any node or configuration
+ *	entries.
+ * RET 0 if no error, otherwise an error code
+ * global: node_record_table_ptr - pointer to global node table
+ *	default_node_record - default values for node records
+ *	default_config_record - default values for configuration records
+ *	hash_table - table of hash indices
+ *	last_node_update - time of last node table update
 */
 extern int init_node_conf ();
 
 /*
- * init_part_conf - initialize the partition configuration values.
+ * init_part_conf - initialize the default partition configuration values
+ *	and create a (global) partition list.
 *	this should be called before creating any partition entries.
+ * RET 0 if no error, otherwise an error code
+ * global: default_part - default partition values
+ *	part_list - global partition list
 */
-extern int init_part_conf ();
-
-/* is_key_valid report if the supplied partition key is valid */
-extern int is_key_valid (void * key);
+extern int init_part_conf (void);
 
-/* job_allocate - allocate resource for the supplied job specifications */
-extern int job_allocate (job_desc_msg_t *job_specs, uint32_t *new_job_id, char **node_list,
-	uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps,
-	int immediate, int will_run, int allocate, uid_t submit_uid,
-	uint16_t *node_cnt, slurm_addr **node_addr);
+/*
+ * job_allocate - create job_records for the supplied job specification and
+ *	allocate nodes for it.
+ * IN job_specs - job specifications
+ * IN immediate - if set then either initiate the job immediately or fail
+ * IN will_run - don't initiate the job if set, just test if it could run
+ *	now or later
+ * IN allocate - resource allocation request if set, not a full job
+ * OUT new_job_id - the new job's ID
+ * OUT num_cpu_groups - number of cpu groups (elements in cpus_per_node
+ *	and cpu_count_reps)
+ * OUT cpus_per_node - pointer to array of numbers of cpus on each node
+ *	allocated
+ * OUT cpu_count_reps - pointer to array of numbers of consecutive nodes
+ *	having same cpu count
+ * OUT node_list - list of nodes allocated to the job
+ * OUT node_cnt - number of allocated nodes
+ * OUT node_addr - slurm_addr's for the allocated nodes
+ * RET 0 or an error code
+ * NOTE: If allocating nodes lx[0-7] to a job and those nodes have cpu counts
+ *	of 4, 4, 4, 4, 8, 8, 4, 4 then num_cpu_groups=3, cpus_per_node={4,8,4}
+ *	and cpu_count_reps={4,2,2}
+ * globals: job_list - pointer to global job list
+ *	list_part - global list of partition info
+ *	default_part_loc - pointer to default partition
+ */
+extern int job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id,
+	char **node_list, uint16_t * num_cpu_groups,
+	uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps,
+	int immediate, int will_run, int allocate, uid_t submit_uid,
+	uint16_t * node_cnt, slurm_addr ** node_addr);
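
[The num_cpu_groups / cpus_per_node / cpu_count_reps triple documented above is a run-length encoding of per-node cpu counts. The following standalone sketch (an illustration with a hypothetical helper name, not code from this patch) reproduces the NOTE's lx[0-7] example:

#include <stdint.h>
#include <stdio.h>

/* Run-length encode per-node cpu counts into cpu groups; returns the
 * group count (num_cpu_groups); output arrays are worst-case sized */
static uint16_t build_cpu_groups(const uint32_t *node_cpus, int node_cnt,
				 uint32_t *cpus_per_node,
				 uint32_t *cpu_count_reps)
{
	uint16_t groups = 0;
	for (int i = 0; i < node_cnt; i++) {
		if (groups && (cpus_per_node[groups - 1] == node_cpus[i]))
			cpu_count_reps[groups - 1]++;	/* extend the run */
		else {
			cpus_per_node[groups] = node_cpus[i];
			cpu_count_reps[groups] = 1;	/* start a new run */
			groups++;
		}
	}
	return groups;
}

int main(void)
{
	uint32_t node_cpus[8] = { 4, 4, 4, 4, 8, 8, 4, 4 };	/* lx[0-7] */
	uint32_t cpus_per_node[8], cpu_count_reps[8];
	uint16_t n = build_cpu_groups(node_cpus, 8, cpus_per_node,
				      cpu_count_reps);
	for (int i = 0; i < n; i++)	/* prints 4x4 8x2 4x2 */
		printf("%ux%u ", cpus_per_node[i], cpu_count_reps[i]);
	printf("(num_cpu_groups=%u)\n", (unsigned) n);
	return 0;
}
]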
-/* job_cancel - cancel the specified job */
+/*
+ * job_cancel - cancel the specified job
+ * IN job_id - id of the job to be cancelled
+ * IN uid - uid of requesting user
+ * RET 0 on success, otherwise ESLURM error code
+ * global: job_list - pointer to global job list
+ *	last_job_update - time of last job table update
+ */
 extern int job_cancel (uint32_t job_id, uid_t uid);
 
-/* job_step_cancel - cancel the specified job step */
+/*
+ * job_step_cancel - cancel the specified job step
+ * IN job_id - id of the job to be cancelled
+ * IN step_id - id of the job step to be cancelled
+ * IN uid - user id of user issuing the RPC
+ * RET 0 on success, otherwise ESLURM error code
+ * global: job_list - pointer to global job list
+ *	last_job_update - time of last job table update
+ */
 extern int job_step_cancel (uint32_t job_id, uint32_t job_step_id, uid_t uid );
 
 /*
@@ -386,188 +581,336 @@ extern int job_step_cancel (uint32_t job_id, uint32_t job_step_id, uid_t uid );
 *	last_job_update - time of last job table update
 */
 extern int job_complete (uint32_t job_id, uid_t uid, bool requeue,
-		uint32_t job_return_code);
+			 uint32_t job_return_code);
 
-/* job_step_complete - note the completion the specified job step*/
-extern int job_step_complete (uint32_t job_id, uint32_t job_step_id, uid_t uid);
+/*
+ * job_step_complete - note normal completion of the specified job step
+ * IN job_id - id of the job to be completed
+ * IN step_id - id of the job step to be completed
+ * IN uid - user id of user issuing the RPC
+ * RET 0 on success, otherwise ESLURM error code
+ * global: job_list - pointer to global job list
+ *	last_job_update - time of last job table update
+ */
+extern int job_step_complete (uint32_t job_id, uint32_t job_step_id,
+			uid_t uid);
 
-/* job_time_limit - enforce job time limits */
+/*
+ * job_time_limit - terminate jobs which have exceeded their time limit
+ * global: job_list - pointer to global job list
+ *	last_job_update - time of last job table update
+ */
 extern void job_time_limit (void);
 
-/* kill_running_job_by_node_name - Given a node name, deallocate that job
- *	from the node or kill it */
+/*
+ * kill_running_job_by_node_name - Given a node name, deallocate that job
+ *	from the node or kill it
+ * IN node_name - name of a node
+ * RET number of killed jobs
+ */
 extern int kill_running_job_by_node_name (char *node_name);
 
-/* list_append_list - Appends the elements of from list onto the to list */
-extern void list_append_list( List to, List from );
-
-/* list_compare_config - compare two entry from the config list based upon weight */
-extern int list_compare_config (void *config_entry1, void *config_entry2);
-
-/* list_delete_config - delete an entry from the configuration list */
-extern void list_delete_config (void *config_entry);
-
-/* list_find_config - find an entry in the configuration list */
-extern int list_find_config (void *config_entry, void *key);
+/* list_compare_config - compare two entries from the config list based upon
+ *	weight, see common/list.h for documentation */
int list_compare_config (void *config_entry1, void *config_entry2);
 
-/* list_find_part - find an entry in the partition list */
+/*
+ * list_find_part - find an entry in the partition list, see common/list.h
+ *	for documentation
+ * IN key - partition name or "universal_key" for all partitions
+ * RET 1 if matches key, 0 otherwise
+ * global: part_list - the global partition list
+ */
 extern int list_find_part (void *part_entry, void *key);
 
-/* load_job_state - load the job state from file, recover from slurmctld restart */
+/*
+ * load_job_state - load the job state from file, recover from last slurmctld
+ *	checkpoint. Execute this after loading the configuration file data.
+ * RET 0 or error code
+ */
 extern int load_job_state ( void );
 
-/* load_node_state - load the node state from file, recover from slurmctld restart */
+/*
+ * load_node_state - load the node state from file, recover from slurmctld
+ *	restart. execute this after loading the configuration file data.
+ *	data goes into common storage
+ */
 extern int load_node_state ( void );
 
-/* load_part_uid_allow_list - for every partition reload the allow_uid list */
+/*
+ * load_part_uid_allow_list - reload the allow_uid list of partitions
+ *	if required (updated group file or force set)
+ * IN force - if set then always reload the allow_uid list
+ */
 extern void load_part_uid_allow_list ( int force );
 
-/* load_part_state - load the partition state from file, recover from slurmctld restart */
+/*
+ * load_part_state - load the partition state from file, recover from
+ *	slurmctld restart. execute this after loading the configuration
+ *	file data.
+ */
 extern int load_part_state ( void );
 
-/* match_feature - determine if the desired feature (seek) is one of those available */
-extern int match_feature (char *seek, char *available);
-
-/* mkdir2 - issues system calls for mkdir (if root) */
-int mkdir2 (char * path, int modes);
-
-/* node_name2bitmap - given a node name regular expression, build a bitmap representation */
+/*
+ * node_name2bitmap - given a node name regular expression, build a bitmap
+ *	representation
+ * IN node_names - list of nodes
+ * OUT bitmap - set to bitmap or NULL on error
+ * RET 0 if no error, otherwise EINVAL or ENOMEM
+ * global: node_record_table_ptr - pointer to global node table
+ * NOTE: the caller must xfree memory at bitmap when no longer required
+ */
 extern int node_name2bitmap (char *node_names, bitstr_t **bitmap);
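
[As a usage sketch for the two conversion routines above (illustrative only, assuming an initialized node table; releasing the bitmap with bit_free() follows the _pick_step_nodes NOTE later in this patch):

	bitstr_t *bitmap = NULL;
	char *node_list;

	if (node_name2bitmap("lx[01-10]", &bitmap) != 0)
		return;				/* EINVAL or ENOMEM */
	node_list = bitmap2node_name(bitmap);	/* NULL on error */
	if (node_list)
		info("nodes: %s", node_list);
	xfree(node_list);			/* per the NOTEs above */
	bit_free(bitmap);
]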
-/* node_did_resp - record that the specified node is responding */
+/* node_did_resp - record that the specified node is responding
+ * IN name - name of the node */
 extern void node_did_resp (char *name);
 
-/* node_not_resp - record that the specified node is not responding */
+/* node_not_resp - record that the specified node is not responding
+ * IN name - name of the node */
 extern void node_not_resp (char *name);
 
-/* old_job_info - get details about an existing job allocation */
+/*
+ * old_job_info - get details about an existing job allocation
+ * IN uid - uid of user making the request
+ * IN job_id - ID of job for which info is requested
+ * OUT everything else - the job's details
+ */
 extern int old_job_info (uint32_t uid, uint32_t job_id, char **node_list,
-	uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps,
+	uint16_t * num_cpu_groups, uint32_t ** cpus_per_node,
+	uint32_t ** cpu_count_reps,
 	uint16_t * node_cnt, slurm_addr ** node_addr);
 
 /*
 * pack_all_jobs - dump all job information for all jobs in
 *	machine independent form (for network transmission)
-* NOTE: the caller must xfree the buffer at *buffer_ptr when no longer required
+ * OUT buffer_ptr - the pointer is set to the allocated buffer.
+ * OUT buffer_size - set to size of the buffer in bytes
+ * IN/OUT update_time - dump new data only if job records updated since time
+ *	specified, otherwise return empty buffer, set to time job
+ *	records last updated
+ * global: job_list - global list of job records
+ * NOTE: the buffer at *buffer_ptr must be xfreed by the caller
+ * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c
+ *	whenever the data format changes
 */
-extern void pack_all_jobs (char **buffer_ptr, int *buffer_size, time_t * update_time)
-;
+extern void pack_all_jobs (char **buffer_ptr, int *buffer_size,
+		time_t * update_time);
 
-/* pack_all_node - dump all configuration and node information for all nodes in
- *	machine independent form (for network transmission)
- * NOTE: the caller must xfree the buffer at *buffer_ptr when no longer required
+/*
+ * pack_all_node - dump all configuration and node information for all nodes
+ *	in machine independent form (for network transmission)
+ * OUT buffer_ptr - pointer to the stored data
+ * OUT buffer_size - set to size of the buffer in bytes
+ * IN/OUT update_time - dump new data only if node records updated since
+ *	time specified, otherwise return empty buffer, set to time node
+ *	records last updated
+ * global: node_record_table_ptr - pointer to global node table
+ * NOTE: the caller must xfree the buffer at *buffer_ptr
+ * NOTE: change slurm_load_node() in api/node_info.c when data format changes
+ */
-extern void pack_all_node (char **buffer_ptr, int *buffer_size, time_t * update_time);
+extern void pack_all_node (char **buffer_ptr, int *buffer_size,
+		time_t * update_time);
 
-/* pack_ctld_job_step_info_response_msg - packs the message
+/*
+ * pack_ctld_job_step_info_response_msg - packs job step info
 * IN - job_id and step_id - zero for all
- * OUT - packed buffer and length NOTE- MUST free_buf buffer
- * return - error code
+ * OUT buffer - location to store data, pointers automatically advanced
+ * RET - 0 or error code
+ * NOTE: MUST free_buf buffer
 */
-extern int pack_ctld_job_step_info_response_msg ( uint32_t job_id, uint32_t step_id, Buf buffer );
+extern int pack_ctld_job_step_info_response_msg ( uint32_t job_id,
+		uint32_t step_id, Buf buffer );
 
-/* pack_ctld_job_step_info - packs a job_step_info_t from a step_record
- */
-extern void pack_ctld_job_step_info( struct step_record* step, Buf buffer);
-
 /*
 * pack_all_part - dump all partition information for all partitions in
 *	machine independent form (for network transmission)
-* NOTE: the caller must xfree the buffer at *buffer_ptr when no longer required
+ * OUT buffer_ptr - the pointer is set to the allocated buffer.
+ * OUT buffer_size - set to size of the buffer in bytes
+ * IN/OUT update_time - dump new data only if partition records updated
+ *	since time specified, otherwise return empty buffer; set to time
+ *	partition records last updated
+ * global: part_list - global list of partition records
+ * NOTE: the buffer at *buffer_ptr must be xfreed by the caller
+ * NOTE: change slurm_load_part() in api/part_info.c if data format changes
 */
-extern void pack_all_part (char **buffer_ptr, int *buffer_size, time_t * update_time);
+extern void pack_all_part (char **buffer_ptr, int *buffer_size,
+		time_t * update_time);
 
 /*
 * pack_job - dump all configuration information about a specific job in
 *	machine independent form (for network transmission)
+ * IN dump_job_ptr - pointer to job for which information is requested
+ * IN/OUT buffer - buffer in which data is placed, pointers automatically
+ *	updated
+ * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c
+ *	whenever the data format changes
 */
 extern void pack_job (struct job_record *dump_job_ptr, Buf buffer);
 
 /*
- * pack_part - dump all configuration information about a specific partition in
- *	machine independent form (for network transmission)
+ * pack_part - dump all configuration information about a specific partition
+ *	in machine independent form (for network transmission)
+ * IN dump_part_ptr - pointer to partition for which information is requested
+ * IN/OUT buffer - buffer in which data is placed, pointers automatically
+ *	updated
+ * global: default_part_loc - pointer to the default partition
+ * NOTE: if you make any changes here be sure to make the corresponding
+ *	changes to load_part_config in api/partition_info.c
 */
 extern void pack_part (struct part_record *part_record_point, Buf buffer);
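
[All of the pack_* routines above follow one convention from src/common/pack.h: fixed-width fields are written into a Buf in a defined order and read back in the same order. A hedged round-trip sketch (assuming the init_buf, free_buf and unpack32 helpers from pack.h; the buffer size is arbitrary):

	Buf buffer = init_buf(1024);
	uint32_t version = 1, count = 42, v, c;

	pack32(version, buffer);	/* writer side: fixed field order */
	pack32(count, buffer);

	set_buf_offset(buffer, 0);	/* reader side: same order */
	unpack32(&v, buffer);
	unpack32(&c, buffer);
	free_buf(buffer);
]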
-/* ping_nodes - check that all nodes and daemons are alive */
+/* ping_nodes - check that all nodes and daemons are alive,
+ *	get nodes in UNKNOWN state to register */
 extern void ping_nodes (void);
 
 /*
- * purge_old_job - purge old job records. if memory space is needed.
+ * purge_old_job - purge old job records.
 *	the jobs must have completed at least MIN_JOB_AGE minutes ago
+ * global: job_list - global job table
+ *	last_job_update - time of last job table update
 */
 void purge_old_job (void);
 
-/* read_slurm_conf - load the slurm configuration from the configured file */
+/*
+ * read_slurm_conf - load the slurm configuration from the configured file.
+ *	read_slurm_conf can be called more than once if so desired.
+ * IN recover - set to use state saved from last slurmctld shutdown
+ * RET 0 if no error, otherwise an error code
+ * Note: Operates on common variables only
+ */
 extern int read_slurm_conf (int recover);
 
-/* rehash - build a hash table of the node_record entries */
+/*
+ * rehash - build a hash table of the node_record entries. this is a large
+ *	hash table to permit the immediate finding of a record based only
+ *	upon its name without regard to the number of records. there
+ *	should be no need for a search.
+ * global: node_record_table_ptr - pointer to global node table
+ *	hash_table - table of hash indices
+ * NOTE: manages memory for hash_table
+ */
 extern void rehash (void);
 
-/* reset_job_bitmaps - reestablish bitmaps for existing jobs */
+/*
+ * reset_job_bitmaps - reestablish bitmaps for existing jobs.
+ *	this should be called after rebuilding node information,
+ *	but before using any job entries.
+ * global: last_job_update - time of last job table update
+ *	job_list - pointer to global job list
+ */
 extern void reset_job_bitmaps (void);
 
-/* rmdir2 - issues system call to rmdir (if root) */
-extern int rmdir2 (char * path);
-
-/* schedule - attempt to schedule all pending jobs */
+/*
+ * schedule - attempt to schedule all pending jobs
+ *	pending jobs for each partition will be scheduled in priority
+ *	order until a request fails
+ * RET count of jobs scheduled
+ * global: job_list - global list of job records
+ *	last_job_update - time of last update to job table
+ * Note: We re-build the queue every time. Jobs can not only be added
+ *	or removed from the queue, but have their priority or partition
+ *	changed with the update_job RPC. In general jobs will be in priority
+ *	order (by submit time), so the sorting should be pretty fast.
+ */
 extern int schedule (void);
 
-/* select_nodes - select and allocate nodes to a specific job */
+/*
+ * select_nodes - select and allocate nodes to a specific job
+ * IN job_ptr - pointer to the job record
+ * IN test_only - do not allocate nodes, just confirm they could be
+ *	allocated now
+ * RET 0 on success, ESLURM code from slurm_errno.h otherwise
+ * globals: list_part - global list of partition info
+ *	default_part_loc - pointer to default partition
+ *	config_list - global list of node configuration info
+ * Notes: The algorithm is
+ *	1) Build a table (node_set_ptr) of nodes with the requisite
+ *	   configuration. Each table entry includes its weight,
+ *	   node_list, features, etc.
+ *	2) Call _pick_best_nodes() to select those nodes best satisfying
+ *	   the request (e.g. best-fit or other criterion)
+ *	3) Call allocate_nodes() to perform the actual allocation
+ */
 extern int select_nodes (struct job_record *job_ptr, int test_only);
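
[To make step 2 of the outline concrete, here is a standalone sketch (hypothetical types and numbers, far simpler than the real _pick_best_nodes) of walking node sets in weight order until a job's cpu requirement is met:

#include <stdint.h>
#include <stdio.h>

struct node_set {		/* simplified stand-in for node_set_ptr */
	uint32_t weight;	/* scheduling weight, lower is preferred */
	uint32_t nodes;		/* usable nodes in this set */
	uint32_t cpus_per_node;
};

/* Walk sets (already sorted by weight) until want_cpus is satisfied */
static int pick_cpus(const struct node_set *set, int nsets,
		     uint32_t want_cpus)
{
	uint32_t got = 0;
	for (int i = 0; (i < nsets) && (got < want_cpus); i++)
		got += set[i].nodes * set[i].cpus_per_node;
	return (got >= want_cpus);
}

int main(void)
{
	struct node_set sets[2] = { { 1, 4, 4 }, { 2, 2, 8 } };
	printf("can run: %d\n", pick_cpus(sets, 2, 24)); /* 16+16>=24 -> 1 */
	return 0;
}
]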
-/* set_job_id - set a default job_id, insure that it is unique */
-extern void set_job_id (struct job_record *job_ptr);
-
-/* set_job_prio - set a default job priority */
-extern void set_job_prio (struct job_record *job_ptr);
-
-/* set_slurmd_addr - establish the slurm_addr for the slurmd on each node */
+/* set_slurmd_addr - establish the slurm_addr for the slurmd on each node
+ *	Uses common data structures. */
 extern void set_slurmd_addr (void);
 
-/* step_count - return a count of steps associated with a specific job */
-extern int step_count (struct job_record *job_ptr);
-
-/* step_create - parse the suppied job step specification and create step_records for it */
+/*
+ * step_create - creates a step_record in step_specs->job_id and sets it up
+ *	according to the step_specs.
+ * IN step_specs - job step specifications
+ * OUT new_step_record - pointer to the new step_record (NULL on error)
+ * RET - 0 or error code
+ * NOTE: don't free the returned step_record because that is managed through
+ *	the job.
+ */
 extern int step_create ( step_specs *step_specs, struct step_record** );
 
-/* step_lock - lock the step information */
-extern void step_lock (void);
-
-/* step_unlock - unlock the step information */
-extern void step_unlock (void);
-
-/* sync_nodes_to_jobs - sync the node state to job states on slurmctld restart */
-extern int sync_nodes_to_jobs (void);
-
-/* update_job - update a job's parameters per the supplied specification */
+/*
+ * update_job - update a job's parameters per the supplied specifications
+ * IN job_specs - a job's specification
+ * IN uid - uid of user issuing RPC
+ * RET returns an error code from common/slurm_errno.h
+ * global: job_list - global list of job entries
+ *	last_job_update - time of last job table update
+ */
 extern int update_job (job_desc_msg_t * job_specs, uid_t uid);
 
-/* update_node - update the configuration data for one or more nodes per the supplied specification */
+/*
+ * update_node - update the configuration data for one or more nodes
+ * IN update_node_msg - update node request
+ * RET 0 or error code
+ * global: node_record_table_ptr - pointer to global node table
+ */
 extern int update_node ( update_node_msg_t * update_node_msg ) ;
 
-/* update_part - update a partition's configuration data per the supplied specification */
+/*
+ * update_part - update a partition's configuration data
+ * IN part_desc - description of partition changes
+ * RET 0 or an error code
+ * global: part_list - list of partition entries
+ *	last_part_update - update time of partition records
+ */
 extern int update_part (update_part_msg_t * part_desc );
 
-/* validate_group - validate that the submit uid is authorized to run in this partition */
+/*
+ * validate_group - validate that the submit uid is authorized to run in
+ *	this partition
+ * IN part_ptr - pointer to a partition
+ * IN submit_uid - user submitting the job
+ * RET 1 if permitted to run, 0 otherwise
+ */
 extern int validate_group (struct part_record *part_ptr, uid_t submit_uid);
 
-/* validate_jobs_on_node - validate that any jobs that should be on the node are
- *	actually running, if not clean up the job records and/or node records,
- *	call this function after validate_node_specs() sets the node state properly */
+/*
+ * validate_jobs_on_node - validate that any jobs that should be on the node
+ *	are actually running, if not clean up the job records and/or node
+ *	records; call this function after validate_node_specs() sets the
+ *	node state properly
+ * IN node_name - name of the node
+ * IN/OUT job_count - number of jobs which should be on this node
+ * IN job_id_ptr - pointer to array of job_ids that should be on this node
+ * IN step_id_ptr - pointer to array of job step ids that should be on node
+ */
 extern void validate_jobs_on_node ( char *node_name, uint32_t *job_count,
 			uint32_t *job_id_ptr, uint16_t *step_id_ptr);
 
-/* validate_node_specs - validate the node's specifications as valid */
+/*
+ * validate_node_specs - validate the node's specifications as valid,
+ *	if not set state to down, in any case update last_response
+ * IN node_name - name of the node
+ * IN cpus - number of cpus measured
+ * IN real_memory - mega_bytes of real_memory measured
+ * IN tmp_disk - mega_bytes of tmp_disk measured
+ * RET 0 if no error, ENOENT if no such node, EINVAL if values too low
+ * global: node_record_table_ptr - pointer to global node table
+ */
 extern int validate_node_specs (char *node_name,
 			uint32_t cpus, uint32_t real_memory, uint32_t tmp_disk,
 			uint32_t job_count);
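
[The contract documented for validate_node_specs can be sketched as follows (an illustrative fragment, not this patch's implementation; node_ptr and config_ptr stand for record lookups that are elided here):

	if (node_ptr == NULL)
		return ENOENT;
	node_ptr->last_response = time(NULL);	/* updated in any case */
	if ((cpus < config_ptr->cpus) ||
	    (real_memory < config_ptr->real_memory) ||
	    (tmp_disk < config_ptr->tmp_disk)) {
		error("node %s has low resource count", node_name);
		node_ptr->node_state = NODE_STATE_DOWN;
		return EINVAL;
	}
	return SLURM_SUCCESS;
]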
-/*
- * yes_or_no - map string into integer
- * input: in_string: pointer to string containing "YES" or "NO"
- * output: returns 1 for "YES", 0 for "NO", -1 otherwise
- */
-extern int yes_or_no (char *in_string);
-
 #endif /* !_HAVE_SLURM_H */
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index cc213a15918..2dc0c81047e 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -25,7 +25,7 @@
 \*****************************************************************************/
 
 #ifdef HAVE_CONFIG_H
-# include <config.h>
+# include "config.h"
 #endif
 
 #include <time.h>
@@ -34,24 +34,27 @@
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <sys/types.h>
 #include <string.h>
+#include <unistd.h>
 
 #ifdef HAVE_LIBELAN3
-# include <src/common/qsw.h>
+# include "src/common/qsw.h"
 #endif
 
-#include <src/common/bitstring.h>
-#include <src/common/slurm_errno.h>
-#include <src/slurmctld/locks.h>
-#include <src/slurmctld/slurmctld.h>
+#include "src/common/bitstring.h"
+#include "src/common/slurm_errno.h"
+#include "src/slurmctld/locks.h"
+#include "src/slurmctld/slurmctld.h"
 
-bitstr_t * pick_step_nodes (struct job_record *job_ptr,
-	step_specs *step_spec );
+static void _pack_ctld_job_step_info(struct step_record *step, Buf buffer);
+static bitstr_t * _pick_step_nodes (struct job_record *job_ptr,
+	step_specs *step_spec );
 
 /*
 * create_step_record - create an empty step_record for the specified job.
- * input: job_ptr - pointer to job table entry to have step record added
- * output: returns a pointer to the record or NULL if error
+ * IN job_ptr - pointer to job table entry to have step record added
+ * RET a pointer to the record or NULL if error
 * NOTE: allocates memory that should be xfreed with delete_step_record
 */
 struct step_record *
@@ -76,8 +79,7 @@ create_step_record (struct job_record *job_ptr)
 
 /*
 * delete_all_step_records - delete all step record for specified job_ptr
- * input: job_ptr - pointer to job table entry to have step record added
- * output: return 0 on success, errno otherwise
+ * IN job_ptr - pointer to job table entry to have step records removed
 */
 void
 delete_all_step_records (struct job_record *job_ptr)
@@ -105,10 +107,10 @@ delete_all_step_records (struct job_record *job_ptr)
 
 /*
 * delete_step_record - delete record for job step for specified job_ptr
-* and step_id
- * input: job_ptr - pointer to job table entry to have step record removed
- *	step_id - id of the desired job step
- * output: return 0 on success, errno otherwise
+ *	and step_id
+ * IN job_ptr - pointer to job table entry to have step record removed
+ * IN step_id - id of the desired job step
+ * RET 0 on success, errno otherwise
 */
 int
 delete_step_record (struct job_record *job_ptr, uint32_t step_id)
@@ -141,17 +143,20 @@ delete_step_record (struct job_record *job_ptr, uint32_t step_id)
 }
 
 
-/* dump_step_desc - dump the incoming step initiate request message */
+/*
+ * dump_step_desc - dump the incoming step initiate request message
+ * IN step_spec - job step request specification from RPC
+ */
 void
 dump_step_desc(step_specs *step_spec)
 {
 	if (step_spec == NULL)
 		return;
 
-	debug3("StepDesc: user_id=%u job_id=%u node_count=%u, cpu_count=%u\n",
+	debug3("StepDesc: user_id=%u job_id=%u node_count=%u, cpu_count=%u",
	       step_spec->user_id, step_spec->job_id,
	       step_spec->node_count, step_spec->cpu_count);
-	debug3("   relative=%u task_dist=%u node_list=%s\n",
+	debug3("   relative=%u task_dist=%u node_list=%s",
	       step_spec->relative, step_spec->task_dist,
	       step_spec->node_list);
 }
@@ -160,9 +165,9 @@ dump_step_desc(step_specs *step_spec)
 
 /*
 * find_step_record - return a pointer to the step record with the given
 *	job_id and step_id
- * input: job_ptr - pointer to job table entry to have step record added
- *	step_id - id of the desired job step
- * output: pointer to the job step's record, NULL on error
+ * IN job_ptr - pointer to the job's record
+ * IN step_id - id of the desired job step
+ * RET pointer to the job step's record, NULL on error
 */
 struct step_record *
 find_step_record(struct job_record *job_ptr, uint16_t step_id)
@@ -188,14 +193,110 @@ find_step_record(struct job_record *job_ptr, uint16_t step_id)
 
 /*
- * pick_step_nodes - select nodes for a job step that satify its requirements
+ * job_step_cancel - cancel the specified job step
+ * IN job_id - id of the job to be cancelled
+ * IN step_id - id of the job step to be cancelled
+ * IN uid - user id of user issuing the RPC
+ * RET 0 on success, otherwise ESLURM error code
+ * global: job_list - pointer to global job list
+ *	last_job_update - time of last job table update
+ */
+int job_step_cancel(uint32_t job_id, uint32_t step_id, uid_t uid)
+{
+	struct job_record *job_ptr;
+	int error_code;
+
+	job_ptr = find_job_record(job_id);
+	if (job_ptr == NULL) {
+		error("job_step_cancel: invalid job id %u", job_id);
+		return ESLURM_INVALID_JOB_ID;
+	}
+
+	if ((job_ptr->job_state == JOB_FAILED) ||
+	    (job_ptr->job_state == JOB_COMPLETE) ||
+	    (job_ptr->job_state == JOB_TIMEOUT))
+		return ESLURM_ALREADY_DONE;
+
+	if ((job_ptr->user_id != uid) && (uid != 0) && (uid != getuid())) {
+		error("Security violation, JOB_STEP_CANCEL RPC from uid %d",
+		      uid);
+		return ESLURM_USER_ID_MISSING;
+	}
+
+	if (job_ptr->job_state == JOB_RUNNING) {
+		last_job_update = time(NULL);
+		error_code = delete_step_record(job_ptr, step_id);
+		if (error_code == ENOENT) {
+			info("job_step_cancel step %u.%u not found",
+			     job_id, step_id);
+			return ESLURM_ALREADY_DONE;
+		}
+
+		job_ptr->time_last_active = time(NULL);
+		return SLURM_SUCCESS;
+	}
+
+	info("job_step_cancel: step %u.%u can't be cancelled from state=%s",
+	     job_id, step_id, job_state_string(job_ptr->job_state));
+	return ESLURM_TRANSITION_STATE_NO_UPDATE;
+}
+
+
+/*
+ * job_step_complete - note normal completion of the specified job step
+ * IN job_id - id of the job to be completed
+ * IN step_id - id of the job step to be completed
+ * IN uid - user id of user issuing the RPC
+ * RET 0 on success, otherwise ESLURM error code
+ * global: job_list - pointer to global job list
+ *	last_job_update - time of last job table update
+ */
+int job_step_complete(uint32_t job_id, uint32_t step_id, uid_t uid)
+{
+	struct job_record *job_ptr;
+	int error_code;
+
+	job_ptr = find_job_record(job_id);
+	if (job_ptr == NULL) {
+		info("job_step_complete: invalid job id %u", job_id);
+		return ESLURM_INVALID_JOB_ID;
+	}
+
+	if ((job_ptr->job_state == JOB_FAILED) ||
+	    (job_ptr->job_state == JOB_COMPLETE) ||
+	    (job_ptr->job_state == JOB_TIMEOUT))
+		return ESLURM_ALREADY_DONE;
+
+	if ((job_ptr->user_id != uid) && (uid != 0) && (uid != getuid())) {
+		error("Security violation, JOB_STEP_COMPLETE RPC from uid %d",
+		      uid);
+		return ESLURM_USER_ID_MISSING;
+	}
+
+	last_job_update = time(NULL);
+	error_code = delete_step_record(job_ptr, step_id);
+	if (error_code == ENOENT) {
+		info("job_step_complete step %u.%u not found", job_id,
+		     step_id);
+		return ESLURM_ALREADY_DONE;
+	}
+	return SLURM_SUCCESS;
+}
+
+
+/*
+ * _pick_step_nodes - select nodes for a job step that satisfy its requirements
 *	we satify the super-set of constraints.
+ * IN job_ptr - pointer to job to have new step started + * IN step_spec - job step specification * global: node_record_table_ptr - pointer to global node table * NOTE: returns all of a job's nodes if step_spec->node_count == INFINITE * NOTE: returned bitmap must be freed by the caller using bit_free() */ -bitstr_t * -pick_step_nodes (struct job_record *job_ptr, step_specs *step_spec ) { +static bitstr_t * +_pick_step_nodes (struct job_record *job_ptr, step_specs *step_spec ) { bitstr_t *nodes_avail = NULL, *nodes_picked = NULL, *node_tmp = NULL; int error_code, nodes_picked_cnt = 0, cpus_picked_cnt, i; @@ -213,17 +314,17 @@ pick_step_nodes (struct job_record *job_ptr, step_specs *step_spec ) { error_code = node_name2bitmap (step_spec->node_list, &nodes_picked); if (error_code) { - info ("pick_step_nodes: invalid node list %s", + info ("_pick_step_nodes: invalid node list %s", step_spec->node_list); goto cleanup; } if (bit_super_set (nodes_picked, job_ptr->node_bitmap) == 0) { - info ("pick_step_nodes: requested nodes %s not part of job %u", + info ("_pick_step_nodes: requested nodes %s not part of job %u", step_spec->node_list, job_ptr->job_id); goto cleanup; } if (bit_super_set (nodes_picked, up_node_bitmap) == 0) { - info ("pick_step_nodes: some requested node %s is/are down", + info ("_pick_step_nodes: some requested node %s is/are down", step_spec->node_list); goto cleanup; } @@ -235,7 +336,7 @@ pick_step_nodes (struct job_record *job_ptr, step_specs *step_spec ) { relative_nodes = bit_pick_cnt (nodes_avail, step_spec->relative); if (relative_nodes == NULL) { - info ("pick_step_nodes: Invalid relative value (%u) for job %u", + info ("_pick_step_nodes: Invalid relative value (%u) for job %u", step_spec->relative, job_ptr->job_id); goto cleanup; } @@ -310,9 +411,9 @@ cleanup: /* * step_create - creates a step_record in step_specs->job_id, sets up the * accoding to the step_specs. - * input: step_specs - job step specifications - * output: SUCCESS: returns a pointer to the step_record - * FAILURE: sets slurm_srrno appropriately and returns + * IN step_specs - job step specifications + * OUT new_step_record - pointer to the new step_record (NULL on error) + * RET - 0 or error code * NOTE: don't free the returned step_record because that is managed through * the job. 
 */
@@ -349,7 +450,7 @@ step_create ( step_specs *step_specs, struct step_record** new_step_record )
 		return ESLURM_BAD_DIST;
 #endif
 
-	nodeset = pick_step_nodes (job_ptr, step_specs );
+	nodeset = _pick_step_nodes (job_ptr, step_specs );
 	if (nodeset == NULL)
 		return ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE ;
 
@@ -401,30 +502,101 @@ step_create ( step_specs *step_specs, struct step_record** new_step_record )
 	return SLURM_SUCCESS;
 }
 
-/*
- * step_count - return a count of steps associated with a specific job
- * input: job_ptr - pointer to job table entry to have step record added
- * output: returns count of job steps
+/* Pack the data for a specific job step record
+ * IN step - pointer to a job step record
+ * IN/OUT buffer - location to store data, pointers automatically advanced
 */
-int
-step_count (struct job_record *job_ptr)
+static void _pack_ctld_job_step_info(struct step_record *step, Buf buffer)
 {
-	int step_count = 0;
-	ListIterator step_record_iterator;
-	struct step_record *step_record_point;
-
-	if (job_ptr == NULL)
-		return step_count;
-
-	step_record_iterator = list_iterator_create (job_ptr->step_list);
+	char *node_list;
 
-	while ((step_record_point = (struct step_record *)
-				list_next (step_record_iterator))) {
-		step_count++;
-	}
+	if (step->node_bitmap)
+		node_list = bitmap2node_name(step->node_bitmap);
+	else {
+		node_list = xmalloc(1);
+		node_list[0] = '\0';
+	}
 
-	list_iterator_destroy (step_record_iterator);
-	return step_count;
+	pack_job_step_info_members(step->job_ptr->job_id,
+				   step->step_id,
+				   step->job_ptr->user_id,
+				   step->start_time,
+				   step->job_ptr->partition,
+				   node_list, buffer);
+	xfree(node_list);
 }
 
+/*
+ * pack_ctld_job_step_info_response_msg - packs job step info
+ * IN - job_id and step_id - zero for all
+ * OUT buffer - location to store data, pointers automatically advanced
+ * RET - 0 or error code
+ * NOTE: MUST free_buf buffer
+ */
+int pack_ctld_job_step_info_response_msg(uint32_t job_id,
+					 uint32_t step_id, Buf buffer)
+{
+	ListIterator job_record_iterator;
+	ListIterator step_record_iterator;
+	int error_code = 0;
+	uint32_t steps_packed = 0, tmp_offset;
+	struct step_record *step_ptr;
+	struct job_record *job_ptr;
+
+	pack_time(last_job_update, buffer);
+	pack32(steps_packed, buffer);	/* steps_packed placeholder */
+
+	if (job_id == 0) {
+		/* Return all steps for all jobs */
+		job_record_iterator = list_iterator_create(job_list);
+		while ((job_ptr =
+			(struct job_record *)
+			list_next(job_record_iterator))) {
+			step_record_iterator =
+			    list_iterator_create(job_ptr->step_list);
+			while ((step_ptr =
+				(struct step_record *)
+				list_next(step_record_iterator))) {
+				_pack_ctld_job_step_info(step_ptr, buffer);
+				steps_packed++;
+			}
+			list_iterator_destroy(step_record_iterator);
+		}
+		list_iterator_destroy(job_record_iterator);
+
+	} else if (step_id == 0) {
+		/* Return all steps for specific job_id */
+		job_ptr = find_job_record(job_id);
+		if (job_ptr) {
+			step_record_iterator =
+			    list_iterator_create(job_ptr->step_list);
+			while ((step_ptr =
+				(struct step_record *)
+				list_next(step_record_iterator))) {
+				_pack_ctld_job_step_info(step_ptr, buffer);
+				steps_packed++;
+			}
+			list_iterator_destroy(step_record_iterator);
+		} else
+			error_code = ESLURM_INVALID_JOB_ID;
+	} else {
+		/* Return step with given step_id/job_id */
+		job_ptr = find_job_record(job_id);
+		step_ptr = find_step_record(job_ptr, step_id);
+		if (step_ptr == NULL)
+			error_code = ESLURM_INVALID_JOB_ID;
+		else {
+			_pack_ctld_job_step_info(step_ptr, buffer);
+			steps_packed++;
+		}
+	}
+
+	/* put
the real record count in the message body header */ + tmp_offset = get_buf_offset(buffer); + set_buf_offset(buffer, 0); + pack_time(last_job_update, buffer); + pack32(steps_packed, buffer); + set_buf_offset(buffer, tmp_offset); + return error_code; +} -- GitLab