ping_nodes.c 9.25 KiB
/*****************************************************************************\
* ping_nodes.c - ping the slurmd daemons to test if they respond
* Note: there is a global node table (node_record_table_ptr)
*****************************************************************************
* Copyright (C) 2003 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Morris Jette <jette1@llnl.gov> et. al.
* UCRL-CODE-2002-040.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.llnl.gov/linux/slurm/>.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
\*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#ifdef WITH_PTHREADS
# include <pthread.h>
#endif
#include <time.h>
#include <string.h>
#include "src/common/hostlist.h"
#include "src/slurmctld/agent.h"
#include "src/slurmctld/ping_nodes.h"
#include "src/slurmctld/slurmctld.h"
/* Attempt to fork a thread at most MAX_RETRIES times before aborting */
#define MAX_RETRIES 10
/* Request that nodes re-register at most every MAX_REG_FREQUENCY pings */
#define MAX_REG_FREQUENCY 20
/* Spawn no more than MAX_REG_THREADS for node re-registration */
#define MAX_REG_THREADS (MAX_SERVER_THREADS - 2)
/* Held around every read and update of ping_count in this file */
static pthread_mutex_t lock_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Number of ping cycles opened by ping_begin() and not yet closed by
 * ping_end(); is_ping_done() reports true only while this is zero */
static int ping_count = 0;
/*
 * is_ping_done - report whether every outstanding ping cycle has finished.
 *	Check this before launching a new round of ping requests so that
 *	a new round is not started while an earlier one is still running.
 * RET true if no ping cycle is outstanding, false otherwise
 */
bool is_ping_done (void)
{
	bool done;

	slurm_mutex_lock(&lock_mutex);
	done = (ping_count == 0);
	slurm_mutex_unlock(&lock_mutex);

	return done;
}
/*
 * ping_begin - record that a ping cycle has begun. This may be called more
 *	than once (for REQUEST_PING and a simultaneous
 *	REQUEST_NODE_REGISTRATION for selected nodes). Every call must be
 *	balanced by a ping_end call before is_ping_done returns true.
 */
void ping_begin (void)
{
	slurm_mutex_lock(&lock_mutex);
	ping_count += 1;
	slurm_mutex_unlock(&lock_mutex);
}
/*
 * ping_end - record that a ping cycle has ended. This may be called more
 *	than once (for REQUEST_PING and a simultaneous
 *	REQUEST_NODE_REGISTRATION for selected nodes). Each call balances
 *	one earlier ping_begin call; is_ping_done returns true only once
 *	all of them have been balanced. A call with no cycle outstanding
 *	is reported through fatal().
 */
void ping_end (void)
{
	slurm_mutex_lock(&lock_mutex);
	if (ping_count <= 0)
		fatal ("ping_count < 0");
	else
		ping_count--;
	slurm_mutex_unlock(&lock_mutex);
}
/*
 * _agent_add_node - append one node's address and name to an agent request,
 *	growing the request's arrays 32 records at a time as needed
 * IN/OUT agent_args - agent request being assembled
 * IN/OUT buf_rec_size - number of node records currently allocated in
 *	agent_args->slurm_addr and agent_args->node_names
 * IN node_ptr - node to append
 */
static void _agent_add_node (agent_arg_t *agent_args, int *buf_rec_size,
			     struct node_record *node_ptr)
{
	int pos;

	if ((agent_args->node_count + 1) > *buf_rec_size) {
		*buf_rec_size += 32;
		xrealloc ((agent_args->slurm_addr),
			  (sizeof (struct sockaddr_in) * (*buf_rec_size)));
		xrealloc ((agent_args->node_names),
			  (MAX_NAME_LEN * (*buf_rec_size)));
	}

	agent_args->slurm_addr[agent_args->node_count] = node_ptr->slurm_addr;
	/* node_names is a flat array of MAX_NAME_LEN byte slots */
	pos = MAX_NAME_LEN * agent_args->node_count;
	strncpy (&agent_args->node_names[pos], node_ptr->name, MAX_NAME_LEN);
	agent_args->node_count++;
}

/*
 * _spawn_agent - start a detached thread running agent() on a prepared
 *	request. Calls ping_begin() before the thread is created and
 *	retries thread creation up to MAX_RETRIES times, one second apart,
 *	before giving up through fatal().
 * IN agent_args - request handed to agent() as its argument
 * IN/OUT hostlist - names of the targeted nodes, used only for the log
 *	message; duplicate entries are removed in place
 * IN agent_desc - word naming the kind of agent in the log message
 */
static void _spawn_agent (agent_arg_t *agent_args, hostlist_t hostlist,
			  const char *agent_desc)
{
	char host_str[64];
	int retries = 0;	/* counted per agent, not shared between agents */
	pthread_attr_t attr_agent;
	pthread_t thread_agent;

	hostlist_uniq(hostlist);
	hostlist_ranged_string(hostlist, sizeof(host_str), host_str);
	debug2 ("Spawning %s agent for %s", agent_desc, host_str);

	/* Register the cycle before the thread is created */
	ping_begin();
	slurm_attr_init (&attr_agent);
	if (pthread_attr_setdetachstate (&attr_agent,
					 PTHREAD_CREATE_DETACHED))
		error ("pthread_attr_setdetachstate error %m");
	while (pthread_create (&thread_agent, &attr_agent,
			       agent, (void *)agent_args)) {
		error ("pthread_create error %m");
		if (++retries > MAX_RETRIES)
			fatal("Can't create pthread");
		sleep (1);	/* sleep and try again */
	}
	/* The attribute object is no longer needed once the thread exists */
	(void) pthread_attr_destroy (&attr_agent);
}

/*
 * ping_nodes - check that all nodes and daemons are alive,
 *	get nodes in UNKNOWN state to register
 *
 * Each node whose last response is older than the heartbeat interval is
 * handled in one of three ways:
 *  - if it has responded before, but not since node_dead_time, and is not
 *    already DOWN or DRAINED, it is set DOWN with reason "Not responding";
 *  - if it is in UNKNOWN state, is flagged as not responding, has never
 *    responded, or falls inside the current periodic registration window,
 *    it is sent REQUEST_NODE_REGISTRATION_STATUS;
 *  - otherwise it is sent REQUEST_PING.
 * The ping and registration requests are each issued by a separate
 * detached agent thread; a request with no nodes is freed instead.
 */
void ping_nodes (void)
{
	static int offset = 0;	/* mutex via node table write lock on entry */
	static time_t last_ping_time = (time_t) 0;
	int i;
	time_t now, still_live_time, node_dead_time;
	uint16_t base_state, no_resp_flag;
	hostlist_t ping_hostlist = hostlist_create("");
	hostlist_t reg_hostlist = hostlist_create("");
	hostlist_t down_hostlist = NULL;
	char host_str[64];
	int ping_buf_rec_size = 0;
	int reg_buf_rec_size = 0;
	agent_arg_t *ping_agent_args;
	agent_arg_t *reg_agent_args;

	ping_agent_args = xmalloc (sizeof (agent_arg_t));
	ping_agent_args->msg_type = REQUEST_PING;
	ping_agent_args->retry = 0;
	reg_agent_args = xmalloc (sizeof (agent_arg_t));
	reg_agent_args->msg_type = REQUEST_NODE_REGISTRATION_STATUS;
	reg_agent_args->retry = 0;

	/*
	 * If there are a large number of down nodes, the node ping
	 * can take a long time to complete:
	 *  ping_time = down_nodes * agent_timeout / agent_parallelism
	 *  ping_time = down_nodes * 10_seconds / 10
	 *  ping_time = down_nodes (seconds)
	 * Because of this, we extend the SlurmdTimeout by the
	 * time needed to complete a ping of all nodes.
	 */
	now = time (NULL);
	if ( (slurmctld_conf.slurmd_timeout == 0) ||
	     (last_ping_time == (time_t) 0) )
		node_dead_time = (time_t) 0;
	else
		node_dead_time = last_ping_time - slurmctld_conf.slurmd_timeout;
	still_live_time = now - slurmctld_conf.heartbeat_interval;
	last_ping_time = now;

	/* Advance the periodic registration window by MAX_REG_THREADS nodes
	 * per call; wrap to the start once it is beyond the node table and
	 * at least MAX_REG_FREQUENCY calls have gone by */
	offset += MAX_REG_THREADS;
	if ((offset > node_record_count) &&
	    (offset >= (MAX_REG_THREADS * MAX_REG_FREQUENCY)))
		offset = 0;

	for (i = 0; i < node_record_count; i++) {
		struct node_record *node_ptr;

		node_ptr = &node_record_table_ptr[i];
		if (node_ptr->last_response >= still_live_time)
			continue;	/* heard from recently enough */

		base_state = node_ptr->node_state & (~NODE_STATE_NO_RESPOND);
		no_resp_flag = node_ptr->node_state & NODE_STATE_NO_RESPOND;
		if ((node_ptr->last_response != (time_t)0) &&
		    (node_ptr->last_response <= node_dead_time) &&
		    ((base_state != NODE_STATE_DOWN) &&
		     (base_state != NODE_STATE_DRAINED))) {
			if (down_hostlist)
				(void) hostlist_push_host(down_hostlist,
							  node_ptr->name);
			else
				down_hostlist =
					hostlist_create(node_ptr->name);
			set_node_down(node_ptr->name, "Not responding");
			continue;
		}

		/* Never heard from this node: treat it as not responding and
		 * start its response clock at the configuration update time */
		if (node_ptr->last_response == (time_t)0) {
			no_resp_flag = 1;
			node_ptr->last_response = slurmctld_conf.last_update;
		}

#ifdef HAVE_FRONT_END		/* Operate only on front-end */
		if (i > 0)
			continue;
#endif

		/* Request a node registration if its state is UNKNOWN or
		 * on a periodic basis (about every MAX_REG_FREQUENCY ping,
		 * this mechanism avoids an additional (per node) timer or
		 * counter and gets updated configuration information
		 * once in a while). We limit these requests since they
		 * can generate a flood of incoming RPCs. */
		if ((base_state == NODE_STATE_UNKNOWN) || no_resp_flag ||
		    ((i >= offset) && (i < (offset + MAX_REG_THREADS)))) {
			(void) hostlist_push_host(reg_hostlist,
						  node_ptr->name);
			_agent_add_node (reg_agent_args, &reg_buf_rec_size,
					 node_ptr);
			continue;
		}

		(void) hostlist_push_host(ping_hostlist, node_ptr->name);
		_agent_add_node (ping_agent_args, &ping_buf_rec_size,
				 node_ptr);
	}

	if (ping_agent_args->node_count == 0)
		xfree (ping_agent_args);
	else
		_spawn_agent (ping_agent_args, ping_hostlist, "ping");

	if (reg_agent_args->node_count == 0)
		xfree (reg_agent_args);
	else
		_spawn_agent (reg_agent_args, reg_hostlist, "registration");

	if (down_hostlist) {
		hostlist_uniq(down_hostlist);
		hostlist_ranged_string(down_hostlist,
				       sizeof(host_str), host_str);
		error("Nodes %s not responding, setting DOWN", host_str);
		hostlist_destroy(down_hostlist);
	}
	hostlist_destroy(ping_hostlist);
	hostlist_destroy(reg_hostlist);
}