/*****************************************************************************\
 *  slurmctld.h - definitions of functions and structures for slurmctld use
 *****************************************************************************
 *  Copyright (C) 2002 The Regents of the University of California.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Morris Jette <jette@llnl.gov> et al.
 *  UCRL-CODE-2002-040.
 *  
 *  This file is part of SLURM, a resource management program.
 *  For details, see <http://www.llnl.gov/linux/slurm/>.
 *  
 *  SLURM is free software; you can redistribute it and/or modify it under
 *  the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *  
 *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *  
 *  You should have received a copy of the GNU General Public License along
 *  with SLURM; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
\*****************************************************************************/

#ifndef _HAVE_SLURM_H
#define _HAVE_SLURM_H

#if HAVE_CONFIG_H
#  include "config.h"
#  if HAVE_INTTYPES_H
#    include <inttypes.h>
#  else
#    if HAVE_STDINT_H
#      include <stdint.h>
#    endif
#  endif			/* HAVE_INTTYPES_H */
#endif

#include <pthread.h>
/* #include <stdlib.h> */
#include <time.h>
#include <sys/types.h>
#ifdef WITH_PTHREADS
#  include <pthread.h>
#endif				/* WITH_PTHREADS */

#include <slurm/slurm.h>

#include "src/common/bitstring.h"
#include "src/common/checkpoint.h"
#include "src/common/list.h"
#include "src/common/log.h"
#include "src/common/macros.h"
#include "src/common/pack.h"
#include "src/common/slurm_cred.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/switch.h"
#include "src/common/xmalloc.h"

#define FREE_NULL_BITMAP(_X)		\
	do {				\
		if (_X) bit_free (_X);	\
		_X	= NULL; 	\
	} while (0)

#define IS_JOB_FINISHED(_X)		\
	((_X->job_state & (~JOB_COMPLETING)) >  JOB_RUNNING)
#define IS_JOB_PENDING(_X)		\
	((_X->job_state & (~JOB_COMPLETING)) == JOB_PENDING)

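/*
 * Usage sketch (illustrative only, not part of this header): how the
 * macros above are typically applied.  struct job_record is defined
 * later in this file; _purge_job() is a hypothetical caller.
 */
#if 0	/* not compiled -- example only */
static void _purge_job(struct job_record *job_ptr)
{
	if (IS_JOB_FINISHED(job_ptr)) {
		/* bit_free() the allocation bitmap; the macro also
		 * NULLs the pointer, making repeat calls harmless */
		FREE_NULL_BITMAP(job_ptr->node_bitmap);
	}
}
#endif
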
/*****************************************************************************\
 *  GENERAL CONFIGURATION parameters and data structures
\*****************************************************************************/
/* Maximum parallel threads to service incoming RPCs */
#define MAX_SERVER_THREADS 60
/* Save the slurmctld's full state every PERIODIC_CHECKPOINT seconds */

/* Retry an incomplete RPC agent request every RPC_RETRY_INTERVAL seconds */
#define	RPC_RETRY_INTERVAL	60

/* Attempt to schedule jobs every PERIODIC_SCHEDULE seconds despite
 * any RPC activity. This will catch any state transitions that may
 * have otherwise been missed */

/* Check for jobs reaching their time limit every PERIODIC_TIMEOUT seconds */
#define	PERIODIC_TIMEOUT	60

/* Pathname of group file record for checking update times */
#define GROUP_FILE	"/etc/group"

/* Check for updates to GROUP_FILE every PERIODIC_GROUP_CHECK seconds, 
 * Update the group uid_t access list as needed */
#define	PERIODIC_GROUP_CHECK	600

/* Seconds to wait for backup controller response to REQUEST_CONTROL RPC */
#define CONTROL_TIMEOUT 4
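
/*
 * Usage sketch (illustrative only): one way a slurmctld background
 * thread could apply the intervals above.  job_time_limit() is a
 * stand-in for whatever per-interval check is actually performed.
 */
#if 0	/* not compiled -- example only */
static void _check_timeouts(void)
{
	static time_t last_timeout_test = (time_t) 0;

	if (difftime(time(NULL), last_timeout_test) < PERIODIC_TIMEOUT)
		return;		/* interval has not yet elapsed */
	last_timeout_test = time(NULL);
	job_time_limit();	/* hypothetical: scan for expired limits */
}
#endif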

typedef struct slurmctld_config {
	int	daemonize;
	bool	resume_backup;
	time_t	shutdown_time;
	int	server_thread_count;

	slurm_cred_ctx_t cred_ctx;
#ifdef WITH_PTHREADS
	pthread_mutex_t thread_count_lock;
	pthread_t thread_id_main;
	pthread_t thread_id_sig;
	pthread_t thread_id_rpc;
#else
	int thread_count_lock;
	int thread_id_main;
	int thread_id_sig;
	int thread_id_rpc;
#endif
} slurmctld_config_t;

extern slurmctld_config_t slurmctld_config;
extern slurm_ctl_conf_t slurmctld_conf;
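
/*
 * Usage sketch (illustrative only): RPC service threads are expected
 * to bump server_thread_count under thread_count_lock, staying below
 * MAX_SERVER_THREADS.  slurm_mutex_lock()/slurm_mutex_unlock() are the
 * wrappers from src/common/macros.h; _claim_server_thread() is a
 * hypothetical helper.
 */
#if 0	/* not compiled -- example only */
static bool _claim_server_thread(void)
{
	bool claimed = false;

	slurm_mutex_lock(&slurmctld_config.thread_count_lock);
	if (slurmctld_config.server_thread_count < MAX_SERVER_THREADS) {
		slurmctld_config.server_thread_count++;
		claimed = true;
	}
	slurm_mutex_unlock(&slurmctld_config.thread_count_lock);
	return claimed;
}
#endif
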
/*****************************************************************************\
 *  NODE parameters and data structures
\*****************************************************************************/
#define CONFIG_MAGIC 0xc065eded
#define NODE_MAGIC   0x0de575ed
struct config_record {
	uint32_t magic;		/* magic cookie to test data integrity */
	uint32_t cpus;		/* count of cpus running on the node */
	uint32_t real_memory;	/* MB real memory on the node */
	uint32_t tmp_disk;	/* MB total storage in TMP_FS file system */
	uint32_t weight;	/* arbitrary priority of node for 
				 * scheduling work on */
	char *feature;		/* arbitrary list of features associated */
	char *nodes;		/* name of nodes with this configuration */
	bitstr_t *node_bitmap;	/* bitmap of nodes with this configuration */
};

extern List config_list;	/* list of config_record entries */

struct node_record {
	uint32_t magic;			/* magic cookie for data integrity */
	char name[MAX_NAME_LEN];	/* name of the node. NULL==defunct */
	uint16_t node_state;		/* enum node_states, ORed with 
					 * NODE_STATE_NO_RESPOND if not 
					 * responding */
	time_t last_response;		/* last response from the node */
	uint32_t cpus;			/* count of cpus on the node */
	uint32_t real_memory;		/* MB real memory on the node */
	uint32_t tmp_disk;		/* MB total disk in TMP_FS */
	struct config_record *config_ptr;  /* configuration spec ptr */
	struct part_record *partition_ptr; /* partition for this node */
	char comm_name[MAX_NAME_LEN];	/* communications path name to node */
	slurm_addr slurm_addr;		/* network address */
	uint16_t comp_job_cnt;		/* count of jobs completing on node */
	uint16_t run_job_cnt;		/* count of jobs running on node */
	uint16_t no_share_job_cnt;	/* count of jobs running that will
					 * not share nodes */
	char *reason; 			/* why a node is DOWN or DRAINING */
	struct node_record *node_next;	/* next entry with same hash index */
};

extern struct node_record *node_record_table_ptr;  /* ptr to node records */
extern time_t last_bitmap_update;	/* time of last node creation or
					 * deletion */
extern time_t last_node_update;		/* time of last node record update */
extern int node_record_count;		/* count in node_record_table_ptr */
extern bitstr_t *avail_node_bitmap;	/* bitmap of available nodes, 
					 * not DOWN, DRAINED or DRAINING */
extern bitstr_t *idle_node_bitmap;	/* bitmap of idle nodes */
extern bitstr_t *share_node_bitmap;	/* bitmap of sharable nodes */
extern struct config_record default_config_record;
extern struct node_record default_node_record;
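
/*
 * Usage sketch (illustrative only): counting nodes available for
 * scheduling by testing avail_node_bitmap with bit_test() from
 * src/common/bitstring.h.  _count_avail_nodes() is a hypothetical
 * helper.
 */
#if 0	/* not compiled -- example only */
static int _count_avail_nodes(void)
{
	int i, cnt = 0;

	for (i = 0; i < node_record_count; i++) {
		if (bit_test(avail_node_bitmap, i))
			cnt++;
	}
	return cnt;
}
#endif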

/*****************************************************************************\
 *  PARTITION parameters and data structures
\*****************************************************************************/
#define PART_MAGIC 0xaefe8495
struct part_record {
	uint32_t magic;		/* magic cookie to test data integrity */
	char name[MAX_NAME_LEN];/* name of the partition */
	uint16_t hidden;	/* 1 if hidden by default */
	uint32_t max_time;	/* minutes or INFINITE */
	uint32_t max_nodes;	/* per job or INFINITE */
	uint32_t min_nodes;	/* per job */
	uint32_t total_nodes;	/* total number of nodes in the partition */
	uint32_t total_cpus;	/* total number of cpus in the partition */
	uint16_t root_only;	/* 1 if allocate/submit RPC can only be 
				   issued by user root */
	uint16_t shared;	/* 1 if job can share a node,
				   2 if sharing required */
	uint16_t state_up;	/* 1 if state is up, 0 if down */
	char *nodes;		/* comma delimited list of node names */
	char *allow_groups;	/* comma delimited list of groups, 
				 * NULL indicates all */
	uid_t *allow_uids;	/* zero terminated list of allowed users */
	bitstr_t *node_bitmap;	/* bitmap of nodes in partition */
};

extern List part_list;			/* list of part_record entries */
extern time_t last_part_update;		/* time of last part_list update */
extern struct part_record default_part;	/* default configuration values */
extern char default_part_name[MAX_NAME_LEN];	/* name of default partition */
extern struct part_record *default_part_loc;	/* default partition ptr */
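
/*
 * Usage sketch (illustrative only): validating a user against a
 * partition's allow_uids array, which is zero terminated as noted
 * above.  _uid_allowed() is a hypothetical helper.
 */
#if 0	/* not compiled -- example only */
static bool _uid_allowed(struct part_record *part_ptr, uid_t uid)
{
	int i;

	if (part_ptr->allow_uids == NULL)
		return true;	/* NULL allow_groups => all users allowed */
	for (i = 0; part_ptr->allow_uids[i]; i++) {
		if (part_ptr->allow_uids[i] == uid)
			return true;
	}
	return false;
}
#endif
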
/*****************************************************************************\
 *  JOB parameters and data structures
\*****************************************************************************/
extern time_t last_job_update;	/* time of last update to job records */

#define DETAILS_MAGIC 0xdea84e7
#define JOB_MAGIC 0xf0b7392c
#define STEP_MAGIC 0xce593bc1

extern int job_count;			/* number of jobs in the system */

/* job_details - specification of a job's constraints, 
 * can be purged after initiation */
struct job_details {
	uint32_t magic;			/* magic cookie for data integrity */
	uint32_t min_nodes;		/* minimum number of nodes */
	uint32_t max_nodes;		/* maximum number of nodes */
	char *req_nodes;		/* required nodes */
	char *exc_nodes;		/* excluded nodes */
	bitstr_t *req_node_bitmap;	/* bitmap of required nodes */
	bitstr_t *exc_node_bitmap;	/* bitmap of excluded nodes */
	char *features;			/* required features */
	uint16_t req_tasks;		/* required number of tasks */
	uint16_t shared;		/* set if nodes can be shared */
	uint16_t contiguous;		/* set if requires contiguous nodes */
	uint16_t wait_reason;		/* reason job still pending, see
					 * slurm.h:enum job_wait_reason */
	uint32_t min_procs;		/* minimum processors per node */
	uint32_t min_memory;		/* minimum memory per node, MB */
	uint32_t min_tmp_disk;		/* minimum temporary disk per node, MB */
	char *err;			/* pathname of job's stderr file */
	char *in;			/* pathname of job's stdin file */
	char *out;			/* pathname of job's stdout file */
	uint32_t total_procs;		/* number of allocated processors, 
					   for accounting */
	time_t submit_time;		/* time of submission */
	char *work_dir;			/* pathname of working directory */
	char **argv;			/* arguments for a batch job script */
	uint16_t argc;			/* count of argv elements */
};

struct job_record {
	uint32_t job_id;		/* job ID */
	uint32_t magic;			/* magic cookie for data integrity */
	char name[MAX_NAME_LEN];	/* name of the job */
	char partition[MAX_NAME_LEN];	/* name of the partition */
	struct part_record *part_ptr;	/* pointer to the partition record */
	uint16_t batch_flag;		/* 1 if batch job (with script) */
	uint32_t user_id;		/* user the job runs as */
	uint32_t group_id;		/* group submitted under */
	enum job_states job_state;	/* state of the job */
	uint16_t kill_on_node_fail;	/* 1 if job should be killed on
					 * node failure */
	uint16_t kill_on_step_done;	/* 1 if job should be killed when 
					 * the job step completes, 2 if kill
					 * in progress */
	select_jobinfo_t select_jobinfo;	/* opaque data */
	char *nodes;			/* list of nodes allocated to job */
	bitstr_t *node_bitmap;		/* bitmap of nodes allocated to job */
	uint32_t num_procs;		/* count of required/allocated processors */
	uint32_t time_limit;		/* time_limit minutes or INFINITE,
					 * NO_VAL implies partition max_time */
	time_t start_time;		/* time execution begins,
					 * actual or expected */
	time_t end_time;		/* time of termination,
					 * actual or expected */
	time_t time_last_active;	/* time of last job activity */
	uint32_t priority;		/* relative priority of the job,
					 * zero == held (don't initiate) */
	struct job_details *details;	/* job details */
	uint16_t num_cpu_groups;	/* record count in cpus_per_node and
					 * cpu_count_reps */