/*
 * sched_plugin.h - scheduler plugin API for the SLURM controller.
 */
#ifndef __SLURM_CONTROLLER_SCHED_PLUGIN_API_H__
#define __SLURM_CONTROLLER_SCHED_PLUGIN_API_H__

#include <slurm/slurm.h>

/*
 * Set up the external scheduler adapter.
 *
 * Takes no arguments.  The result is a SLURM errno value.
 */
extern int slurm_sched_init(void);

/*
 **************************************************************************
 *                          P L U G I N   C A L L S                       *
 **************************************************************************
 */

/*
 * Ask a passive scheduler to run a scheduling pass.
 *
 * NOTE(review): the meaning of the int result is not stated in this
 * header; presumably a SLURM errno like slurm_sched_init() - confirm.
 */
extern int slurm_sched_schedule(void);

/*
 * Supply the initial SLURM priority for a newly-submitted job.
 *
 * Returns the priority as an unsigned 32-bit value.
 */
extern uint32_t slurm_sched_initial_priority( void );

/*
 **************************************************************************
 *                              U P C A L L S                             *
 **************************************************************************
 */

/*
 * Returns the port number associated with the remote scheduler.  The
 * port may either be the remote port on which the scheduler listens,
 * or the local port upon which the controller should listen for
 * scheduler requests.  The interpretation of this value depends on
 * the scheduler type.  The value is returned in host byte order.
 *
 * NOTE(review): the const on the by-value return type has no effect
 * for callers; it is kept so that this prototype stays in step with
 * the definition, which is not visible from this header.
 */
extern const uint16_t sched_get_port( void );

/*
 * Fetch the authentication credentials.
 *
 * The credentials come back as a pointer to read-only characters.
 */
extern const char * const sched_get_auth(void);

/*
 * Handle to a list of objects handed out by the controller.  The
 * structure behind the handle is not defined in this header, so
 * plugins treat it as opaque.  A list holds either the jobs in the
 * job queue or the nodes in the cluster.
 */
typedef struct sched_obj_list * sched_obj_list_t;

/* Functional type for a field accessor. */
typedef void * (*sched_accessor_fn_t)( sched_obj_list_t,
				       int32_t,
				       char * );

/*
 * Pointer-to-function type for anything that retrieves an object
 * list (a job queue or a node list) and takes no arguments.  It
 * abstracts over sched_get_node_list() and sched_get_job_list() so
 * that a plugin wishing to consolidate code can treat them alike.
 */
typedef sched_obj_list_t (*sched_objlist_fn_t)(void);

/*
 * Retrieve a pointer to a function that will, when called with an
 * object index, return the value of the named field in the opaque
 * object structure.  This accessor is guaranteed to be valid for the
 * time in which the plugin is loaded and so can be dereferenced once
 * at plugin load time.
 *
 * field (in) - the name of the field whose accessor is to be returned.
 *
 * Returns a pointer to a function of type sched_accessor_fn_t, i.e.
 *
 *	void *func( sched_obj_list_t data, int32_t idx, char *type );
 *
 * where "data" is the opaque data provided by sched_get_<some>_list(),
 * "idx" is the index of the object in "data" whose attribute value is
 * desired, and "type" is an optional pointer to a byte in which is
 * placed a value identifying the data type of the returned value.
 * "type" may be NULL.  Returns NULL if no accessor can be provided
 * for the named field.
 *
 * NOTE(review): "field" is not const-qualified; presumably it is only
 * read, since the *_FIELD_* names in this header are string literals -
 * confirm against the definition before changing the signature.
 *
 * TESTED: 16 May 2003
 */
extern sched_accessor_fn_t sched_get_accessor( char *field );

/*
 * Report how many items the given object list contains.
 *
 * data (in) - the object list to be counted.
 */
extern int32_t sched_get_obj_count(sched_obj_list_t data);

/*
 * Release an object list that was produced by a function of type
 * sched_objlist_fn_t.
 *
 * data (in) - the block of data handed out by
 *	sched_get_<whatever>_list().
 *
 * The result is SLURM_SUCCESS when the list was freed and SLURM_ERROR
 * when it was not.
 *
 * TESTED: 16 May 2003
 */
extern int sched_free_obj_list(sched_obj_list_t data);


/*
 * Take a snapshot of the controller's node data.
 *
 * The snapshot is self-consistent: nothing in it was modified while
 * it was being acquired.  It is not persistently accurate, though.
 * It is correct at the moment it is handed to the plugin, but once it
 * has been delivered the controller's node list becomes available for
 * subsequent operations again.
 *
 * TESTED: 16 May 2003
 */
extern sched_obj_list_t sched_get_node_list(void);

/*
 * Names of the node fields.
 * NOTE(review): presumably these are the strings to pass as "field"
 * to sched_get_accessor() for a node list - confirm.
 */
#define NODE_FIELD_NAME         "node.name"
#define NODE_FIELD_STATE        "node.state"
#define NODE_FIELD_REAL_MEM     "node.real_mem"
#define NODE_FIELD_TMP_DISK     "node.tmp_disk"
#define NODE_FIELD_NUM_CPUS     "node.num_cpus"
#define NODE_FIELD_MOD_TIME     "node.mod_time"
#define NODE_FIELD_PARTITION    "node.partition"

/*
 * String labels for the node states.
 * NOTE(review): presumably the values reported for NODE_FIELD_STATE -
 * confirm.
 */
#define NODE_STATE_LABEL_DOWN           "DOWN"
#define NODE_STATE_LABEL_UNKNOWN        "UNKNOWN"
#define NODE_STATE_LABEL_IDLE           "IDLE"
#define NODE_STATE_LABEL_ALLOCATED      "ALLOCATED"
#define NODE_STATE_LABEL_DRAINED        "DRAINED"
#define NODE_STATE_LABEL_DRAINING       "DRAINING"
#define NODE_STATE_LABEL_COMPLETING     "COMPLETING"


/*
 * Retrieve a snapshot of the job queue from the controller.  The data
 * returned is guaranteed to be self-consistent.  (See
 * sched_get_node_list() above.)
 *
 * Takes no arguments.  Returns the job queue as an opaque object
 * list.  The number of jobs it represents is obtained with
 * sched_get_obj_count(), and the list is released with
 * sched_free_obj_list().
 *
 * NOTE(review): this comment used to describe "data" and "count"
 * in/out parameters and a SLURM_SUCCESS/SLURM_ERROR return value,
 * none of which match the prototype below.  What is returned on
 * failure is not stated in this header - confirm against the
 * definition.
 */
extern sched_obj_list_t sched_get_job_list( void );

/*
 * Names of the job fields.  Each name is defined exactly once.
 * NOTE(review): presumably these are the strings to pass as "field"
 * to sched_get_accessor() for a job list - confirm.
 */
#define JOB_FIELD_ID			"job.id"
#define JOB_FIELD_NAME			"job.name"
#define JOB_FIELD_LAST_ACTIVE		"job.last_active"
#define JOB_FIELD_STATE			"job.state"
#define JOB_FIELD_TIME_LIMIT		"job.time_limit"
#define JOB_FIELD_NUM_TASKS		"job.num_tasks"
#define JOB_FIELD_SUBMIT_TIME		"job.submit_time"
#define JOB_FIELD_START_TIME		"job.start_time"
#define JOB_FIELD_END_TIME		"job.end_time"
#define JOB_FIELD_USER_ID		"job.user_id"
#define JOB_FIELD_GROUP_ID		"job.group_id"
#define JOB_FIELD_MIN_NODES		"job.min_nodes"
#define JOB_FIELD_FEATURES		"job.features"
#define JOB_FIELD_PRIORITY		"job.priority"
#define JOB_FIELD_WORK_DIR		"job.work_dir"
#define JOB_FIELD_PARTITION		"job.partition"
#define JOB_FIELD_MIN_DISK		"job.min_disk"
#define JOB_FIELD_MIN_MEMORY		"job.min_mem"
#define JOB_FIELD_REQ_NODES		"job.req_nodes"

/*
 * String labels for the job states.
 * NOTE(review): presumably the values reported for JOB_FIELD_STATE -
 * confirm.
 */
#define JOB_STATE_LABEL_PENDING         "PENDING"
#define JOB_STATE_LABEL_RUNNING         "RUNNING"
#define JOB_STATE_LABEL_COMPLETE        "COMPLETE"
#define JOB_STATE_LABEL_FAILED          "FAILED"
#define JOB_STATE_LABEL_TIMEOUT         "TIMEOUT"
#define JOB_STATE_LABEL_NODE_FAIL       "NODE_FAIL"


/*
 * Set the list of nodes on which the job will run.
 *
 * job_id (in) - ID of the job whose node list is being set.
 *
 * nodes (in) - a comma-separated string of node names.  It is
 *	copied by sched_set_nodelist().
 *
 * NOTE(review): the meaning of the int result is not stated in this
 * header; presumably a SLURM errno like sched_start_job() - confirm.
 */
extern int sched_set_nodelist( uint32_t job_id, char *nodes );

/*
 * Start the job identified by the job ID.
 *
 * job_id (in) - ID of the job to start.
 *
 * Returns a SLURM errno.
 */
extern int sched_start_job( uint32_t job_id );

/*
 * Stop the job identified by the job ID.
 *
 * job_id (in) - ID of the job to stop.
 *
 * Returns a SLURM errno.
 */
extern int sched_cancel_job( uint32_t job_id );

#endif /*__SLURM_CONTROLLER_SCHED_PLUGIN_API_H__*/