Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#ifndef __SLURM_CONTROLLER_SCHED_PLUGIN_API_H__
#define __SLURM_CONTROLLER_SCHED_PLUGIN_API_H__
#include <slurm/slurm.h>
/*
 * slurm_sched_init - bring up the external scheduler adapter.
 *
 * Takes no arguments.  The return value is a SLURM errno.
 */
int slurm_sched_init(void);
/*
**************************************************************************
* P L U G I N C A L L S *
**************************************************************************
*/
/*
 * slurm_sched_schedule - request a scheduling pass.
 *
 * Intended for passive schedulers.
 */
int slurm_sched_schedule(void);
/*
 * slurm_sched_initial_priority - provide the SLURM priority that a
 * newly-submitted job starts out with.
 */
u_int32_t slurm_sched_initial_priority(void);
/*
**************************************************************************
* U P C A L L S *
**************************************************************************
*/
/*
 * sched_get_port - port number associated with the remote scheduler.
 *
 * Depending on the scheduler type, the value is either
 *   - the remote port on which the scheduler listens, or
 *   - the local port upon which the controller should listen for
 *     scheduler requests.
 *
 * The port is returned in host byte order.
 *
 * NOTE(review): the top-level const on a by-value return type places
 * no constraint on callers; it is kept unchanged here.
 */
const u_int16_t sched_get_port(void);
/*
 * sched_get_auth - fetch the authentication credentials.
 *
 * The result points to const char, so callers must not write
 * through it.
 */
const char * const sched_get_auth(void);
/*
 * Opaque type for a list of objects supplied by the controller.
 * These objects are either jobs in the job queue, or nodes in the
 * cluster.
 *
 * The handle is a pointer to struct sched_obj_list; the layout of
 * that struct is not given in this header.
 */
typedef struct sched_obj_list *sched_obj_list_t;
/* Functional type for a field accessor. */
typedef void * (*sched_accessor_fn_t)( sched_obj_list_t,
int32_t,
char * );
/*
 * Function type of an object-list retriever, i.e. something that
 * hands back a job queue or a node list.  It abstracts over
 * sched_get_node_list() and sched_get_job_list() so that a plugin
 * wishing to consolidate code can treat the two uniformly.
 */
typedef sched_obj_list_t (*sched_objlist_fn_t)(void);
/*
 * sched_get_accessor - look up the accessor for a named field.
 *
 * The accessor is a function that, when called with an object index,
 * returns the value of the named field in the opaque object
 * structure.  It is guaranteed to stay valid for as long as the
 * plugin is loaded, so it can be looked up once at plugin load time.
 *
 * field (in) - the name of the field whose accessor is to be returned.
 *
 * Returns a pointer to a function of type sched_accessor_fn_t:
 *
 *	void *func( sched_obj_list_t data, int32_t idx, char *type );
 *
 * where
 *   "data" is the opaque data provided by sched_get_<some>_list(),
 *   "idx"  is the index of the object in "data" whose attribute
 *          value is desired, and
 *   "type" is an optional pointer to a byte in which is placed a
 *          value identifying the data type of the returned value;
 *          "type" may be NULL.
 *
 * Returns NULL if no accessor can be provided for the named field.
 *
 * TESTED: 16 May 2003
 */
extern sched_accessor_fn_t sched_get_accessor(char *field);
/*
 * sched_get_obj_count - number of items held in an object list.
 *
 * data (in) - the object list whose items are counted.
 */
extern int32_t sched_get_obj_count(sched_obj_list_t data);
/*
 * sched_free_obj_list - release an object list.
 *
 * Applies to a list produced by any function of type
 * sched_objlist_fn_t.
 *
 * data (in) - a block of data supplied by sched_get_<whatever>_list().
 *
 * Returns SLURM_SUCCESS on success and SLURM_ERROR otherwise.
 *
 * TESTED: 16 May 2003
 */
extern int sched_free_obj_list(sched_obj_list_t data);
/*
 * sched_get_node_list - take a snapshot of the controller's node data.
 *
 * The snapshot is guaranteed to be self-consistent: the data will not
 * have been modified while the snapshot was being acquired.  It is
 * not guaranteed to be persistently accurate, however.  It is
 * accurate at the time it is delivered to the plugin, but after
 * delivery the controller's node list is made available for
 * subsequent operations.
 *
 * TESTED: 16 May 2003
 */
extern sched_obj_list_t sched_get_node_list(void);
/*
 * Field names for node objects.
 * NOTE(review): presumably these are the strings handed to
 * sched_get_accessor() -- confirm against the controller side.
 */
#define NODE_FIELD_NAME              "node.name"
#define NODE_FIELD_STATE             "node.state"
#define NODE_FIELD_REAL_MEM          "node.real_mem"
#define NODE_FIELD_TMP_DISK          "node.tmp_disk"
#define NODE_FIELD_NUM_CPUS          "node.num_cpus"
#define NODE_FIELD_MOD_TIME          "node.mod_time"
#define NODE_FIELD_PARTITION         "node.partition"

/* Labels for node states. */
#define NODE_STATE_LABEL_DOWN        "DOWN"
#define NODE_STATE_LABEL_UNKNOWN     "UNKNOWN"
#define NODE_STATE_LABEL_IDLE        "IDLE"
#define NODE_STATE_LABEL_ALLOCATED   "ALLOCATED"
#define NODE_STATE_LABEL_DRAINED     "DRAINED"
#define NODE_STATE_LABEL_DRAINING    "DRAINING"
#define NODE_STATE_LABEL_COMPLETING  "COMPLETING"
/*
 * sched_get_job_list - take a snapshot of the controller's job queue.
 *
 * The data returned is guaranteed to be self-consistent.  (See
 * sched_get_node_list() above.)
 *
 * Takes no arguments.  The snapshot itself is the return value, an
 * opaque sched_obj_list_t; the number of jobs it represents can be
 * obtained with sched_get_obj_count().
 */
extern sched_obj_list_t sched_get_job_list(void);
/*
 * Field names for job objects.
 * NOTE(review): presumably these are the strings handed to
 * sched_get_accessor() -- confirm against the controller side.
 *
 * Each name is defined exactly once; JOB_FIELD_MIN_NODES used to be
 * listed a second time at the end of this group.
 */
#define JOB_FIELD_ID                 "job.id"
#define JOB_FIELD_NAME               "job.name"
#define JOB_FIELD_LAST_ACTIVE        "job.last_active"
#define JOB_FIELD_STATE              "job.state"
#define JOB_FIELD_TIME_LIMIT         "job.time_limit"
#define JOB_FIELD_NUM_TASKS          "job.num_tasks"
#define JOB_FIELD_SUBMIT_TIME        "job.submit_time"
#define JOB_FIELD_START_TIME         "job.start_time"
#define JOB_FIELD_END_TIME           "job.end_time"
#define JOB_FIELD_USER_ID            "job.user_id"
#define JOB_FIELD_GROUP_ID           "job.group_id"
#define JOB_FIELD_MIN_NODES          "job.min_nodes"
#define JOB_FIELD_FEATURES           "job.features"
#define JOB_FIELD_PRIORITY           "job.priority"
#define JOB_FIELD_WORK_DIR           "job.work_dir"
#define JOB_FIELD_PARTITION          "job.partition"
#define JOB_FIELD_MIN_DISK           "job.min_disk"
#define JOB_FIELD_MIN_MEMORY         "job.min_mem"
#define JOB_FIELD_REQ_NODES          "job.req_nodes"

/* Labels for job states. */
#define JOB_STATE_LABEL_PENDING      "PENDING"
#define JOB_STATE_LABEL_RUNNING      "RUNNING"
#define JOB_STATE_LABEL_COMPLETE     "COMPLETE"
#define JOB_STATE_LABEL_FAILED       "FAILED"
#define JOB_STATE_LABEL_TIMEOUT      "TIMEOUT"
#define JOB_STATE_LABEL_NODE_FAIL    "NODE_FAIL"
/*
 * sched_set_nodelist - choose the nodes on which a job will run.
 *
 * job_id (in) - ID of the job whose node list is being set.
 * nodes (in)  - comma-separated string of node names.  The string
 *               is copied by sched_set_nodelist().
 */
extern int sched_set_nodelist(const uint32_t job_id, char *nodes);
/*
 * sched_start_job - start a job.
 *
 * job_id (in) - ID of the job to start.
 *
 * Returns a SLURM errno.
 */
extern int sched_start_job(const uint32_t job_id);
/*
 * sched_cancel_job - stop a job.
 *
 * job_id (in) - ID of the job to stop.
 *
 * Returns a SLURM errno.
 */
extern int sched_cancel_job(const uint32_t job_id);
#endif /*__SLURM_CONTROLLER_SCHED_PLUGIN_API_H__*/