Newer
Older
/*****************************************************************************\
* sched_plugin.h - Define scheduler plugin functions.
*****************************************************************************
* Copyright (C) 2004 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* UCRL-CODE-2002-040.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.llnl.gov/linux/slurm/>.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
\*****************************************************************************/
#ifndef __SLURM_CONTROLLER_SCHED_PLUGIN_API_H__
#define __SLURM_CONTROLLER_SCHED_PLUGIN_API_H__
#include <slurm/slurm.h>
/*
* Initialize the external scheduler adapter.
*
* Returns a SLURM errno.
*/
int slurm_sched_init( void );
/*
* Terminate external scheduler, free memory.
*
* Returns a SLURM errno.
*/
extern int slurm_sched_fini(void);
/*
**************************************************************************
* P L U G I N C A L L S *
**************************************************************************
*/
/*
* For passive schedulers, invoke a scheduling pass.
*/
int slurm_sched_schedule( void );
/*
* Supply the initial SLURM priority for a newly-submitted job.
*/
u_int32_t slurm_sched_initial_priority( u_int32_t max_prio );
/*
* Note that some job is pending.
*/
void slurm_sched_job_is_pending( void );
/*
* Return any plugin-specific error number
*/
int slurm_sched_p_get_errno( void );
/*
* Return any plugin-specific error description
*/
char *slurm_sched_p_strerror( int errnum );
/*
**************************************************************************
* U P C A L L S *
**************************************************************************
*/
/*
* Returns the port number associated with the remote scheduler. The
* port may either be the remote port on which the scheduler listens,
* or the local port upon which the controller should listen for
* scheduler requests. The interpretation of this value depends on
* the scheduler type. The value is returned in host byte order.
*/
const u_int16_t sched_get_port( void );
/*
* Returns the authentication credentials.
*/
const char * const sched_get_auth( void );
/*
* RootOnly partitions are typically exempted from external scheduling
* because these partitions are expected to be directly maintained by
* the root user (or some external meta-scheduler) that may have its
* own mechanisms for scheduling. However some cluster configurations
* may want to use RootOnly partitions simply to prevent non-root
* access, and would still like normal external scheduler operation to
* occur.
*
* This procedure reflects the "SchedulerRootFilter" setting in
* slurm.conf which allows the SLURM configuration to request how
* external schedulers handle RootOnly partition, if supported by
* the external scheduler. Currently only the SLURM backfill
* scheduler makes use of this.
*
* Returns non-zero if RootOnly partitions are to be filtered from
* any external scheduling efforts.
*/
const u_int16_t sched_get_root_filter( void );
/*
* Opaque type for a list of objects supplied by the controller.
* These objects are either jobs in the job queue, or nodes in the
* cluster.
*/
typedef struct sched_obj_list *sched_obj_list_t;
/* Functional type for a field accessor. */
typedef void * (*sched_accessor_fn_t)( sched_obj_list_t,
int32_t,
char * );
/*
* Functional type for an object list (i.e., job queue or node list)
* retriever. This is for abstracting sched_get_node_list() and
* sched_get_job_list() to facilitate any plugin that may wish to
* consolidate code.
*/
typedef sched_obj_list_t (*sched_objlist_fn_t)( void );
/*
* Retrieve a pointer to a function that will, when called with an

jwindley
committed
* object index, return a poiner to the value of the named field in
* the opaque object structure. This accessor is guaranteed to be
* valid for the time in which the plugin is loaded and so can be
* dereferenced once at plugin load time. The return value is always
* to be interpreted as a pointer, regardless of the size of the
* pointed-to type.
*
* field (in) - the name of the field whose accessor is to be returned.
*
* Returns a pointer to a function of type
*
* void *func( void *data, uint32_t idx, char *type );
*
* where "data" is the opaque data provided by sched_get_<some>_list(),
* "idx" is the index of the node in "data" whose attribute value is
* desired, and "type" is an optional pointer to a byte in which is
* placed a value identifying the data type of the returned value.
* "type" may be NULL. Returns NULL if no accessor can be provided
* for the named field.
*

jwindley
committed
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
* The values placed into "type", if addressable, are ASCII-encoded
* characters with the following meanings:
*
* 'e' - An enumeration encoded as a string. This is for data that is
* stored internally in SLURM as a C enum, but which for version skew
* reasons we do not want to transmit in its underyling numerical
* representation. The strings used are defined below in conjunction
* with the objects and fields to which they apply.
*
* 's' - A string value that should probably be passed to the
* scheduler without further interpretation.
*
* 'S' - A string value that may require interpretation by the plugin
* prior to passing it to the scheduler. This includes strings that
* have embedded delimetersor other structure. The difference between
* 's' and 'S' is fairly arbitrary and merely serves as a hint to the
* plugin about the format of the returned string.
*
* 't' - A value of type "time_t" as defined on the SLURM controller.
*
* 'i' - A 16-bit signed integer.
*
* 'I' - A 32-bit signed integer.
*
* 'u' - A 16-bit unsigned integer.
*
* 'U' - a 32-bit unsigned integer.
*
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
* TESTED: 16 May 2003
*/
extern sched_accessor_fn_t sched_get_accessor( char *field );
/*
* Return the number of items in the object list.
*/
extern int32_t sched_get_obj_count( sched_obj_list_t data );
/*
* Free an object list produced by any function of type
* sched_objlist_fn_t.
*
* data (in) - A block of data supplied by sched_get_<whatever>_list().
*
* Returns SLURM_SUCCESS if successful and SLURM_ERROR otherwise.
*
* TESTED: 16 May 2003
*/
extern int sched_free_obj_list( sched_obj_list_t data );
/*
* Retrieve a snapshot of node data from the controller. The data returned
* is guaranteed to be self-consistent. That is, it is guaranteed that the
* data will not have been modified during the acquisition of the snapshot.
* However it is not guaranteed to be persistently accurate. It is accurate
* at the time at which it is delivered to the plugin, but after delivery
* the controller's node list is made available for subsequent operations.
*
* TESTED: 16 May 2003
*/
extern sched_obj_list_t sched_get_node_list( void );
#define NODE_FIELD_NAME "node.name"
#define NODE_FIELD_STATE "node.state"
#define NODE_FIELD_REAL_MEM "node.real_mem"
#define NODE_FIELD_TMP_DISK "node.tmp_disk"
#define NODE_FIELD_NUM_CPUS "node.num_cpus"
#define NODE_FIELD_MOD_TIME "node.mod_time"
#define NODE_FIELD_PARTITION "node.partition"
#define NODE_STATE_LABEL_DOWN "DOWN"
#define NODE_STATE_LABEL_UNKNOWN "UNKNOWN"
#define NODE_STATE_LABEL_IDLE "IDLE"
#define NODE_STATE_LABEL_ALLOCATED "ALLOCATED"
#define NODE_STATE_LABEL_DRAINED "DRAINED"
#define NODE_STATE_LABEL_DRAINING "DRAINING"
#define NODE_STATE_LABEL_COMPLETING "COMPLETING"
/*
* Retrieve a snapshot of the job queue from the controller. The data
* returned is guarantted to be self-consistent. (See
* sched_get_node_list() above.)
*
* data (in/out) - place to store an opaque chunk of job data.
*
* count (in/out ) - place to store the number of jobs that the opaque
* data represents.
*
* Returns SLURM_SUCCESS if successful and SLURM_ERROR otherwise.
*
*/
extern sched_obj_list_t sched_get_job_list( void );
#define JOB_FIELD_ID "job.id"
#define JOB_FIELD_NAME "job.name"
#define JOB_FIELD_LAST_ACTIVE "job.last_active"
#define JOB_FIELD_STATE "job.state"
#define JOB_FIELD_TIME_LIMIT "job.time_limit"
#define JOB_FIELD_NUM_TASKS "job.num_tasks"
#define JOB_FIELD_SUBMIT_TIME "job.submit_time"
#define JOB_FIELD_START_TIME "job.start_time"
#define JOB_FIELD_END_TIME "job.end_time"
#define JOB_FIELD_USER_ID "job.user_id"
#define JOB_FIELD_GROUP_ID "job.group_id"
#define JOB_FIELD_MIN_NODES "job.min_nodes"
#define JOB_FIELD_FEATURES "job.features"
#define JOB_FIELD_PRIORITY "job.priority"
#define JOB_FIELD_WORK_DIR "job.work_dir"
#define JOB_FIELD_PARTITION "job.partition"
#define JOB_FIELD_MIN_DISK "job.min_disk"
#define JOB_FIELD_MIN_MEMORY "job.min_mem"
#define JOB_FIELD_REQ_NODES "job.req_nodes"
#define JOB_FIELD_MIN_NODES "job.min_nodes"
#define JOB_STATE_LABEL_PENDING "PENDING"
#define JOB_STATE_LABEL_RUNNING "RUNNING"
#define JOB_STATE_LABEL_COMPLETE "COMPLETE"
#define JOB_STATE_LABEL_FAILED "FAILED"
#define JOB_STATE_LABEL_TIMEOUT "TIMEOUT"
#define JOB_STATE_LABEL_NODE_FAIL "NODE_FAIL"
/*
* Set the list of nodes on which the job will run.
*
* nodes is a comma-separated string of node names. It is
* copied by sched_set_nodelist().
*/
extern int sched_set_nodelist( const uint32_t job_id, char *nodes );
/*
* Start the job identified by the job ID.
*
* Returns a SLURM errno.
*/
extern int sched_start_job( const uint32_t job_id, const uint32_t new_prio );