Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Slurm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tud-zih-energy
Slurm
Commits
de153052
Commit
de153052
authored
22 years ago
by
Mark Grondona
Browse files
Options
Downloads
Patches
Plain Diff
o added slurmctld.h: the master slurmctld header
parent
593822a7
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/slurmctld/slurmctld.h
+921
-0
921 additions, 0 deletions
src/slurmctld/slurmctld.h
with
921 additions
and
0 deletions
src/slurmctld/slurmctld.h
0 → 100644
+
921
−
0
View file @
de153052
/*
* slurmctld.h - definitions for slurmctld use
*
* NOTE: the job, node, and partition specifications are all of the
* same basic format:
* if the first character of a line is "#" then it is a comment.
* place all information for a single node, partition, or job on a
* single line.
* space delimit collection of keywords and values and separate
* the keyword from value with an equal sign (e.g. "cpus=3").
* list entries should be comma separated (e.g. "nodes=lx01,lx02").
*
* see the slurm administrator guide for more details.
*/
#ifndef _HAVE_SLURM_H
#define _HAVE_SLURM_H
#include
<pthread.h>
#include
<stdlib.h>
#include
<time.h>
#include
<sys/types.h>
#include
"list.h"
#include
"log.h"
#include
"bitstring.h"
#include
"xmalloc.h"
#include
"pack.h"
#include
<src/api/slurm.h>
/*
 * Compile-time configuration constants.
 * NOTE(review): units are not stated here; the *_INTERVAL, *_TIMEOUT and
 * KILL_WAIT values are presumably seconds -- confirm before relying on it.
 */
#define DEBUG_SYSTEM        1
#define BACKUP_INTERVAL     60
#define BACKUP_LOCATION     "/usr/local/slurm/slurm.state"
#define CONTROL_DAEMON      "/usr/local/slurm/slurmd.control"
#define CONTROLLER_TIMEOUT  300
#define EPILOG              ""
#define FAST_SCHEDULE       1
#define HASH_BASE           10
#define HEARTBEAT_INTERVAL  60
#define INIT_PROGRAM        ""
#define KILL_WAIT           30
#define PRIORITIZE          ""
#define PROLOG              ""
#define SERVER_DAEMON       "/usr/local/slurm/slurmd.server"
#define SERVER_TIMEOUT      300
#define SLURM_CONF          "../../etc/slurm.conf2"
#define TMP_FS              "/tmp"
/* name of the computer acting as the slurm controller */
extern char *control_machine;

/* name of the computer acting as the slurm backup controller */
extern char *backup_controller;
/* magic cookies stored in records to test data integrity */
#define CONFIG_MAGIC  0xc065eded
#define NODE_MAGIC    0x0de575ed
/* NOTE(review): NO_VAL looks like a "value not set" marker -- confirm */
#define NO_VAL        0x7f7f7f7f
/* config_record - one node configuration specification */
struct config_record {
	uint32_t magic;		/* magic cookie to test data integrity */
	uint32_t cpus;		/* count of cpus running on the node */
	uint32_t real_memory;	/* real memory on the node, in megabytes */
	uint32_t tmp_disk;	/* total storage in the TMP_FS file system, in megabytes */
	uint32_t weight;	/* arbitrary priority of the node for scheduling work on */
	char *feature;		/* arbitrary list of features associated with a node */
	char *nodes;		/* names of the nodes in this configuration record */
	bitstr_t *node_bitmap;	/* bitmap of the nodes in this configuration record */
};
/* list of config_record entries */
extern List config_list;

/* node state names; last entry must be "end", keep in sync with node_states */
extern char *node_state_string[];

/* time of last node creation or deletion */
extern time_t last_bitmap_update;

/* time of last update to node records */
extern time_t last_node_update;
/* node_record - state and actual resources of a single node */
struct node_record {
	uint32_t magic;			/* magic cookie to test data integrity */
	char name[MAX_NAME_LEN];	/* node name; a null name indicates a defunct node */
	uint16_t node_state;		/* enum node_states, ORed with STATE_NO_RESPOND if down */
	time_t last_response;		/* time of the last response from the node */
	uint32_t cpus;			/* actual count of cpus running on the node */
	uint32_t real_memory;		/* actual real memory on the node, in megabytes */
	uint32_t tmp_disk;		/* actual total disk in TMP_FS, in megabytes */
	struct config_record *config_ptr;	/* configuration specification for this node */
	struct part_record *partition_ptr;	/* partition for this node */
};
/* location of the node records */
extern struct node_record *node_record_table_ptr;

/* count of records in the node record table */
extern int node_record_count;

/* table of hashed indices into node_record */
extern int *hash_table;

/* bitmap of the nodes that are up */
extern bitstr_t *up_node_bitmap;

/* bitmap of the nodes that are idle */
extern bitstr_t *idle_node_bitmap;

/* default values for config_record and node_record entries */
extern struct config_record default_config_record;
extern struct node_record default_node_record;
/*
 * NOTE: change the PART_STRUCT_VERSION value whenever the contents of
 * PART_STRUCT_FORMAT change
 */
#define PART_MAGIC  0xaefe8495

/* time of last update to part records */
extern time_t last_part_update;
/* part_record - configuration and state of a single partition */
struct part_record {
	uint32_t magic;			/* magic cookie to test data integrity */
	char name[MAX_NAME_LEN];	/* name of the partition */
	uint32_t max_time;		/* minutes or INFINITE */
	uint32_t max_nodes;		/* per job or INFINITE */
	uint32_t total_nodes;		/* total number of nodes in the partition */
	uint32_t total_cpus;		/* total number of cpus in the partition */
	uint16_t key;			/* 1 if slurm distributed key is required for use of partition */
	uint16_t shared;		/* 1 if >1 job can share a node, 2 if required */
	uint16_t state_up;		/* 1 if state is up, 0 if down */
	char *nodes;			/* comma delimited list of names of nodes in partition */
	char *allow_groups;		/* comma delimited list of groups, null indicates all */
	bitstr_t *node_bitmap;		/* bitmap of nodes in partition */
};
/* list of part_record entries */
extern List part_list;

/* default configuration values */
extern struct part_record default_part;

/* name of default partition */
extern char default_part_name[MAX_NAME_LEN];

/* location of default partition */
extern struct part_record *default_part_loc;
/*
 * NOTE: change the JOB_STRUCT_VERSION value whenever the contents of
 * JOB_STRUCT_FORMAT change
 */

/* time of last update to job records */
extern time_t last_job_update;

/* time of last update to job steps */
extern time_t last_step_update;

/* job state names; last entry must be "end", keep in sync with job_states */
extern char *job_state_string[];
/*
 * Don't accept more jobs once there are MAX_JOB_COUNT in the system.
 * Purge OK for jobs over MIN_JOB_AGE minutes old (since completion).
 * This should prevent exhausting memory.
 */
#define DETAILS_MAGIC  0xdea84e7
#define JOB_MAGIC      0xf0b7392c
#define MAX_JOB_COUNT  1000
#define MIN_JOB_AGE    10
#define STEP_MAGIC     0xce593bc1

/* number of jobs in the system */
extern int job_count;
/* job_details - resource requirements and bookkeeping for a job */
struct job_details {
	uint32_t magic;			/* magic cookie to test data integrity */
	uint32_t num_procs;		/* minimum number of processors */
	uint32_t num_nodes;		/* minimum number of nodes */
	char *req_nodes;		/* required nodes */
	bitstr_t *req_node_bitmap;	/* bitmap of required nodes */
	char *features;			/* required features */
	uint16_t shared;		/* 1 if more than one job can execute on a node */
	uint16_t contiguous;		/* requires contiguous nodes, 1=true, 0=false */
	uint32_t min_procs;		/* minimum processors per node */
	uint32_t min_memory;		/* minimum memory per node, MB */
	uint32_t min_tmp_disk;		/* minimum temporary disk per node, MB */
	enum task_dist dist;		/* distribution of tasks (fill or cyclic), see enum task_dist */
	char *job_script;		/* name of job script to execute */
	uint16_t procs_per_task;	/* processors required per task */
	uint32_t total_procs;		/* total number of allocated processors, for accounting */
	char *node_list;		/* comma separated assigned node list (by task) */
	time_t submit_time;		/* time of submission */
};
/* job_record - identity, allocation and timing of a single job */
struct job_record {
	uint16_t job_id;		/* job ID */
	uint32_t magic;			/* magic cookie to test data integrity */
	char name[MAX_NAME_LEN];	/* name of the job */
	char partition[MAX_NAME_LEN];	/* name of the partition */
	struct part_record *part_ptr;	/* pointer to the partition record */
	uint32_t user_id;		/* user the job runs as */
	enum job_states job_state;	/* state of the job */
	char *nodes;			/* comma delimited list of nodes allocated to job */
	bitstr_t *node_bitmap;		/* bitmap of nodes allocated to job */
	uint32_t time_limit;		/* maximum run time in minutes, 0xffffffff if unlimited */
	time_t start_time;		/* time execution begins, actual or expected */
	time_t end_time;		/* time of termination, actual or expected */
	uint32_t priority;		/* relative priority of the job */
	struct job_details *details;	/* job details (set until job terminates) */
	uint16_t next_step_id;		/* next step id to be used */
};
/* step_record - state of a single job step */
struct step_record {
	struct job_record *job_ptr;	/* pointer to job_record (for job_id) */
	uint16_t step_id;		/* step number */
	uint32_t magic;			/* magic cookie to test data integrity */
	/*
	 * task distribution 1=cycle, 0=block
	 * NOTE(review): the block_or_cycle() comment in this header states the
	 * opposite mapping (1 for "BLOCK", 0 for "CYCLE") -- confirm which holds
	 */
	uint16_t dist;
	uint16_t procs_per_task;	/* processors required per task */
	bitstr_t *node_bitmap;		/* bitmap of nodes allocated to job step */
#ifdef HAVE_LIBELAN3
	struct qsw_jobinfo *qsw_jobinfo_t;	/* Elan3 switch context, opaque data structure */
#endif
};
/* list of job_record entries */
extern List job_list;

/* list of step_record (job step) entries */
extern List step_list;
/*
 * allocate_nodes - change the state of the nodes specified by a bitmap
 *	to stage_in; this is a simple prototype for testing
 * input: bitmap - bitmap of the nodes to change
 * globals: node_record_count - number of nodes in the system
 *	node_record_table_ptr - pointer to global node table
 */
extern void allocate_nodes(unsigned *bitmap);
/*
 * bitmap2node_name - build a node name list representation of a bitmap,
 *	using regular expressions
 * input: bitmap - bitmap pointer
 *	node_list - place to put node list
 * output: node_list - set to node list, or NULL on error
 *	returns 0 if no error, otherwise EINVAL or ENOMEM
 * NOTE: consider returning the node list as a regular expression if helpful
 * NOTE: the caller must free the memory at node_list when no longer required
 */
extern int bitmap2node_name(bitstr_t *bitmap, char **node_list);
/*
 * block_or_cycle - map a string onto a task distribution value
 * input: in_string - pointer to string containing "BLOCK" or "CYCLE"
 * output: returns 1 for "BLOCK", 0 for "CYCLE", -1 otherwise
 * NOTE(review): the declared return type is enum task_dist; confirm that the
 *	integers above correspond to that enum's values
 */
extern enum task_dist block_or_cycle(char *in_string);
/*
 * build_node_list - build a node_list for a job, including the processor
 *	count on each node (e.g. "lx01[4],lx02[4],...")
 * input: bitmap - bitmap of nodes to use
 *	node_list - place to store node list
 *	total_procs - place to store count of total processors allocated
 * output: node_list - comma separated list of nodes on which the tasks
 *		are to be initiated
 *	total_procs - count of total processors allocated
 * global: node_record_table_ptr - pointer to global node table
 * NOTE: the storage at node_list must be xfreed by the caller
 */
extern void build_node_list(bitstr_t *bitmap, char **node_list,
			    uint32_t *total_procs);
/*
 * count_cpus - report how many cpus are associated with the identified nodes
 * input: bitmap - a node bitmap
 * output: returns a cpu count
 * globals: node_record_count - number of nodes configured
 *	node_record_table_ptr - pointer to global node table
 */
extern int count_cpus(unsigned *bitmap);
/*
 * create_config_record - create a config_record entry and set its values
 *	to the defaults
 * output: returns pointer to the config_record
 * global: default_config_record - default configuration values
 * NOTE: memory allocated will remain in existence until
 *	delete_config_record() is called to delete all configuration records
 */
extern struct config_record *create_config_record(void);
/*
 * create_job_record - create an empty job_record including job_details;
 *	its values are loaded with defaults (zeros, nulls, and magic cookie)
 * input: error_code - location to store error value in
 * output: error_code - set to zero if no error, errno otherwise
 *	returns a pointer to the record or NULL if error
 * global: job_list - global job list
 *	job_count - number of jobs in the system
 * NOTE: allocates memory that should be xfreed with either
 *	delete_job_record or list_delete_job
 */
extern struct job_record *create_job_record(int *error_code);
/*
 * create_node_record - create a node record
 * input: config_point - pointer to node's configuration information
 *	node_name - name of the node
 * output: returns a pointer to the record or NULL if error
 * NOTE: the record's values are initialized to those of default_node_record,
 *	node_name and config_point's cpus, real_memory, and tmp_disk values
 * NOTE: allocates memory; presumably released through delete_node_record
 *	-- TODO confirm
 */
extern struct node_record *create_node_record(struct config_record *config_point,
					      char *node_name);
/*
 * create_part_record - create a partition record
 * output: returns a pointer to the record or NULL if error
 * global: default_part - default partition parameters
 *	part_list - global partition list
 * NOTE: the record's values are initialized to those of default_part
 * NOTE: allocates memory that should be xfreed with delete_part_record
 */
extern struct part_record *create_part_record(void);
/*
 * create_step_record - create an empty step_record;
 *	its values are loaded with defaults (zeros, nulls, and magic cookie)
 * input: error_code - location to store error value in
 * output: error_code - set to zero if no error, errno otherwise
 *	returns a pointer to the record or NULL if error
 * global: step_list - global step list
 * NOTE: allocates memory that should be xfreed with delete_step_record
 */
extern struct step_record *create_step_record(int *error_code);
/*
 * deallocate_nodes - change the state of the nodes specified by a bitmap
 *	to idle; this is a simple prototype for testing
 * input: bitmap - bitmap of the nodes to change
 * globals: node_record_count - number of nodes in the system
 *	node_record_table_ptr - pointer to global node table
 */
extern void deallocate_nodes(unsigned *bitmap);
/*
 * delete_job_details - delete a job's detail record and clear its pointer
 * input: job_entry - pointer to the job_record whose details are cleared
 */
extern void delete_job_details(struct job_record *job_entry);
/*
 * delete_job_record - delete the record for the job with the specified job_id
 * input: job_id - job_id of the desired job
 * output: returns 0 on success, errno otherwise
 * global: job_list - pointer to global job list
 */
extern int delete_job_record(uint16_t job_id);
/*
 * delete_node_record - delete the record for the node with the specified name;
 *	to avoid invalidating the bitmaps and hash table, the name is just
 *	cleared and the state set to STATE_DOWN
 * input: name - name of the desired node
 * output: returns 0 on success, errno otherwise
 */
extern int delete_node_record(char *name);
/*
 * delete_part_record - delete the record for the partition with the
 *	specified name
 * input: name - name of the desired partition
 * output: returns 0 on success, errno otherwise
 */
extern int delete_part_record(char *name);
/*
 * delete_step_record - delete the record for the job step with the
 *	specified job_id and step_id
 * input: job_id - job_id of the desired job
 *	step_id - id of the desired job step
 * output: returns 0 on success, errno otherwise
 * global: step_list - global step list
 */
extern int delete_step_record(uint16_t job_id, uint16_t step_id);
/*
 * find_job_record - return a pointer to the job record with the given job_id
 * input: job_id - requested job's id
 * output: pointer to the job's record, NULL on error
 * global: job_list - global job list pointer
 */
extern struct job_record *find_job_record(uint16_t job_id);
/*
 * find_node_record - find the record for the node with the specified name
 * input: name - name of the desired node
 * output: returns a pointer to the node record, or NULL if not found
 */
extern struct node_record *find_node_record(char *name);
/*
 * find_part_record - find the record for the partition with the specified name
 * input: name - name of the desired partition
 * output: returns a pointer to the partition record, or NULL if not found
 * global: part_list - global partition list
 */
extern struct part_record *find_part_record(char *name);
/*
 * find_step_record - return a pointer to the step record with the given
 *	job_id and step_id
 * input: job_id - requested job's id
 *	step_id - id of the desired job step
 * output: pointer to the job step's record, NULL on error
 * global: step_list - global step list
 */
extern struct step_record *find_step_record(uint16_t job_id, uint16_t step_id);
/*
 * init_job_conf - initialize the job configuration tables and values.
 *	this should be called after creating node information, but
 *	before creating any job entries.
 * input: none (declared with a (void) prototype so that calls passing
 *	arguments are rejected at compile time)
 * output: return value - 0 if no error, otherwise an error code
 * global: last_job_update - time of last job table update
 *	job_list - pointer to global job list
 */
extern int init_job_conf(void);
/*
 * init_node_conf - initialize the node configuration values.
 *	this should be called before creating any node or configuration entries.
 * input: none (declared with a (void) prototype so that calls passing
 *	arguments are rejected at compile time)
 * output: return value - 0 if no error, otherwise an error code
 */
extern int init_node_conf(void);
/*
 * init_part_conf - initialize the partition configuration values.
 *	this should be called before creating any partition entries.
 * input: none (declared with a (void) prototype so that calls passing
 *	arguments are rejected at compile time)
 * output: return value - 0 if no error, otherwise an error code
 */
extern int init_part_conf(void);
/*
 * init_step_conf - initialize the job step configuration tables and values.
 *	this should be called before creating any job step entries.
 * input: none (declared with a (void) prototype so that calls passing
 *	arguments are rejected at compile time)
 * output: return value - 0 if no error, otherwise an error code
 * global: step_list - global step list
 */
extern int init_step_conf(void);
/*
 * init_slurm_conf - initialize or re-initialize the slurm configuration
 *	values. this should be called before calling read_slurm_conf.
 * input: none (declared with a (void) prototype so that calls passing
 *	arguments are rejected at compile time)
 * output: return value - 0 if no error, otherwise an error code
 * globals: control_machine - name of primary slurmctld machine
 *	backup_controller - name of backup slurmctld machine
 */
extern int init_slurm_conf(void);
/*
 * is_key_valid - determine if the supplied key is valid
 * input: key - a slurm key acquired by user root
 * output: returns 1 if key is valid, 0 otherwise
 * NOTE: this is only a placeholder for a future function
 */
extern int is_key_valid(int key);
/*
 * job_allocate - parse the supplied job specification, create job_records
 *	for it, and allocate nodes for it. if the job can not be immediately
 *	allocated nodes, EAGAIN will be returned
 * input: job_specs - job specifications
 *	new_job_id - location for storing new job's id
 *	node_list - location for storing new job's allocated nodes
 * output: new_job_id - the job's ID
 *	node_list - list of nodes allocated to the job
 *	returns 0 on success, EINVAL if specification is invalid,
 *		EAGAIN if higher priority jobs exist
 * globals: job_list - pointer to global job list
 *	part_list - global list of partition info
 *	default_part_loc - pointer to default partition
 * NOTE: the calling program must xfree the memory pointed to by node_list
 */
extern int job_allocate(char *job_specs, uint16_t *new_job_id, char **node_list);
/*
 * job_cancel - cancel the specified job
 * input: job_id - id of the job to be cancelled
 * output: returns 0 on success, EINVAL if specification is invalid,
 *	EAGAIN presumably when the job is not available for cancellation now
 *	(NOTE(review): the earlier wording was garbled -- confirm)
 * global: job_list - pointer to global job list
 *	last_job_update - time of last job table update
 */
extern int job_cancel(uint16_t job_id);
/*
 * job_create - parse the supplied job specification and create job_records
 *	for it
 * input: job_specs - job specifications
 *	new_job_id - location for storing new job's id
 *	allocate - if set, job allocation only (no script required)
 * output: new_job_id - the job's ID
 *	returns 0 on success, EINVAL if specification is invalid
 * globals: job_list - pointer to global job list
 *	part_list - global list of partition info
 *	default_part_loc - pointer to default partition
 */
extern int job_create(char *job_specs, uint16_t *new_job_id, int allocate);
/*
 * job_lock - lock the job information
 * input: none; output: none
 */
extern void job_lock(void);

/*
 * job_unlock - unlock the job information
 * input: none; output: none
 */
extern void job_unlock(void);
/*
 * list_compare_config - compare two entries from the config list based
 *	upon weight, see list.h for documentation
 */
extern int list_compare_config(void *config_entry1, void *config_entry2);
/*
 * list_delete_config - delete an entry from the configuration list,
 *	see list.h for documentation
 */
extern void list_delete_config(void *config_entry);
/*
 * list_find_config - find an entry in the configuration list,
 *	see list.h for documentation
 * key is partition name or "universal_key" for all configuration
 */
extern int list_find_config(void *config_entry, void *key);
/*
 * list_delete_part - delete an entry from the partition list,
 *	see list.h for documentation
 */
extern void list_delete_part(void *part_entry);
/*
 * list_find_part - find an entry in the partition list,
 *	see list.h for documentation
 * key is partition name or "universal_key" for all partitions
 */
extern int list_find_part(void *part_entry, void *key);
/*
 * load_float - parse a string for a keyword, value pair
 *	(NOTE(review): summary line reconstructed from the load_integer and
 *	load_string descriptions -- confirm)
 * input: *destination - location into which result is stored
 *	keyword - string to search for
 *	in_line - string to search for keyword
 * output: *destination - set to value, no change if value not found
 *	in_line - the keyword and value (if present) are overwritten by spaces
 *	return value - 0 if no error, otherwise an error code
 * NOTE: in_line is overwritten, do not use a constant
 */
extern int load_float(float *destination, char *keyword, char *in_line);
/*
 * load_integer - parse a string for a keyword, value pair
 * input: *destination - location into which result is stored
 *	keyword - string to search for
 *	in_line - string to search for keyword
 * output: *destination - set to value, no change if value not found,
 *		set to 1 if keyword found without value,
 *		set to -1 if keyword followed by "unlimited"
 *	in_line - the keyword and value (if present) are overwritten by spaces
 *	return value - 0 if no error, otherwise an error code
 * NOTE: in_line is overwritten, do not use a constant
 */
extern int load_integer(int *destination, char *keyword, char *in_line);
/*
 * load_string - parse a string for a keyword, value pair
 * input: *destination - location into which result is stored
 *	keyword - string to search for
 *	in_line - string to search for keyword
 * output: *destination - set to value, no change if value not found;
 *		if *destination had a previous value, that memory location
 *		is automatically freed
 *	in_line - the keyword and value (if present) are overwritten by spaces
 *	return value - 0 if no error, otherwise an error code
 * NOTE: destination must be freed when no longer required
 * NOTE: if destination is non-null at function call time, it will be freed
 * NOTE: in_line is overwritten, do not use a constant
 */
extern int load_string(char **destination, char *keyword, char *in_line);
/*
 * match_feature - determine if the desired feature (seek) is one of those
 *	available
 * input: seek - desired feature
 *	available - comma separated list of features
 * output: returns 1 if found, 0 otherwise
 */
extern int match_feature(char *seek, char *available);
/*
 * match_group - determine if the user is a member of any groups permitted
 *	to use this partition
 * input: allow_groups - comma delimited list of groups permitted to use the
 *		partition, NULL is for all groups
 *	user_groups - comma delimited list of groups the user belongs to
 * output: returns 1 if user is member, 0 otherwise
 */
extern int match_group(char *allow_groups, char *user_groups);
/*
 * node_lock - lock the node and configuration information
 * input: none; output: none
 */
extern void node_lock(void);

/*
 * node_unlock - unlock the node and configuration information
 * input: none; output: none
 */
extern void node_unlock(void);
/*
 * node_name2bitmap - build a bitmap representation of a node name
 *	regular expression
 * input: node_names - list of nodes
 *	bitmap - place to put bitmap pointer
 * output: bitmap - set to bitmap, or NULL on error
 *	returns 0 if no error, otherwise EINVAL or ENOMEM
 * NOTE: the caller must free the memory at bitmap when no longer required
 */
extern int node_name2bitmap(char *node_names, bitstr_t **bitmap);
/*
 * node_name2list - build an array of node names from a node name
 *	regular expression
 * input: node_names - list of nodes
 *	node_list - location into which the list is placed
 *	node_count - location into which a node count is passed
 * output: node_list - an array of node names, each of size MAX_NAME_LEN
 *	node_count - the number of entries in node_list
 *	returns 0 if no error, otherwise EINVAL or ENOMEM
 * NOTE: the caller must xfree the memory at node_list when no longer
 *	required iff no error
 */
extern int node_name2list(char *node_names, char **node_list, int *node_count);
/*
 * pack_all_jobs - dump all job information for all jobs in
 *	machine independent form (for network transmission)
 * input: buffer_ptr - location into which a pointer to the data is to be
 *		stored; the calling function must xfree the storage
 *	buffer_size - location into which the size of the created buffer
 *		(in bytes) is stored
 *	update_time - dump new data only if job records updated since time
 *		specified, otherwise return empty buffer
 * output: buffer_ptr - the pointer is set to the allocated buffer
 *	buffer_size - set to size of the buffer in bytes
 *	update_time - set to time job records last updated
 *	returns 0 if no error, errno otherwise
 * global: job_list - global list of job records
 * NOTE: the buffer at *buffer_ptr must be xfreed by the caller
 * NOTE: change JOB_STRUCT_VERSION in common/slurmlib.h whenever the format changes
 * NOTE: change slurm_load_job() in api/job_info.c whenever the data format changes
 */
extern int pack_all_jobs(char **buffer_ptr, int *buffer_size, time_t *update_time);
/*
 * pack_all_node - dump all configuration and node information for all nodes
 *	in machine independent form (for network transmission)
 * input: buffer_ptr - location into which a pointer to the data is to be
 *		stored; the data buffer is allocated on the caller's behalf
 *		and the calling function must xfree the storage
 *	buffer_size - location into which the size of the created buffer
 *		(in bytes) is stored
 *	update_time - dump new data only if node records updated since time
 *		specified, otherwise return empty buffer
 * output: buffer_ptr - the pointer is set to the allocated buffer
 *	buffer_size - set to size of the buffer in bytes
 *	update_time - set to time node records last updated
 *	returns 0 if no error, errno otherwise
 * global: node_record_table_ptr - pointer to global node table
 * NOTE: the caller must xfree the buffer at *buffer_ptr when no longer required
 */
extern int pack_all_node(char **buffer_ptr, int *buffer_size, time_t *update_time);
/*
 * pack_all_part - dump all partition information for all partitions in
 *	machine independent form (for network transmission)
 * input: buffer_ptr - location into which a pointer to the data is to be
 *		stored; the calling function must xfree the storage
 *	buffer_size - location into which the size of the created buffer
 *		(in bytes) is stored
 *	update_time - dump new data only if partition records updated since
 *		time specified, otherwise return empty buffer
 * output: buffer_ptr - the pointer is set to the allocated buffer
 *	buffer_size - set to size of the buffer in bytes
 *	update_time - set to time partition records last updated
 *	returns 0 if no error, errno otherwise
 * global: part_list - global list of partition records
 * NOTE: the buffer at *buffer_ptr must be xfreed by the caller
 * NOTE: change PART_STRUCT_VERSION in common/slurmlib.h whenever the format changes
 * NOTE: change slurm_load_part() in api/part_info.c whenever the data format changes
 */
extern int pack_all_part(char **buffer_ptr, int *buffer_size, time_t *update_time);
/*
 * pack_all_step - dump all job step information for all steps in
 *	machine independent form (for network transmission)
 * input: buffer_ptr - location into which a pointer to the data is to be
 *		stored; the calling function must xfree the storage
 *	buffer_size - location into which the size of the created buffer
 *		(in bytes) is stored
 *	update_time - dump new data only if job step records updated since
 *		time specified, otherwise return empty buffer
 * output: buffer_ptr - the pointer is set to the allocated buffer
 *	buffer_size - set to size of the buffer in bytes
 *	update_time - set to time job step records last updated
 *	returns 0 if no error, errno otherwise
 * global: step_list - global list of job step records
 * NOTE: the buffer at *buffer_ptr must be xfreed by the caller
 * NOTE: change STEP_STRUCT_VERSION in common/slurmlib.h whenever the format changes
 * NOTE: change slurm_load_step() in api/step_info.c whenever the data format changes
 */
extern int pack_all_step(char **buffer_ptr, int *buffer_size, time_t *update_time);
/*
 * pack_job - dump all configuration information about a specific job in
 *	machine independent form (for network transmission)
 * input: dump_job_ptr - pointer to job for which information is requested
 *	buf_ptr - buffer for job information
 *	buf_len - byte size of buffer
 * output: buf_ptr - advanced to end of data written
 *	buf_len - byte size remaining in buffer
 *	returns 0 if no error, 1 if buffer too small
 * NOTE: change JOB_STRUCT_VERSION in common/slurmlib.h whenever the format changes
 * NOTE: change slurm_load_job() in api/job_info.c whenever the data format changes
 */
extern int pack_job(struct job_record *dump_job_ptr, void **buf_ptr, int *buf_len);
/*
 * pack_node - dump all configuration information about a specific node in
 *	machine independent form (for network transmission)
 * input: dump_node_ptr - pointer to node for which information is requested
 *	buf_ptr - buffer for node information
 *	buf_len - byte size of buffer
 * output: buf_ptr - advanced to end of data written
 *	buf_len - byte size remaining in buffer
 *	returns 0 if no error, 1 if buffer too small
 * NOTE: if you make any changes here be sure to increment the value of
 *	NODE_STRUCT_VERSION and make the corresponding changes to
 *	load_node_config in api/node_info.c
 */
extern int pack_node(struct node_record *dump_node_ptr, void **buf_ptr, int *buf_len);
/*
 * pack_part - dump all configuration information about a specific partition
 *	in machine independent form (for network transmission)
 * input: part_record_point - pointer to partition for which information
 *		is requested
 *	buf_ptr - buffer for partition information
 *	buf_len - byte size of buffer
 * output: buf_ptr - advanced to end of data written
 *	buf_len - byte size remaining in buffer
 *	returns 0 if no error, 1 if buffer too small
 * NOTE: if you make any changes here be sure to increment the value of
 *	PART_STRUCT_VERSION and make the corresponding changes to
 *	load_part_config in api/partition_info.c
 */
extern int pack_part(struct part_record *part_record_point, void **buf_ptr,
		     int *buf_len);
/*
 * pack_step - dump state information about a specific job step in
 *	machine independent form (for network transmission)
 * input: dump_step_ptr - pointer to step for which information is requested
 *	buf_ptr - buffer for step information
 *	buf_len - byte size of buffer
 * output: buf_ptr - advanced to end of data written
 *	buf_len - byte size remaining in buffer
 *	returns 0 if no error, 1 if buffer too small
 * NOTE: change STEP_STRUCT_VERSION in common/slurmlib.h whenever the format changes
 * NOTE: change slurm_load_step() in api/step_info.c whenever the data format changes
 * NOTE: the caller must ensure that the buffer is sufficiently large to hold
 *	the data being written (space remaining at least BUF_SIZE)
 */
extern int pack_step(struct step_record *dump_step_ptr, void **buf_ptr, int *buf_len);
/*
 * parse_job_specs - pick the appropriate fields out of a job request
 *	specification
 * input: job_specs - string containing the specification
 *	req_features, etc. - pointers to storage for the specifications
 * output: req_features, etc. - the job's specifications
 *	returns 0 if no error, errno otherwise
 * NOTE: the calling function must xfree memory at req_features[0],
 *	req_node_list[0], job_name[0], req_group[0], and req_partition[0]
 */
extern int parse_job_specs(char *job_specs,
			   char **req_features, char **req_node_list,
			   char **job_name, char **req_group,
			   char **req_partition,
			   int *contiguous, int *req_cpus, int *req_nodes,
			   int *min_cpus, int *min_memory, int *min_tmp_disk,
			   int *key, int *shared, int *dist,
			   char **script,
			   int *time_limit, int *procs_per_task,
			   int *job_id, int *priority, int *user_id);
/*
 * part_lock - lock the partition information
 * NOTE: takes no arguments; declared with (void) so this is a true prototype
 *       and the compiler rejects calls that pass arguments
 */
extern void part_lock(void);
/*
 * part_unlock - unlock the partition information
 * NOTE: takes no arguments; declared with (void) so this is a true prototype
 *       and the compiler rejects calls that pass arguments
 */
extern void part_unlock(void);
/*
 * purge_old_job - purge old job records if memory space is needed.
 *                 the jobs must have completed at least MIN_JOB_AGE minutes
 *                 ago
 */
extern void purge_old_job(void);
/*
 * read_buffer - read a line from the specified buffer
 * input:  buffer - pointer to the read buffer, must be allocated by alloc()
 *         buffer_offset - byte offset in the buffer (the read location)
 *         buffer_size - size of the buffer in bytes
 *         line - pointer to the location to be loaded with a pointer to the
 *                line
 * output: buffer_offset - incremented by the size of the size field plus the
 *                         size of the value itself
 *         line - set to point to the line
 *         return - 0 if no error, EFAULT on end of buffer, EINVAL on bad tag
 */
extern int read_buffer(char *buffer, int *buffer_offset,
                       int buffer_size, char **line);
/*
 * read_slurm_conf - load the slurm configuration from the specified file.
 *                   may be called more than once if so desired.
 * input:  file_name - name of the file containing the overall slurm
 *                     configuration information
 * output: return - 0 if no error, otherwise an error code
 * global: control_machine - primary machine on which slurmctld runs
 *         backup_controller - backup machine on which slurmctld runs
 *         default_part_loc - pointer to the default partition
 * NOTE: call init_slurm_conf before ever calling read_slurm_conf.
 */
extern int read_slurm_conf(char *file_name);
/*
 * rehash - build a hash table of the node_record entries. this is a large
 *          hash table to permit the immediate finding of a record based only
 *          upon its name without regard to the number. there should be no
 *          need for a search. the algorithm is optimized for node names with
 *          a base-ten sequence number suffix. if you have a large cluster
 *          and use a different naming convention, this function and/or the
 *          hash_index function should be re-written.
 * global: node_record_table_ptr - pointer to the global node table
 *         hash_table - table of hash indices
 * NOTE: allocates memory for hash_table
 * NOTE: takes no arguments; declared with (void) so this is a true prototype
 *       and the compiler rejects calls that pass arguments
 */
extern void rehash(void);
/*
* report_leftover - report any un-parsed (non-whitespace) characters on the
* configuration input line.
* input: in_line - what is left of the configuration input line.
* line_num - line number of the configuration file.
* output: none
*/
/* extern void report_leftover (char *in_line, int line_num); */
/*
 * reset_job_bitmaps - reestablish bitmaps for existing jobs.
 *                     this should be called after rebuilding node
 *                     information, but before using any job entries.
 * global: last_job_update - time of last job table update
 *         job_list - pointer to the global job list
 * NOTE: takes no arguments; declared with (void) so this is a true prototype
 *       and the compiler rejects calls that pass arguments
 */
extern void reset_job_bitmaps(void);
/*
 * schedule - attempt to schedule all pending jobs.
 *            pending jobs for each partition will be scheduled in priority
 *            order until a request fails
 * global: job_list - global list of job records
 *         last_job_update - time of last update to the job table
 * NOTE: takes no arguments; declared with (void) so this is a true prototype
 *       and the compiler rejects calls that pass arguments
 */
extern void schedule(void);
/*
 * select_nodes - select and allocate nodes to a specific job
 * input:   job_ptr - pointer to the job record
 * output:  return - 0 on success, EINVAL if the request cannot be satisfied,
 *                   or EAGAIN if resources are presently busy
 *          job_ptr->nodes - set to the node list (on success)
 * globals: list_part - global list of partition info
 *          default_part_loc - pointer to the default partition
 *          config_list - global list of node configuration info
 */
extern int select_nodes(struct job_record *job_ptr);
/*
 * slurm_parser - parse the supplied specification into keyword/value pairs.
 *                only the keywords supplied will be searched for. the
 *                supplied specification is altered, overwriting the keyword
 *                and value pairs with spaces.
 * input:  spec - pointer to the string of specifications
 *         sets of three values (as many sets as required):
 *           keyword - string with the keyword to search for, including the
 *                     equal sign (e.g. "name=")
 *           type - char with value 'd' for int, 'f' for float, 's' for string
 *           value - pointer to the storage location for the value
 *                   (char **) for type 's'
 * output: spec - everything read is overwritten by spaces
 *         value - set to the value read (unchanged if keyword not found)
 *         return - 0 if no error, otherwise an errno code
 * NOTE: terminate the argument list with a keyword value of "END"
 * NOTE: values of type (char *) are xfreed if non-NULL. the caller must
 *       xfree any returned value
 */
extern int slurm_parser(char *spec, ...);
/*
 * step_create - parse the supplied job specification and create job_records
 *               for it
 * input:   step_specs - job specifications
 *          new_job_id - location for storing the new job's id
 *          allocate - if set, job allocation only (no script required)
 * output:  new_job_id - the job's ID
 *          return - 0 on success, EINVAL if the specification is invalid
 * globals: job_list - pointer to the global job list
 *          list_part - global list of partition info
 *          default_part_loc - pointer to the default partition
 * NOTE(review): this description speaks of jobs although the function is
 *       named step_create; confirm the wording against the implementation
 */
extern int step_create(char *step_specs, uint16_t *new_job_id, int allocate);
/*
 * step_lock - lock the step information
 * global: step_mutex - semaphore for the step table
 * NOTE: takes no arguments; declared with (void) so this is a true prototype
 *       and the compiler rejects calls that pass arguments
 */
extern void step_lock(void);
/*
 * step_unlock - unlock the step information
 * global: step_mutex - semaphore for the step table
 * NOTE: takes no arguments; declared with (void) so this is a true prototype
 *       and the compiler rejects calls that pass arguments
 */
extern void step_unlock(void);
/*
 * update_job - update a job's parameters
 * input:  job_id - the job's id
 *         spec - the updates to the job's specification
 * output: return - 0 if no error, otherwise an error code
 * global: job_list - global list of job entries
 * NOTE: the contents of spec are overwritten by white space
 * NOTE: only the job's priority and time_limit may be changed once queued
 */
extern int update_job(uint16_t job_id, char *spec);
/*
 * update_node - update the configuration data for one or more nodes
 * input:  node_names - node names, may contain a regular expression
 *         spec - the updates to the node's specification
 * output: return - 0 if no error, otherwise an error code
 */
extern int update_node(char *node_names, char *spec);
/*
 * update_part - update a partition's configuration data
 * input:  partition_name - the partition's name
 *         spec - the updates to the partition's specification
 * output: return - 0 if no error, otherwise an error code
 * NOTE: the contents of spec are overwritten by white space
 */
extern int update_part(char *partition_name, char *spec);
/*
 * validate_node_specs - validate the node's specifications as valid,
 *                       if not set state to down, in any case update
 *                       last_response
 * input:  node_name - name of the node
 *         cpus - number of cpus measured
 *         real_memory - megabytes of real_memory measured
 *         tmp_disk - megabytes of tmp_disk measured
 * output: return - 0 if no error, ENOENT if no such node, EINVAL if the
 *                  values are too low
 */
extern int validate_node_specs(char *node_name, uint32_t cpus,
                               uint32_t real_memory, uint32_t tmp_disk);
/*
 * write_buffer - write the specified line to the specified buffer,
 *                enlarging the buffer as needed
 * input:  buffer - pointer to the write buffer, must be allocated by alloc()
 *         buffer_offset - byte offset in the buffer (the write location)
 *         buffer_size - size of the buffer in bytes
 *         line - pointer to the data to be written
 * output: buffer - the value is written here; the buffer may be relocated
 *                  by realloc()
 *         buffer_offset - incremented by value_size
 *         return - 0 if no error, errno otherwise
 */
extern int write_buffer(char **buffer, int *buffer_offset,
                        int *buffer_size, char *line);
/*
 * yes_or_no - map a string into an integer
 * input:  in_string - pointer to a string containing "YES" or "NO"
 * output: return - 1 for "YES", 0 for "NO", -1 otherwise
 */
extern int yes_or_no(char *in_string);
#endif
/* !_HAVE_SLURM_H */
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment