Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Slurm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tud-zih-energy
Slurm
Commits
5f39c721
Commit
5f39c721
authored
22 years ago
by
Moe Jette
Browse files
Options
Downloads
Patches
Plain Diff
Added basic support for job steps.
parent
d3176332
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/common/slurm.h
+116
-3
116 additions, 3 deletions
src/common/slurm.h
src/common/slurmlib.h
+1
-0
1 addition, 0 deletions
src/common/slurmlib.h
with
117 additions
and
3 deletions
src/common/slurm.h
+
116
−
3
View file @
5f39c721
...
...
@@ -114,6 +114,7 @@ extern struct part_record *default_part_loc; /* location of default partition */
/* NOTE: change JOB_STRUCT_VERSION value whenever the contents of JOB_STRUCT_FORMAT change */
extern
time_t
last_job_update
;
/* time of last update to job records */
extern
time_t
last_step_update
;
/* time of last update to job steps */
/* last entry must be "end", keep in sync with node_state */
extern
char
*
job_state_string
[];
...
...
@@ -121,10 +122,12 @@ extern char *job_state_string[];
/* Don't accept more jobs once there are MAX_JOB_COUNT in the system */
/* Purge OK for jobs over MIN_JOB_AGE minutes old (since completion) */
/* This should prevent exhausting memory */
#define DETAILS_MAGIC 0xdea84e7
#define JOB_MAGIC 0xf0b7392c
#define MAX_JOB_COUNT 1000
#define MIN_JOB_AGE 10
#define
JOB
_MAGIC 0x
f0b7392c
#define DETAILS_MAGIC 0xdea84e7
#define
STEP
_MAGIC 0x
ce593bc1
extern
int
job_count
;
/* number of jobs in the system */
struct
job_details
{
...
...
@@ -161,9 +164,23 @@ struct job_record {
time_t
end_time
;
/* time of termination, actual or expected */
uint32_t
priority
;
/* relative priority of the job */
struct
job_details
*
details
;
/* job details (set until job terminates) */
uint16_t
next_step_id
;
/* next step id to be used */
};
/*
 * step_record - state kept for one job step.
 * Each record points back to its owning job_record and carries its own
 * step id, an integrity cookie, task layout settings and a node bitmap.
 */
struct
step_record
{
struct
job_record
*
job_ptr
;
/* pointer to job_record (for job_id) */
uint16_t
step_id
;
/* step number */
uint32_t
magic
;
/* magic cookie to test data integrity */
uint16_t
dist
;
/* task distribution 1=cycle, 0=block */
uint16_t
procs_per_task
;
/* processors required per task */
bitstr_t
*
node_bitmap
;
/* bitmap of nodes allocated to job step */
#ifdef HAVE_LIBELAN3
/* member exists only when built with HAVE_LIBELAN3 defined */
struct
qsw_jobinfo
*
qsw_jobinfo_t
;
/* Elan3 switch context, opaque data structure */
/* NOTE(review): the member name ends in _t and reads like a type name - confirm this is intended */
#endif
};
extern
List
job_list
;
/* list of job_record entries */
extern
List
step_list
;
/* list of job_step entries */
/* allocate_nodes - for a given bitmap, change the state of specified nodes to stage_in
...
...
@@ -262,6 +279,17 @@ extern struct node_record *create_node_record (struct config_record
*/
extern
struct
part_record
*
create_part_record
(
void
);
/*
* create_step_record - create an empty step_record.
* load its values with defaults (zeros, nulls, and magic cookie)
* input: error_code - location to store error value in
* output: error_code - set to zero if no error, errno otherwise
* returns a pointer to the record or NULL if error
* global: step_list - global step list
* NOTE: allocates memory that should be xfreed with delete_step_record
*/
extern
struct
step_record
*
create_step_record
(
int
*
error_code
);
/* deallocate_nodes - for a given bitmap, change the state of specified nodes to idle
* this is a simple prototype for testing
* globals: node_record_count - number of nodes in the system
...
...
@@ -299,13 +327,22 @@ extern int delete_node_record (char *name);
*/
extern
int
delete_part_record
(
char
*
name
);
/*
* delete_step_record - delete record for job step with specified job_id and step_id
* input: job_id - job_id of the desired job
* step_id - id of the desired job step
* output: return 0 on success, errno otherwise
* global: step_list - global step list
*/
extern
int
delete_step_record
(
char
*
job_id
,
uint16_t
step_id
);
/*
* find_job_record - return a pointer to the job record with the given job_id
* input: job_id - requested job's id
* output: pointer to the job's record, NULL on error
* global: job_list - global job list pointer
*/
extern
struct
job_record
*
find_job_record
(
char
*
job_id
);
extern
struct
job_record
*
find_job_record
(
char
*
job_id
);
/*
* find_node_record - find a record for node with specified name,
...
...
@@ -322,6 +359,15 @@ extern struct node_record *find_node_record (char *name);
*/
extern
struct
part_record
*
find_part_record
(
char
*
name
);
/*
* find_step_record - return a pointer to the step record with the given job_id and step_id
* input: job_id - requested job's id
* step_id - id of the desired job step
* output: pointer to the job step's record, NULL on error
* global: step_list - global step list
*/
extern
struct
step_record
*
find_step_record
(
char
*
job_id
,
uint16_t
step_id
);
/*
* init_job_conf - initialize the job configuration tables and values.
* this should be called after creating node information, but
...
...
@@ -346,6 +392,14 @@ extern int init_node_conf ();
*/
extern
int
init_part_conf
();
/*
* init_step_conf - initialize the job step configuration tables and values.
* this should be called before creating any job step entries.
* output: return value - 0 if no error, otherwise an error code
* global: step_list - global step list
*/
extern
int
init_step_conf
();
/*
* init_slurm_conf - initialize or re-initialize the slurm configuration
* values. this should be called before calling read_slurm_conf.
...
...
@@ -577,6 +631,25 @@ extern int pack_all_node (char **buffer_ptr, int *buffer_size, time_t * update_t
*/
extern
int
pack_all_part
(
char
**
buffer_ptr
,
int
*
buffer_size
,
time_t
*
update_time
);
/*
* pack_all_step - dump all job step information for all steps in
* machine independent form (for network transmission)
* input: buffer_ptr - location into which a pointer to the data is to be stored.
* the calling function must xfree the storage.
* buffer_size - location into which the size of the created buffer (in bytes) is stored
* update_time - dump new data only if job step records updated since time
* specified, otherwise return empty buffer
* output: buffer_ptr - the pointer is set to the allocated buffer.
* buffer_size - set to size of the buffer in bytes
* update_time - set to time job step records last updated
* returns 0 if no error, errno otherwise
* global: step_list - global list of job step records
* NOTE: the buffer at *buffer_ptr must be xfreed by the caller
* NOTE: change STEP_STRUCT_VERSION in common/slurmlib.h whenever the format changes
* NOTE: change slurm_load_step() in api/step_info.c whenever the data format changes
*/
extern
int
pack_all_step
(
char
**
buffer_ptr
,
int
*
buffer_size
,
time_t
*
update_time
);
/*
* pack_job - dump all configuration information about a specific job in
* machine independent form (for network transmission)
...
...
@@ -619,6 +692,22 @@ extern int pack_node (struct node_record *dump_node_ptr, void **buf_ptr, int *bu
*/
extern
int
pack_part
(
struct
part_record
*
part_record_point
,
void
**
buf_ptr
,
int
*
buf_len
);
/*
* pack_step - dump state information about a specific job step in
* machine independent form (for network transmission)
* input: dump_step_ptr - pointer to step for which information is requested
* buf_ptr - buffer for step information
* buf_len - byte size of buffer
* output: buf_ptr - advanced to end of data written
* buf_len - byte size remaining in buffer
* return 0 if no error, 1 if buffer too small
* NOTE: change STEP_STRUCT_VERSION in common/slurmlib.h whenever the format changes
* NOTE: change slurm_load_step() in api/step_info.c whenever the data format changes
* NOTE: the caller must ensure that the buffer is sufficiently large to hold
* the data being written (space remaining at least BUF_SIZE)
*/
extern
int
pack_step
(
struct
step_record
*
dump_step_ptr
,
void
**
buf_ptr
,
int
*
buf_len
);
/*
* parse_job_specs - pick the appropriate fields out of a job request specification
* input: job_specs - string containing the specification
...
...
@@ -745,6 +834,30 @@ extern int select_nodes (struct job_record *job_ptr);
*/
extern
int
slurm_parser
(
char
*
spec
,
...);
/*
* step_create - parse the supplied job specification and create job_records for it
* input: step_specs - job specifications
* new_job_id - location for storing new job's id
* allocate - if set, job allocation only (no script required)
* output: new_job_id - the job's ID
* returns 0 on success, EINVAL if specification is invalid
* globals: job_list - pointer to global job list
* list_part - global list of partition info
* default_part_loc - pointer to default partition
* NOTE: the calling program must xfree the memory pointed to by new_job_id
*/
extern
int
step_create
(
char
*
step_specs
,
char
**
new_job_id
,
int
allocate
);
/* step_lock - lock the step information
* global: step_mutex - semaphore for the step table
*/
extern
void
step_lock
();
/* step_unlock - unlock the step information
* global: step_mutex - semaphore for the step table
*/
extern
void
step_unlock
();
/*
* update_job - update a job's parameters
* input: job_id - job's id
...
...
This diff is collapsed.
Click to expand it.
src/common/slurmlib.h
+
1
−
0
View file @
5f39c721
...
...
@@ -16,6 +16,7 @@
#define SLURMCTLD_HOST "134.9.55.42"
#define SLURMCTLD_PORT 1543
#define STATE_NO_RESPOND 0x8000
#define STEP_STRUCT_VERSION 1
/* INFINITE is used to identify unlimited configurations, */
/* eg. the maximum count of nodes any job may use in some partition */
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment