/* Danny Auble authored
 * assoc_mgr.h 13.33 KiB */
/*****************************************************************************\
* assoc_mgr.h - keep track of local cache of accounting data.
*****************************************************************************
* Copyright (C) 2004-2007 The Regents of the University of California.
* Copyright (C) 2008 Lawrence Livermore National Security.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Danny Auble <da@llnl.gov>
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.schedmd.com/slurmdocs/>.
* Please also read the included file: DISCLAIMER.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#ifndef _SLURM_ASSOC_MGR_H
#define _SLURM_ASSOC_MGR_H
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "src/common/list.h"
#include "src/common/slurm_accounting_storage.h"
#include "src/common/slurmdbd_defs.h"
#include "src/slurmctld/slurmctld.h"
#include "src/slurmctld/locks.h"
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
/*
 * Cache selection flags, one bit per cached data type.
 * ASSOC_MGR_CACHE_ALL has all 16 low bits set, so it includes every flag
 * defined here.
 * NOTE(review): presumably stored in assoc_init_args_t.cache_level
 * (uint16_t) - confirm against the callers.
 */
#define ASSOC_MGR_CACHE_ASSOC 0x0001
#define ASSOC_MGR_CACHE_QOS 0x0002
#define ASSOC_MGR_CACHE_USER 0x0004
#define ASSOC_MGR_CACHE_WCKEY 0x0008
#define ASSOC_MGR_CACHE_ALL 0xffff
/*
 * Lock request handed to assoc_mgr_lock() / assoc_mgr_unlock(): one
 * lock_level_t per data type, saying whether to lock it or not.
 * The fields appear in the same order as the entries of
 * assoc_mgr_lock_datatype_t (assoc, file, qos, user, wckey).
 */
typedef struct {
lock_level_t assoc;
lock_level_t file;
lock_level_t qos;
lock_level_t user;
lock_level_t wckey;
} assoc_mgr_lock_t;
/* Internal lock structure
 * we actually use three semaphores for each data type:
 * (assoc_mgr_lock_datatype_t * 3 + 0) = read_lock
 * (assoc_mgr_lock_datatype_t * 3 + 1) = write_lock
 * (assoc_mgr_lock_datatype_t * 3 + 2) = write_wait_lock
 *
 * NOTE(review): the original comment said "see macros below", but no such
 * macros are visible in this header - confirm where they are defined.
 */
typedef enum {
ASSOC_LOCK,
FILE_LOCK,
QOS_LOCK,
USER_LOCK,
WCKEY_LOCK,
ASSOC_MGR_ENTITY_COUNT /* number of data types above; must stay last */
} assoc_mgr_lock_datatype_t;
/*
 * Lock flags array: three int slots for every assoc_mgr_lock_datatype_t
 * entry, hence ASSOC_MGR_ENTITY_COUNT * 3 elements. Slot
 * (datatype * 3 + 0) is the read lock, + 1 the write lock and + 2 the
 * write-wait lock.
 */
typedef struct {
int entity[ASSOC_MGR_ENTITY_COUNT * 3];
} assoc_mgr_lock_flags_t;
/*
 * Arguments handed to assoc_mgr_init() and assoc_mgr_refresh_lists().
 * The function pointers are callbacks supplied by the caller.
 * NOTE(review): when each callback fires is not visible in this header;
 * the names suggest "record removed" / "record updated" notifications -
 * confirm in assoc_mgr.c.
 */
typedef struct {
uint16_t cache_level; /* presumably a mask of ASSOC_MGR_CACHE_* - TODO confirm */
uint16_t enforce; /* enforcement setting; semantics not defined in this header */
void (*remove_assoc_notify) (slurmdb_association_rec_t *rec); /* callback taking an association record */
void (*remove_qos_notify) (slurmdb_qos_rec_t *rec); /* callback taking a qos record */
void (*update_assoc_notify) (slurmdb_association_rec_t *rec); /* callback taking an association record */
void (*update_qos_notify) (slurmdb_qos_rec_t *rec); /* callback taking a qos record */
void (*update_resvs) (); /* NOTE(review): empty parameter list leaves the
* arguments unchecked; consider (void) if the
* callback takes none */
} assoc_init_args_t;
/*
 * Usage bookkeeping kept for an association. Every field is marked
 * DON'T PACK.
 * NOTE: the field name "childern_list" is misspelled but is part of the
 * interface, so it is left as is.
 */
struct assoc_mgr_association_usage {
List childern_list; /* list of children associations
* (DON'T PACK) */
uint32_t grp_used_cpus; /* count of cpus in use in the group
* (DON'T PACK) */
uint32_t grp_used_mem; /* count of memory in use in the group
* (DON'T PACK) */
uint32_t grp_used_nodes; /* count of nodes in use in the group
* (DON'T PACK) */
double grp_used_wall; /* group count of time used in
* running jobs (DON'T PACK) */
uint64_t grp_used_cpu_run_secs; /* count of running cpu secs
* (DON'T PACK) */
uint32_t level_shares; /* number of shares on this level of
* the tree (DON'T PACK) */
slurmdb_association_rec_t *parent_assoc_ptr; /* ptr to parent acct
* set in slurmctld
* (DON'T PACK) */
double shares_norm; /* normalized shares (DON'T PACK) */
long double usage_efctv;/* effective, normalized usage (DON'T PACK) */
long double usage_norm; /* normalized usage (DON'T PACK) */
long double usage_raw; /* measure of resource usage (DON'T PACK) */
uint32_t used_jobs; /* count of active jobs (DON'T PACK) */
uint32_t used_submit_jobs; /* count of jobs pending or running
* (DON'T PACK) */
uint32_t tickets; /* Number of tickets (for multifactor2
* plugin). (DON'T PACK) */
unsigned active_seqno; /* Sequence number for identifying
* active associations (DON'T PACK) */
bitstr_t *valid_qos; /* qos available for this association
* derived from the qos_list.
* (DON'T PACK) */
};
/*
 * Usage bookkeeping kept for a qos. Every field is marked DON'T PACK.
 */
struct assoc_mgr_qos_usage {
List job_list; /* list of job pointers to submitted/running
* jobs (DON'T PACK) */
uint32_t grp_used_cpus; /* count of cpus in use in this qos
* (DON'T PACK) */
uint64_t grp_used_cpu_run_secs; /* count of running cpu secs
* (DON'T PACK) */
uint32_t grp_used_jobs; /* count of active jobs (DON'T PACK) */
uint32_t grp_used_mem; /* count of memory in use in this qos
* (DON'T PACK) */
uint32_t grp_used_nodes; /* count of nodes in use in this qos
* (DON'T PACK) */
uint32_t grp_used_submit_jobs; /* count of jobs pending or running
* (DON'T PACK) */
double grp_used_wall; /* group count of time (minutes) used in
* running jobs (DON'T PACK) */
double norm_priority;/* normalized priority (DON'T PACK) */
long double usage_raw; /* measure of resource usage (DON'T PACK) */
List user_limit_list; /* slurmdb_used_limits_t's (DON'T PACK) */
};
/*
 * Global lists of the association manager, one per data type.
 * NOTE(review): the element types are not visible in this header -
 * presumably the matching slurmdb_*_rec_t records; confirm in assoc_mgr.c.
 */
extern List assoc_mgr_association_list;
extern List assoc_mgr_qos_list;
extern List assoc_mgr_user_list;
extern List assoc_mgr_wckey_list;
/* NOTE(review): presumably the root of the association tree - confirm */
extern slurmdb_association_rec_t *assoc_mgr_root_assoc;
extern uint32_t g_qos_max_priority; /* max priority in all qos's */
extern uint32_t g_qos_count; /* count used for generating qos bitstr's */
/*
 * Set up the association manager.
 * IN: db_conn - database connection handle
 * IN: args - init arguments, see assoc_init_args_t
 * IN: db_conn_errno - NOTE(review): presumably the errno obtained when
 *                     db_conn was opened - confirm
 * RET: int status (NOTE(review): values not documented in this header)
 */
extern int assoc_mgr_init(void *db_conn, assoc_init_args_t *args,
			  int db_conn_errno);

/*
 * Shut down the association manager.
 * IN: state_save_location - NOTE(review): presumably the location used by
 *                           dump_assoc_mgr_state() - confirm
 * RET: int status (NOTE(review): values not documented in this header)
 */
extern int assoc_mgr_fini(char *state_save_location);

/*
 * Acquire / release the locks described by "locks"
 * (one lock_level_t per data type, see assoc_mgr_lock_t).
 */
extern void assoc_mgr_lock(assoc_mgr_lock_t *locks);
extern void assoc_mgr_unlock(assoc_mgr_lock_t *locks);

/*
 * Create / destroy an assoc_mgr_association_usage_t.
 * The create function takes no arguments; it is declared with (void) so
 * the compiler checks calls against the prototype.
 * destroy takes a void *object.
 */
extern assoc_mgr_association_usage_t *create_assoc_mgr_association_usage(void);
extern void destroy_assoc_mgr_association_usage(void *object);

/*
 * Create / destroy an assoc_mgr_qos_usage_t.
 * Same conventions as for the association usage functions above.
 */
extern assoc_mgr_qos_usage_t *create_assoc_mgr_qos_usage(void);
extern void destroy_assoc_mgr_qos_usage(void *object);
/*
 * get the associations of a user from the storage
 * IN: db_conn - database connection handle
 * IN: assoc - slurmdb_association_rec_t with at least cluster and
 *             account set for account association. To get user
 *             association set user, and optional partition.
 *             Sets "id" field with the association ID.
 * IN: enforce - return an error if no such association exists
 * IN/OUT: assoc_list - contains a list of assoc_rec ptrs to
 *                      associations this user has in the list. This
 *                      list should be created with list_create(NULL)
 *                      since we are putting pointers to memory used
 *                      elsewhere.
 * RET: SLURM_SUCCESS on success, else SLURM_ERROR
 */
extern int assoc_mgr_get_user_assocs(void *db_conn,
slurmdb_association_rec_t *assoc,
int enforce,
List assoc_list);
/*
 * get association info from the storage
 * IN: db_conn - database connection handle
 * IN/OUT: assoc - slurmdb_association_rec_t with at least cluster and
 *                 account set for account association. To get user
 *                 association set user, and optional partition.
 *                 Sets "id" field with the association ID.
 * IN: enforce - return an error if no such association exists
 * IN/OUT: assoc_pptr - if non-NULL then return a pointer to the
 *                      slurmdb_association record in cache on success
 *                      DO NOT FREE.
 * RET: SLURM_SUCCESS on success, else SLURM_ERROR
 */
extern int assoc_mgr_fill_in_assoc(void *db_conn,
slurmdb_association_rec_t *assoc,
int enforce,
slurmdb_association_rec_t **assoc_pptr);
/*
 * get user info from the storage
 * IN: db_conn - database connection handle
 * IN/OUT: user - slurmdb_user_rec_t with the name set of the user.
 *                "default_account" will be filled in on
 *                successful return DO NOT FREE.
 * IN: enforce - NOTE(review): not documented in the original comment;
 *               presumably an error is returned if no such user exists,
 *               as for assoc_mgr_fill_in_assoc() - confirm
 * IN/OUT: user_pptr - if non-NULL then return a pointer to the
 *                     slurmdb_user record in cache on success
 *                     DO NOT FREE.
 * RET: SLURM_SUCCESS on success, else SLURM_ERROR
 */
extern int assoc_mgr_fill_in_user(void *db_conn, slurmdb_user_rec_t *user,
int enforce,
slurmdb_user_rec_t **user_pptr);
/*
 * get qos info from the storage
 * IN: db_conn - database connection handle
 * IN/OUT: qos - slurmdb_qos_rec_t with the id set of the qos.
 * IN: enforce - NOTE(review): not documented in the original comment;
 *               presumably an error is returned if no such qos exists,
 *               as for assoc_mgr_fill_in_assoc() - confirm
 * IN/OUT: qos_pptr - if non-NULL then return a pointer to the
 *                    slurmdb_qos record in cache on success
 *                    DO NOT FREE.
 * RET: SLURM_SUCCESS on success, else SLURM_ERROR
 */
extern int assoc_mgr_fill_in_qos(void *db_conn, slurmdb_qos_rec_t *qos,
int enforce,
slurmdb_qos_rec_t **qos_pptr);
/*
 * get wckey info from the storage
 * IN: db_conn - database connection handle
 * IN/OUT: wckey - slurmdb_wckey_rec_t with the name, cluster and user
 *                 for the wckey association.
 *                 Sets "id" field with the wckey ID.
 * IN: enforce - return an error if no such wckey exists
 * IN/OUT: wckey_pptr - if non-NULL then return a pointer to the
 *                      slurmdb_wckey record in cache on success
 * RET: SLURM_SUCCESS on success, else SLURM_ERROR
 */
extern int assoc_mgr_fill_in_wckey(void *db_conn,
slurmdb_wckey_rec_t *wckey,
int enforce,
slurmdb_wckey_rec_t **wckey_pptr);
/*
 * get admin_level of uid
 * IN: db_conn - database connection handle
 * IN: uid - uid of user to check admin_level of.
 * RET: admin level of the user, SLURMDB_ADMIN_NOTSET on error
 */
extern slurmdb_admin_level_t assoc_mgr_get_admin_level(void *db_conn,
uint32_t uid);
/*
 * see if user is coordinator of given acct
 * IN: db_conn - database connection handle
 * IN: uid - uid of user to check.
 * IN: acct - name of account
 * RET: true if the user is a coordinator of acct, false otherwise
 */
extern bool assoc_mgr_is_user_acct_coord(void *db_conn, uint32_t uid,
char *acct);
/*
 * get the share information from the association list in the form of
 * a list containing association_share_object_t's
 * IN: db_conn - database connection handle
 * IN: uid - uid_t of user issuing the request
 * IN: acct_list - char * list of accounts you want (NULL for all)
 * IN: user_list - char * list of user names you want (NULL for all)
 * RET: List of association_share_object_t's
 *      (NOTE(review): ownership of the returned list and the value
 *      returned on failure are not documented here - confirm)
 */
extern List assoc_mgr_get_shares(
void *db_conn, uid_t uid, List acct_list, List user_list);
/*
 * assoc_mgr_update - update the association manager
 * IN: update_list - updates to perform
 * RET: error code
 * NOTE: the items in update_list are not deleted
 */
extern int assoc_mgr_update(List update_list);
/*
 * update associations in cache
 * IN: update - slurmdb_update_object_t describing the changes
 * RET: SLURM_SUCCESS on success (or not found), else SLURM_ERROR
 */
extern int assoc_mgr_update_assocs(slurmdb_update_object_t *update);
/*
 * update wckeys in cache
 * IN: update - slurmdb_update_object_t describing the changes
 * RET: SLURM_SUCCESS on success (or not found), else SLURM_ERROR
 */
extern int assoc_mgr_update_wckeys(slurmdb_update_object_t *update);
/*
 * update qos in cache
 * IN: update - slurmdb_update_object_t describing the changes
 * RET: SLURM_SUCCESS on success (or not found), else SLURM_ERROR
 */
extern int assoc_mgr_update_qos(slurmdb_update_object_t *update);
/*
 * update users in cache
 * IN: update - slurmdb_update_object_t describing the changes
 * RET: SLURM_SUCCESS on success (or not found), else SLURM_ERROR
 */
extern int assoc_mgr_update_users(slurmdb_update_object_t *update);
/*
 * validate that an association ID is still valid
 * IN: db_conn - database connection handle
 * IN: assoc_id - association ID previously returned by get_assoc_id()
 *                (NOTE(review): no get_assoc_id() is declared in this
 *                header; likely the "id" set by
 *                assoc_mgr_fill_in_assoc() - confirm)
 * IN: enforce - NOTE(review): not documented in the original comment;
 *               presumably the same meaning as for
 *               assoc_mgr_fill_in_assoc() - confirm
 * RET: SLURM_SUCCESS on success, else SLURM_ERROR
 */
extern int assoc_mgr_validate_assoc_id(void *db_conn,
uint32_t assoc_id,
int enforce);
/*
 * clear the used_* fields from every association,
 * used on reconfiguration
 */
extern void assoc_mgr_clear_used_info(void);
/*
 * Remove the association's accumulated usage
 * IN: assoc - association whose accumulated usage is removed
 * No return value (the function is declared void).
 */
extern void assoc_mgr_remove_assoc_usage(slurmdb_association_rec_t *assoc);
/*
 * Dump the state information of the association mgr just in case the
 * database isn't up next time we run.
 * IN: state_save_location - where the state is saved
 * RET: int status (NOTE(review): values not documented in this header)
 */
extern int dump_assoc_mgr_state(char *state_save_location);
/*
 * Read in the past usage for associations.
 * IN: state_save_location - where the state was saved
 * RET: int status (NOTE(review): values not documented in this header)
 */
extern int load_assoc_usage(char *state_save_location);
/*
 * Read in the past usage for qos.
 * IN: state_save_location - where the state was saved
 * RET: int status (NOTE(review): values not documented in this header)
 */
extern int load_qos_usage(char *state_save_location);
/*
 * Read in the information of the association mgr if the database
 * isn't up when starting.
 * IN: state_save_location - where the state was saved
 * RET: int status (NOTE(review): values not documented in this header)
 */
extern int load_assoc_mgr_state(char *state_save_location);
/*
 * Refresh the lists. When running_cache is set this will load new
 * information from the database (if any) and update the cached lists.
 * If args is set, this only sets internal variables and returns; no
 * lists are refreshed.
 * IN: db_conn - database connection handle
 * IN: args - optional arguments, see assoc_init_args_t
 * RET: int status (NOTE(review): values not documented in this header)
 */
extern int assoc_mgr_refresh_lists(void *db_conn, assoc_init_args_t *args);
/*
 * Sets the uids of users added to the system after the start of the
 * calling program.
 * Takes no arguments; declared with (void) so that the compiler checks
 * calls against the prototype.
 * RET: int status (NOTE(review): values not documented in this header)
 */
extern int assoc_mgr_set_missing_uids(void);
#endif /* _SLURM_ASSOC_MGR_H */