diff --git a/NEWS b/NEWS index 3f57353c511e891d20ad1193e44f505508e92e98..dc857e36a775723b57195603e33c91d6e20fc681 100644 --- a/NEWS +++ b/NEWS @@ -30,6 +30,7 @@ documents those changes that are of interest to users and admins. -- Patch from Mark Grondona to move blcr scripts into pkglibexecdir -- Patch from Doug Parisek to calculate a job's projected start time under the builtin scheduler. + -- Removed most global variables out of src/common/jobacct_common.h * Changes in SLURM 2.1.0-pre7 ============================= diff --git a/src/common/jobacct_common.c b/src/common/jobacct_common.c index cb89f76911c85bb9e246c51d9477d341d940c64f..cb352144d71853f321d9bb9f586b4a0d428c46e1 100644 --- a/src/common/jobacct_common.c +++ b/src/common/jobacct_common.c @@ -41,15 +41,11 @@ #include "jobacct_common.h" -bool jobacct_shutdown = false; -bool jobacct_suspended = false; -List task_list = NULL; pthread_mutex_t jobacct_lock = PTHREAD_MUTEX_INITIALIZER; -uint32_t cont_id = (uint32_t)NO_VAL; -uint32_t acct_job_id = 0; -uint32_t job_mem_limit = 0; -bool pgid_plugin = false; -uint32_t mult = 1000; +uint32_t jobacct_job_id = 0; +uint32_t jobacct_mem_limit = 0; + +static uint32_t mult = 1000; static void _pack_jobacct_id(jobacct_id_t *jobacct_id, Buf buffer) { @@ -1027,26 +1023,6 @@ unpack_error: return SLURM_ERROR; } -extern int jobacct_common_set_proctrack_container_id(uint32_t id) -{ - if(pgid_plugin) - return SLURM_SUCCESS; - - if(cont_id != (uint32_t)NO_VAL) - info("Warning: jobacct: set_proctrack_container_id: " - "cont_id is already set to %d you are setting it to %d", - cont_id, id); - if(id <= 0) { - error("jobacct: set_proctrack_container_id: " - "I was given most likely an unset cont_id %d", - id); - return SLURM_ERROR; - } - cont_id = id; - - return SLURM_SUCCESS; -} - extern int jobacct_common_set_mem_limit(uint32_t job_id, uint32_t mem_limit) { if ((job_id == 0) || (mem_limit == 0)) { @@ -1055,12 +1031,13 @@ extern int jobacct_common_set_mem_limit(uint32_t job_id, uint32_t mem_limit) return SLURM_ERROR; } - acct_job_id = job_id; - job_mem_limit = mem_limit * 1024; /* MB to KB */ + jobacct_job_id = job_id; + jobacct_mem_limit = mem_limit * 1024; /* MB to KB */ return SLURM_SUCCESS; } -extern int jobacct_common_add_task(pid_t pid, jobacct_id_t *jobacct_id) +extern int jobacct_common_add_task(pid_t pid, jobacct_id_t *jobacct_id, + List task_list) { struct jobacctinfo *jobacct = jobacct_common_alloc_jobacct(jobacct_id); @@ -1087,7 +1064,7 @@ error: return SLURM_ERROR; } -extern struct jobacctinfo *jobacct_common_stat_task(pid_t pid) +extern struct jobacctinfo *jobacct_common_stat_task(pid_t pid, List task_list) { struct jobacctinfo *jobacct = NULL; struct jobacctinfo *ret_jobacct = NULL; @@ -1114,7 +1091,7 @@ error: return ret_jobacct; } -extern struct jobacctinfo *jobacct_common_remove_task(pid_t pid) +extern struct jobacctinfo *jobacct_common_remove_task(pid_t pid, List task_list) { struct jobacctinfo *jobacct = NULL; @@ -1145,20 +1122,3 @@ error: return jobacct; } -extern int jobacct_common_endpoll() -{ - jobacct_shutdown = true; - - return SLURM_SUCCESS; -} - -extern void jobacct_common_suspend_poll() -{ - jobacct_suspended = true; -} - -extern void jobacct_common_resume_poll() -{ - jobacct_suspended = false; -} - diff --git a/src/common/jobacct_common.h b/src/common/jobacct_common.h index bacb25d5f71fa6ebc930104c3c119c115c5aff02..db2bb75e8965565c872a8f6749fd957a51d9cac8 100644 --- a/src/common/jobacct_common.h +++ b/src/common/jobacct_common.h @@ -235,25 +235,18 @@ extern void jobacct_common_2_sacct(sacct_t *sacct, extern void jobacct_common_pack(struct jobacctinfo *jobacct, Buf buffer); extern int jobacct_common_unpack(struct jobacctinfo **jobacct, Buf buffer); -extern int jobacct_common_endpoll(); -extern int jobacct_common_set_proctrack_container_id(uint32_t id); extern int jobacct_common_set_mem_limit(uint32_t job_id, uint32_t mem_limit); -extern int jobacct_common_add_task(pid_t pid, jobacct_id_t *jobacct_id); -extern struct jobacctinfo *jobacct_common_stat_task(pid_t pid); -extern struct jobacctinfo *jobacct_common_remove_task(pid_t pid); -extern void jobacct_common_suspend_poll(); -extern void jobacct_common_resume_poll(); +extern int jobacct_common_add_task(pid_t pid, jobacct_id_t *jobacct_id, + List task_list); +extern struct jobacctinfo *jobacct_common_stat_task(pid_t pid, List task_list); +extern struct jobacctinfo *jobacct_common_remove_task(pid_t pid, + List task_list); /***************************************************************/ /* defined in common_jobacct.c */ -extern bool jobacct_shutdown; -extern bool jobacct_suspended; -extern List task_list; extern pthread_mutex_t jobacct_lock; -extern uint32_t cont_id; -extern uint32_t acct_job_id; -extern uint32_t job_mem_limit; /* job's memory limit in KB */ -extern bool pgid_plugin; +extern uint32_t jobacct_job_id; +extern uint32_t jobacct_mem_limit; /* job's memory limit in KB */ #endif diff --git a/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c b/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c index 375f27f62b7a081872b88ea57a63c10360708b0e..fdf8664d317ab885476bcd871124cd5b4d7119ab 100644 --- a/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c +++ b/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c @@ -87,6 +87,12 @@ const char plugin_type[] = "jobacct_gather/aix"; const uint32_t plugin_version = 100; /* Other useful declarations */ +static bool jobacct_shutdown = 0; +static bool jobacct_suspended = 0; +static List task_list = NULL; +static uint32_t cont_id = (uint32_t)NO_VAL; +static bool pgid_plugin = false; + #ifdef HAVE_AIX typedef struct prec { /* process record */ pid_t pid; @@ -100,6 +106,7 @@ typedef struct prec { /* process record */ static int freq = 0; static int pagesize = 0; + /* Finally, pre-define all the routines. */ static void _acct_kill_job(void); @@ -293,14 +300,14 @@ static void _get_process_data() list_iterator_destroy(itr); slurm_mutex_unlock(&jobacct_lock); - if (job_mem_limit) { + if (jobacct_mem_limit) { debug("Job %u memory used:%u limit:%u KB", - acct_job_id, total_job_mem, job_mem_limit); + jobacct_job_id, total_job_mem, jobacct_mem_limit); } - if (acct_job_id && job_mem_limit && - (total_job_mem > job_mem_limit)) { + if (jobacct_job_id && jobacct_mem_limit && + (total_job_mem > jobacct_mem_limit)) { error("Job %u exceeded %u KB memory limit, being killed", - acct_job_id, job_mem_limit); + jobacct_job_id, jobacct_mem_limit); _acct_kill_job(); } @@ -321,7 +328,7 @@ static void _acct_kill_job(void) /* * Request message: */ - req.job_id = acct_job_id; + req.job_id = jobacct_job_id; req.job_step_id = NO_VAL; req.signal = SIGKILL; req.batch_flag = 0; @@ -497,22 +504,37 @@ extern void jobacct_gather_p_change_poll(uint16_t frequency) extern void jobacct_gather_p_suspend_poll() { - jobacct_common_suspend_poll(); + jobacct_suspended = true; } extern void jobacct_gather_p_resume_poll() { - jobacct_common_resume_poll(); + jobacct_suspended = false; } extern int jobacct_gather_p_set_proctrack_container_id(uint32_t id) { - return jobacct_common_set_proctrack_container_id(id); + if(pgid_plugin) + return SLURM_SUCCESS; + + if(cont_id != (uint32_t)NO_VAL) + info("Warning: jobacct: set_proctrack_container_id: " + "cont_id is already set to %d you are setting it to %d", + cont_id, id); + if(id <= 0) { + error("jobacct: set_proctrack_container_id: " + "I was given most likely an unset cont_id %d", + id); + return SLURM_ERROR; + } + cont_id = id; + + return SLURM_SUCCESS; } extern int jobacct_gather_p_add_task(pid_t pid, jobacct_id_t *jobacct_id) { - return jobacct_common_add_task(pid, jobacct_id); + return jobacct_common_add_task(pid, jobacct_id, task_list); } extern struct jobacctinfo *jobacct_gather_p_stat_task(pid_t pid) @@ -521,14 +543,14 @@ extern struct jobacctinfo *jobacct_gather_p_stat_task(pid_t pid) _get_process_data(); #endif if(pid) - return jobacct_common_stat_task(pid); + return jobacct_common_stat_task(pid, task_list); else return NULL; } extern struct jobacctinfo *jobacct_gather_p_remove_task(pid_t pid) { - return jobacct_common_remove_task(pid); + return jobacct_common_remove_task(pid, task_list); } extern void jobacct_gather_p_2_sacct(sacct_t *sacct, diff --git a/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c b/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c index 7c76aa7ec58dfd99f453a429524ac1cfe5c8bf0f..61e60e2fd4a5afe2f2c99cf7f680e12d2edd55fd 100644 --- a/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c +++ b/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c @@ -96,6 +96,11 @@ typedef struct prec { /* process record */ static int freq = 0; static DIR *slash_proc = NULL; static pthread_mutex_t reading_mutex = PTHREAD_MUTEX_INITIALIZER; +static bool jobacct_shutdown = 0; +static bool jobacct_suspended = 0; +static List task_list = NULL; +static uint32_t cont_id = (uint32_t)NO_VAL; +static bool pgid_plugin = false; /* Finally, pre-define all local routines. */ @@ -342,14 +347,14 @@ static void _get_process_data() { list_iterator_destroy(itr); slurm_mutex_unlock(&jobacct_lock); - if (job_mem_limit) { + if (jobacct_mem_limit) { debug("Job %u memory used:%u limit:%u KB", - acct_job_id, total_job_mem, job_mem_limit); + jobacct_job_id, total_job_mem, jobacct_mem_limit); } - if (acct_job_id && job_mem_limit && - (total_job_mem > job_mem_limit)) { + if (jobacct_job_id && jobacct_mem_limit && + (total_job_mem > jobacct_mem_limit)) { error("Job %u exceeded %u KB memory limit, being killed", - acct_job_id, job_mem_limit); + jobacct_job_id, jobacct_mem_limit); _acct_kill_job(); } @@ -369,7 +374,7 @@ static void _acct_kill_job(void) /* * Request message: */ - req.job_id = acct_job_id; + req.job_id = jobacct_job_id; req.job_step_id = NO_VAL; req.signal = SIGKILL; req.batch_flag = 0; @@ -618,22 +623,37 @@ extern void jobacct_gather_p_change_poll(uint16_t frequency) extern void jobacct_gather_p_suspend_poll() { - jobacct_common_suspend_poll(); + jobacct_suspended = true; } extern void jobacct_gather_p_resume_poll() { - jobacct_common_resume_poll(); + jobacct_suspended = false; } extern int jobacct_gather_p_set_proctrack_container_id(uint32_t id) { - return jobacct_common_set_proctrack_container_id(id); + if(pgid_plugin) + return SLURM_SUCCESS; + + if(cont_id != (uint32_t)NO_VAL) + info("Warning: jobacct: set_proctrack_container_id: " + "cont_id is already set to %d you are setting it to %d", + cont_id, id); + if(id <= 0) { + error("jobacct: set_proctrack_container_id: " + "I was given most likely an unset cont_id %d", + id); + return SLURM_ERROR; + } + cont_id = id; + + return SLURM_SUCCESS; } extern int jobacct_gather_p_add_task(pid_t pid, jobacct_id_t *jobacct_id) { - return jobacct_common_add_task(pid, jobacct_id); + return jobacct_common_add_task(pid, jobacct_id, task_list); } @@ -641,14 +661,14 @@ extern struct jobacctinfo *jobacct_gather_p_stat_task(pid_t pid) { _get_process_data(); if(pid) - return jobacct_common_stat_task(pid); + return jobacct_common_stat_task(pid, task_list); else return NULL; } extern struct jobacctinfo *jobacct_gather_p_remove_task(pid_t pid) { - return jobacct_common_remove_task(pid); + return jobacct_common_remove_task(pid, task_list); } extern void jobacct_gather_p_2_sacct(sacct_t *sacct,