From 24be5534e31c6ff4ad2a4a81bca21c3c17e3a069 Mon Sep 17 00:00:00 2001 From: Martin Perry <Martin.Perry@Bull.com> Date: Thu, 23 May 2013 18:47:34 -0700 Subject: [PATCH] INITIAL code to change acctg_freq from a uint16_t to a char * --- slurm/slurm.h.in | 6 +- src/api/config_info.c | 4 +- src/api/init_msg.c | 1 - src/api/step_launch.c | 1 - src/common/read_config.c | 9 +-- src/common/read_config.h | 2 +- src/common/slurm_protocol_api.c | 6 +- src/common/slurm_protocol_api.h | 2 +- src/common/slurm_protocol_defs.c | 2 + src/common/slurm_protocol_defs.h | 4 +- src/common/slurm_protocol_pack.c | 91 +++++++++++++++++++++-------- src/salloc/opt.h | 3 +- src/salloc/salloc.c | 4 +- src/sbatch/opt.c | 5 +- src/sbatch/opt.h | 3 +- src/sbatch/sbatch.c | 4 +- src/slurmctld/job_mgr.c | 76 ++++++++++++++++++++---- src/slurmctld/job_scheduler.c | 2 +- src/slurmctld/proc_req.c | 4 +- src/slurmctld/slurmctld.h | 2 +- src/slurmd/common/slurmstepd_init.c | 5 +- src/slurmd/slurmd/slurmd.c | 3 +- src/slurmd/slurmd/slurmd.h | 4 +- src/srun/libsrun/allocate.c | 4 +- src/srun/libsrun/opt.c | 6 +- src/srun/libsrun/opt.h | 3 +- 26 files changed, 179 insertions(+), 77 deletions(-) diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index e89c76c70d9..1046cd87787 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -964,7 +964,7 @@ typedef struct ext_sensors_data { typedef struct job_descriptor { /* For submit, allocate, and update requests */ char *account; /* charge to specified account */ - uint16_t acctg_freq; /* accounting polling interval (seconds) */ + char *acctg_freq; /* accounting polling intervals (seconds) */ char *alloc_node; /* node making resource allocation request * NOTE: Normally set by slurm_submit* or * slurm_allocate* function */ @@ -1403,7 +1403,7 @@ typedef struct { char *mpi_plugin_name; uint8_t open_mode; - uint16_t acctg_freq; + char *acctg_freq; bool pty; char *ckpt_dir; char *restart_dir; @@ -1974,7 +1974,7 @@ typedef struct slurm_ctl_conf { char * health_check_program; /* pathname of health check program */ uint16_t inactive_limit;/* seconds of inactivity before a * inactive resource allocation is released */ - uint16_t job_acct_gather_freq; /* poll frequency for job accounting + char *job_acct_gather_freq; /* poll frequency for job accounting * gather plugins */ char *job_acct_gather_type; /* job accounting gather type */ char *job_ckpt_dir; /* directory saving job record checkpoint */ diff --git a/src/api/config_info.c b/src/api/config_info.c index f8e6de565ee..f07e303ad43 100644 --- a/src/api/config_info.c +++ b/src/api/config_info.c @@ -447,11 +447,9 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); - snprintf(tmp_str, sizeof(tmp_str), "%u sec", - slurm_ctl_conf_ptr->job_acct_gather_freq); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobAcctGatherFrequency"); - key_pair->value = xstrdup(tmp_str); + key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_acct_gather_freq); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); diff --git a/src/api/init_msg.c b/src/api/init_msg.c index 59bde9b0fb5..7f718ae6310 100644 --- a/src/api/init_msg.c +++ b/src/api/init_msg.c @@ -58,7 +58,6 @@ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg) { memset(job_desc_msg, 0, sizeof(job_desc_msg_t)); - job_desc_msg->acctg_freq = (uint16_t) NO_VAL; job_desc_msg->alloc_sid = NO_VAL; job_desc_msg->conn_type[0] = (uint16_t) NO_VAL; job_desc_msg->contiguous = (uint16_t) NO_VAL; diff --git a/src/api/step_launch.c b/src/api/step_launch.c index bf6406c1089..9e24597e01d 100644 --- a/src/api/step_launch.c +++ b/src/api/step_launch.c @@ -137,7 +137,6 @@ void slurm_step_launch_params_t_init (slurm_step_launch_params_t *ptr) ptr->buffered_stdio = true; memcpy(&ptr->local_fds, &fds, sizeof(fds)); ptr->gid = getgid(); - ptr->acctg_freq = (uint16_t) NO_VAL; ptr->cpu_freq = NO_VAL; } diff --git a/src/common/read_config.c b/src/common/read_config.c index e1938e07db9..45094375478 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -205,7 +205,7 @@ s_p_options_t slurm_conf_options[] = { {"HealthCheckProgram", S_P_STRING}, {"InactiveLimit", S_P_UINT16}, {"JobAcctGatherType", S_P_STRING}, - {"JobAcctGatherFrequency", S_P_UINT16}, + {"JobAcctGatherFrequency", S_P_STRING}, {"JobCheckpointDir", S_P_STRING}, {"JobCompHost", S_P_STRING}, {"JobCompLoc", S_P_STRING}, @@ -2190,7 +2190,7 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr) xfree(ctl_conf_ptr->health_check_program); ctl_conf_ptr->inactive_limit = (uint16_t) NO_VAL; xfree (ctl_conf_ptr->job_acct_gather_type); - ctl_conf_ptr->job_acct_gather_freq = 0; + xfree (ctl_conf_ptr->job_acct_gather_freq); xfree (ctl_conf_ptr->job_ckpt_dir); xfree (ctl_conf_ptr->job_comp_loc); xfree (ctl_conf_ptr->job_comp_pass); @@ -2792,9 +2792,10 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) conf->inactive_limit = DEFAULT_INACTIVE_LIMIT; } - if (!s_p_get_uint16(&conf->job_acct_gather_freq, + if (!s_p_get_string(&conf->job_acct_gather_freq, "JobAcctGatherFrequency", hashtbl)) - conf->job_acct_gather_freq = DEFAULT_JOB_ACCT_GATHER_FREQ; + conf->job_acct_gather_freq = + xstrdup(DEFAULT_JOB_ACCT_GATHER_FREQ); if (!s_p_get_string(&conf->job_acct_gather_type, "JobAcctGatherType", hashtbl)) diff --git a/src/common/read_config.h b/src/common/read_config.h index 241fcbb4ce8..8aea7e8807a 100644 --- a/src/common/read_config.h +++ b/src/common/read_config.h @@ -78,7 +78,7 @@ extern char *default_plugstack; #define DEFAULT_INACTIVE_LIMIT 0 #define DEFAULT_JOB_ACCT_GATHER_TYPE "jobacct_gather/none" #define JOB_ACCT_GATHER_TYPE_NONE "jobacct_gather/none" -#define DEFAULT_JOB_ACCT_GATHER_FREQ 30 +#define DEFAULT_JOB_ACCT_GATHER_FREQ "30" #define DEFAULT_ACCT_GATHER_ENERGY_TYPE "acct_gather_energy/none" #define DEFAULT_ACCT_GATHER_PROFILE_TYPE "acct_gather_profile/none" #define DEFAULT_ACCT_GATHER_INFINIBAND_TYPE "acct_gather_infiniband/none" diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 3a5e534fb8a..2cb76d5ad35 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -1377,15 +1377,15 @@ char *slurm_get_jobacct_gather_type(void) * returns the job accounting poll frequency from the slurmctld_conf object * RET int - job accounting frequency */ -uint16_t slurm_get_jobacct_gather_freq(void) +char *slurm_get_jobacct_gather_freq(void) { - uint16_t freq = 0; + char *freq = NULL; slurm_ctl_conf_t *conf; if (slurmdbd_conf) { } else { conf = slurm_conf_lock(); - freq = conf->job_acct_gather_freq; + freq = xstrdup(conf->job_acct_gather_freq); slurm_conf_unlock(); } return freq; diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h index 581bf7b799d..cf570c87d28 100644 --- a/src/common/slurm_protocol_api.h +++ b/src/common/slurm_protocol_api.h @@ -478,7 +478,7 @@ char *slurm_get_jobacct_gather_type(void); * returns the job accounting poll frequency from the slurmctld_conf object * RET int - job accounting frequency */ -uint16_t slurm_get_jobacct_gather_freq(void); +char *slurm_get_jobacct_gather_freq(void); /* slurm_get_jobcomp_type * returns the job completion logger type from slurmctld_conf object diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 39b523b044e..efef8a43484 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -437,6 +437,7 @@ extern void slurm_free_job_launch_msg(batch_job_launch_msg_t * msg) int i; if (msg) { + xfree(msg->acctg_freq); xfree(msg->alias_list); xfree(msg->nodes); xfree(msg->cpu_bind); @@ -701,6 +702,7 @@ extern void slurm_free_launch_tasks_request_msg(launch_tasks_request_msg_t * msg } xfree(msg->env); } + xfree(msg->acctg_freq); xfree(msg->alias_list); xfree(msg->cwd); xfree(msg->cpu_bind); diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 4df81c7ed84..e624e646cc7 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -692,7 +692,7 @@ typedef struct launch_tasks_request_msg { 1 for "user manged" IO */ uint8_t open_mode; /* stdout/err append or truncate */ uint8_t pty; /* use pseudo tty */ - uint16_t acctg_freq; /* accounting polling interval */ + char *acctg_freq; /* accounting polling intervals */ uint32_t cpu_freq; /* requested cpu frequency */ /********** START "normal" IO only options **********/ @@ -828,7 +828,7 @@ typedef struct batch_job_launch_msg { uint32_t pn_min_memory; /* minimum real memory per node OR * real memory per CPU | MEM_PER_CPU, * default=0 (no limit) */ - uint16_t acctg_freq; /* accounting polling interval */ + char *acctg_freq; /* accounting polling intervals */ uint32_t cpu_freq; /* requested cpu frequency */ uint32_t job_mem; /* memory limit for job */ uint16_t restart_cnt; /* batch job restart count */ diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 54f9e9fb0e9..4f737830bea 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -4840,6 +4840,7 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, uint16_t protocol_version) { uint32_t count = NO_VAL; + uint16_t uint16 = 0; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { @@ -4897,7 +4898,7 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack16(build_ptr->inactive_limit, buffer); - pack16(build_ptr->job_acct_gather_freq, buffer); + packstr(build_ptr->job_acct_gather_freq, buffer); packstr(build_ptr->job_acct_gather_type, buffer); packstr(build_ptr->job_ckpt_dir, buffer); @@ -5104,8 +5105,9 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, packstr(build_ptr->health_check_program, buffer); pack16(build_ptr->inactive_limit, buffer); - - pack16(build_ptr->job_acct_gather_freq, buffer); + if (build_ptr->job_acct_gather_freq) + uint16 = atoi(build_ptr->job_acct_gather_freq); + pack16(uint16, buffer); packstr(build_ptr->job_acct_gather_type, buffer); packstr(build_ptr->job_ckpt_dir, buffer); @@ -5306,7 +5308,9 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack16(build_ptr->inactive_limit, buffer); - pack16(build_ptr->job_acct_gather_freq, buffer); + if (build_ptr->job_acct_gather_freq) + uint16 = atoi(build_ptr->job_acct_gather_freq); + pack16(uint16, buffer); packstr(build_ptr->job_acct_gather_type, buffer); packstr(build_ptr->job_ckpt_dir, buffer); @@ -5469,6 +5473,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, { uint32_t count = NO_VAL; uint32_t uint32_tmp; + uint16_t uint16 = 0; slurm_ctl_conf_info_msg_t *build_ptr; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); @@ -5556,7 +5561,8 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack16(&build_ptr->inactive_limit, buffer); - safe_unpack16(&build_ptr->job_acct_gather_freq, buffer); + safe_unpackstr_xmalloc(&build_ptr->job_acct_gather_freq, + &uint32_tmp, buffer); safe_unpackstr_xmalloc(&build_ptr->job_acct_gather_type, &uint32_tmp, buffer); @@ -5845,7 +5851,10 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack16(&build_ptr->inactive_limit, buffer); - safe_unpack16(&build_ptr->job_acct_gather_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + build_ptr->job_acct_gather_freq = + xstrdup_printf("%u", uint16); safe_unpackstr_xmalloc(&build_ptr->job_acct_gather_type, &uint32_tmp, buffer); @@ -6123,7 +6132,10 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack16(&build_ptr->inactive_limit, buffer); - safe_unpack16(&build_ptr->job_acct_gather_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + build_ptr->job_acct_gather_freq = + xstrdup_printf("%u", uint16); safe_unpackstr_xmalloc(&build_ptr->job_acct_gather_type, &uint32_tmp, buffer); @@ -6352,6 +6364,8 @@ static void _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, uint16_t protocol_version) { + uint16_t uint16 = 0; + /* load the data values */ if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { pack16(job_desc_ptr->contiguous, buffer); @@ -6380,7 +6394,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, pack8(job_desc_ptr->open_mode, buffer); pack8(job_desc_ptr->overcommit, buffer); - pack16(job_desc_ptr->acctg_freq, buffer); + packstr(job_desc_ptr->acctg_freq, buffer); pack32(job_desc_ptr->num_tasks, buffer); pack16(job_desc_ptr->ckpt_interval, buffer); @@ -6526,7 +6540,9 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, pack8(job_desc_ptr->open_mode, buffer); pack8(job_desc_ptr->overcommit, buffer); - pack16(job_desc_ptr->acctg_freq, buffer); + if (job_desc_ptr->acctg_freq) + uint16 = atoi(job_desc_ptr->acctg_freq); + pack16(uint16, buffer); pack32(job_desc_ptr->num_tasks, buffer); pack16(job_desc_ptr->ckpt_interval, buffer); @@ -6673,7 +6689,9 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, ///<<<<<<< slurm_protocol_pack.c nlk temp.... why is this here pack8(job_desc_ptr->open_mode, buffer); pack8(job_desc_ptr->overcommit, buffer); - pack16(job_desc_ptr->acctg_freq, buffer); + if (job_desc_ptr->acctg_freq) + uint16 = atoi(job_desc_ptr->acctg_freq); + pack16(uint16, buffer); pack32(job_desc_ptr->num_tasks, buffer); pack16(job_desc_ptr->ckpt_interval, buffer); @@ -6808,6 +6826,7 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, uint16_t protocol_version) { uint32_t uint32_tmp; + uint16_t uint16 = 0; job_desc_msg_t *job_desc_ptr = NULL; /* alloc memory for structure */ @@ -6851,7 +6870,8 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, safe_unpack8(&job_desc_ptr->open_mode, buffer); safe_unpack8(&job_desc_ptr->overcommit, buffer); - safe_unpack16(&job_desc_ptr->acctg_freq, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->acctg_freq, + &uint32_tmp, buffer); safe_unpack32(&job_desc_ptr->num_tasks, buffer); safe_unpack16(&job_desc_ptr->ckpt_interval, buffer); @@ -6986,7 +7006,9 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, safe_unpack8(&job_desc_ptr->open_mode, buffer); safe_unpack8(&job_desc_ptr->overcommit, buffer); - safe_unpack16(&job_desc_ptr->acctg_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + job_desc_ptr->acctg_freq = xstrdup_printf("%u", uint16); safe_unpack32(&job_desc_ptr->num_tasks, buffer); safe_unpack16(&job_desc_ptr->ckpt_interval, buffer); @@ -7121,7 +7143,9 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, safe_unpack8(&job_desc_ptr->open_mode, buffer); safe_unpack8(&job_desc_ptr->overcommit, buffer); - safe_unpack16(&job_desc_ptr->acctg_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + job_desc_ptr->acctg_freq = xstrdup_printf("%u", uint16); safe_unpack32(&job_desc_ptr->num_tasks, buffer); safe_unpack16(&job_desc_ptr->ckpt_interval, buffer); @@ -7512,6 +7536,8 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, { uint32_t cluster_flags = slurmdb_setup_cluster_flags(); int i = 0; + uint16_t uint16 = 0; + xassert(msg != NULL); if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { @@ -7572,7 +7598,7 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, pack8(msg->open_mode, buffer); pack8(msg->pty, buffer); - pack16(msg->acctg_freq, buffer); + packstr(msg->acctg_freq, buffer); pack32(msg->cpu_freq, buffer); packstr(msg->ckpt_dir, buffer); packstr(msg->restart_dir, buffer); @@ -7646,7 +7672,9 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, pack8(msg->open_mode, buffer); pack8(msg->pty, buffer); - pack16(msg->acctg_freq, buffer); + if (msg->acctg_freq) + uint16 = atoi(msg->acctg_freq); + pack16(uint16, buffer); pack32(msg->cpu_freq, buffer); packstr(msg->ckpt_dir, buffer); packstr(msg->restart_dir, buffer); @@ -7718,7 +7746,9 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, pack8(msg->open_mode, buffer); pack8(msg->pty, buffer); - pack16(msg->acctg_freq, buffer); + if (msg->acctg_freq) + uint16 = atoi(msg->acctg_freq); + pack16(uint16, buffer); packstr(msg->ckpt_dir, buffer); packstr(msg->restart_dir, buffer); if (!(cluster_flags & CLUSTER_FLAG_BG)) { @@ -7745,6 +7775,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** { uint32_t cluster_flags = slurmdb_setup_cluster_flags(); uint32_t uint32_tmp; + uint16_t uint16 = 0; launch_tasks_request_msg_t *msg; int i = 0; @@ -7840,7 +7871,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** safe_unpack8(&msg->open_mode, buffer); safe_unpack8(&msg->pty, buffer); - safe_unpack16(&msg->acctg_freq, buffer); + safe_unpackstr_xmalloc(&msg->acctg_freq, &uint32_tmp, buffer); safe_unpack32(&msg->cpu_freq, buffer); safe_unpackstr_xmalloc(&msg->ckpt_dir, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&msg->restart_dir, &uint32_tmp, buffer); @@ -7938,7 +7969,9 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** safe_unpack8(&msg->open_mode, buffer); safe_unpack8(&msg->pty, buffer); - safe_unpack16(&msg->acctg_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + msg->acctg_freq = xstrdup_printf("%u", uint16); safe_unpack32(&msg->cpu_freq, buffer); safe_unpackstr_xmalloc(&msg->ckpt_dir, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&msg->restart_dir, &uint32_tmp, buffer); @@ -8034,7 +8067,9 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** safe_unpack8(&msg->open_mode, buffer); safe_unpack8(&msg->pty, buffer); - safe_unpack16(&msg->acctg_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + msg->acctg_freq = xstrdup_printf("%u", uint16); safe_unpack32(&msg->cpu_freq, buffer); safe_unpackstr_xmalloc(&msg->ckpt_dir, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&msg->restart_dir, &uint32_tmp, buffer); @@ -9402,6 +9437,8 @@ static void _pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer, uint16_t protocol_version) { + uint16_t uint16 = 0; + xassert(msg != NULL); if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { @@ -9418,7 +9455,7 @@ _pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer, pack32(msg->array_job_id, buffer); pack16(msg->array_task_id, buffer); - pack16(msg->acctg_freq, buffer); + packstr(msg->acctg_freq, buffer); pack16(msg->cpu_bind_type, buffer); pack16(msg->cpus_per_task, buffer); pack16(msg->restart_cnt, buffer); @@ -9468,7 +9505,9 @@ _pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer, pack8(msg->open_mode, buffer); pack8(msg->overcommit, buffer); - pack16(msg->acctg_freq, buffer); + if (msg->acctg_freq) + uint16 = atoi(msg->acctg_freq); + pack16(uint16, buffer); pack16(msg->cpu_bind_type, buffer); pack16(msg->cpus_per_task, buffer); pack16(msg->restart_cnt, buffer); @@ -9518,6 +9557,7 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer, uint16_t protocol_version) { uint32_t uint32_tmp; + uint16_t uint16 = 0; batch_job_launch_msg_t *launch_msg_ptr; xassert(msg != NULL); @@ -9538,7 +9578,8 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer, safe_unpack32(&launch_msg_ptr->array_job_id, buffer); safe_unpack16(&launch_msg_ptr->array_task_id, buffer); - safe_unpack16(&launch_msg_ptr->acctg_freq, buffer); + safe_unpackstr_xmalloc(&launch_msg_ptr->acctg_freq, + &uint32_tmp, buffer); safe_unpack16(&launch_msg_ptr->cpu_bind_type, buffer); safe_unpack16(&launch_msg_ptr->cpus_per_task, buffer); safe_unpack16(&launch_msg_ptr->restart_cnt, buffer); @@ -9610,7 +9651,11 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer, safe_unpack8(&launch_msg_ptr->open_mode, buffer); safe_unpack8(&launch_msg_ptr->overcommit, buffer); - safe_unpack16(&launch_msg_ptr->acctg_freq, buffer); + safe_unpack16(&uint16, buffer); + if (uint16) + launch_msg_ptr->acctg_freq = + xstrdup_printf("%u", uint16); + safe_unpack16(&launch_msg_ptr->cpu_bind_type, buffer); safe_unpack16(&launch_msg_ptr->cpus_per_task, buffer); safe_unpack16(&launch_msg_ptr->restart_cnt, buffer); diff --git a/src/salloc/opt.h b/src/salloc/opt.h index fd6ec17ac3e..dddf118918b 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -113,7 +113,8 @@ typedef struct salloc_options { bool hold; /* --hold, -H */ bool no_kill; /* --no-kill, -k */ - int acctg_freq; /* --acctg-freq=secs */ + char *acctg_freq; /* --acctg-freq=<type1>=<freq1>,*/ + /* <type2>=<freq2>,... */ char *licenses; /* --licenses, -L */ bool overcommit; /* --overcommit -O */ int kill_command_signal;/* --kill-command, -K */ diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 351f01f02ce..84f999745c8 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -658,8 +658,8 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->begin_time = opt.begin; if (opt.account) desc->account = xstrdup(opt.account); - if (opt.acctg_freq >= 0) - desc->acctg_freq = opt.acctg_freq; + if (opt.acctg_freq) + desc->acctg_freq = xstrdup(opt.acctg_freq); if (opt.comment) desc->comment = xstrdup(opt.comment); if (opt.qos) diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index b623a7e0394..cb36cd04fe4 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -375,7 +375,7 @@ static void _opt_default() opt.export_file = NULL; opt.get_user_env_time = -1; opt.get_user_env_mode = -1; - opt.acctg_freq = -1; + opt.acctg_freq = NULL; opt.reservation = NULL; opt.wckey = NULL; opt.req_switch = -1; @@ -1604,7 +1604,8 @@ static void _set_options(int argc, char **argv) } break; case LONG_OPT_ACCTG_FREQ: - opt.acctg_freq = _get_int(optarg, "acctg-freq"); + xfree(opt.acctg_freq); + opt.acctg_freq = xstrdup(optarg); break; case LONG_OPT_PROPAGATE: xfree(opt.propagate); diff --git a/src/sbatch/opt.h b/src/sbatch/opt.h index ccbdd6b23af..2f4d3b92e36 100644 --- a/src/sbatch/opt.h +++ b/src/sbatch/opt.h @@ -120,7 +120,8 @@ typedef struct sbatch_options { bool no_kill; /* --no-kill, -k */ int requeue; /* --requeue and --no-requeue */ uint8_t open_mode; /* --open-mode */ - int acctg_freq; /* --acctg-freq=secs */ + char *acctg_freq; /* --acctg-freq=<type1>=<freq1>,*/ + /* <type2>=<freq2>,... */ bool overcommit; /* --overcommit -O */ uint16_t shared; /* --share, -s */ char *licenses; /* --licenses, -L */ diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index 16834765e7f..444f9893dc4 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -446,8 +446,8 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->requeue = opt.requeue; if (opt.open_mode) desc->open_mode = opt.open_mode; - if (opt.acctg_freq >= 0) - desc->acctg_freq = opt.acctg_freq; + if (opt.acctg_freq) + desc->acctg_freq = xstrdup(opt.acctg_freq); desc->ckpt_dir = opt.ckpt_dir; desc->ckpt_interval = (uint16_t)opt.ckpt_interval; diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 1e543fbad67..9a089bf9a83 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1667,7 +1667,7 @@ void _dump_job_details(struct job_details *detail_ptr, Buf buffer) pack32(detail_ptr->max_nodes, buffer); pack32(detail_ptr->num_tasks, buffer); - pack16(detail_ptr->acctg_freq, buffer); + packstr(detail_ptr->acctg_freq, buffer); pack16(detail_ptr->contiguous, buffer); pack16(detail_ptr->cpus_per_task, buffer); pack16(detail_ptr->nice, buffer); @@ -1715,8 +1715,9 @@ void _dump_job_details(struct job_details *detail_ptr, Buf buffer) static int _load_job_details(struct job_record *job_ptr, Buf buffer, uint16_t protocol_version) { - char *req_nodes = NULL, *exc_nodes = NULL, *features = NULL; - char *cpu_bind, *dependency = NULL, *orig_dependency = NULL, *mem_bind; + char *acctg_freq = NULL, *req_nodes = NULL, *exc_nodes = NULL; + char *features = NULL, *cpu_bind, *dependency = NULL; + char *orig_dependency = NULL, *mem_bind; char *err = NULL, *in = NULL, *out = NULL, *work_dir = NULL; char *ckpt_dir = NULL, *restart_dir = NULL; char **argv = (char **) NULL, **env_sup = (char **) NULL; @@ -1725,7 +1726,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer, uint32_t pn_min_cpus, pn_min_memory, pn_min_tmp_disk; uint32_t num_tasks, name_len, argc = 0, env_cnt = 0; uint16_t shared, contiguous, nice, ntasks_per_node; - uint16_t acctg_freq, cpus_per_task, requeue, task_dist; + uint16_t cpus_per_task, requeue, task_dist, tmp_uint16 = 0; uint16_t cpu_bind_type, mem_bind_type, plane_size; uint8_t open_mode, overcommit, prolog_running; time_t begin_time, submit_time; @@ -1740,7 +1741,58 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer, safe_unpack32(&max_nodes, buffer); safe_unpack32(&num_tasks, buffer); - safe_unpack16(&acctg_freq, buffer); + safe_unpackstr_xmalloc(&acctg_freq, &name_len, buffer); + safe_unpack16(&contiguous, buffer); + safe_unpack16(&cpus_per_task, buffer); + safe_unpack16(&nice, buffer); + safe_unpack16(&ntasks_per_node, buffer); + safe_unpack16(&requeue, buffer); + safe_unpack16(&shared, buffer); + safe_unpack16(&task_dist, buffer); + + safe_unpackstr_xmalloc(&cpu_bind, &name_len, buffer); + safe_unpack16(&cpu_bind_type, buffer); + safe_unpackstr_xmalloc(&mem_bind, &name_len, buffer); + safe_unpack16(&mem_bind_type, buffer); + safe_unpack16(&plane_size, buffer); + + safe_unpack8(&open_mode, buffer); + safe_unpack8(&overcommit, buffer); + safe_unpack8(&prolog_running, buffer); + + safe_unpack32(&pn_min_cpus, buffer); + safe_unpack32(&pn_min_memory, buffer); + safe_unpack32(&pn_min_tmp_disk, buffer); + safe_unpack_time(&begin_time, buffer); + safe_unpack_time(&submit_time, buffer); + + safe_unpackstr_xmalloc(&req_nodes, &name_len, buffer); + safe_unpackstr_xmalloc(&exc_nodes, &name_len, buffer); + safe_unpackstr_xmalloc(&features, &name_len, buffer); + safe_unpackstr_xmalloc(&dependency, &name_len, buffer); + safe_unpackstr_xmalloc(&orig_dependency, &name_len, buffer); + + safe_unpackstr_xmalloc(&err, &name_len, buffer); + safe_unpackstr_xmalloc(&in, &name_len, buffer); + safe_unpackstr_xmalloc(&out, &name_len, buffer); + safe_unpackstr_xmalloc(&work_dir, &name_len, buffer); + safe_unpackstr_xmalloc(&ckpt_dir, &name_len, buffer); + safe_unpackstr_xmalloc(&restart_dir, &name_len, buffer); + + if (unpack_multi_core_data(&mc_ptr, buffer, protocol_version)) + goto unpack_error; + safe_unpackstr_array(&argv, &argc, buffer); + safe_unpackstr_array(&env_sup, &env_cnt, buffer); + } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + safe_unpack32(&min_cpus, buffer); + safe_unpack32(&max_cpus, buffer); + safe_unpack32(&min_nodes, buffer); + safe_unpack32(&max_nodes, buffer); + safe_unpack32(&num_tasks, buffer); + + safe_unpack16(&tmp_uint16, buffer); + if (tmp_uint16) + acctg_freq = xstrdup_printf("%u", tmp_uint16); safe_unpack16(&contiguous, buffer); safe_unpack16(&cpus_per_task, buffer); safe_unpack16(&nice, buffer); @@ -1806,6 +1858,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer, } /* free any left-over detail data */ + xfree(job_ptr->details->acctg_freq); for (i=0; i<job_ptr->details->argc; i++) xfree(job_ptr->details->argv[i]); xfree(job_ptr->details->argv); @@ -1876,6 +1929,7 @@ unpack_error: /* for (i=0; i<argc; i++) xfree(argv[i]); Don't trust this on unpack error */ + xfree(acctg_freq); xfree(argv); xfree(cpu_bind); xfree(dependency); @@ -2496,7 +2550,7 @@ void dump_job_desc(job_desc_msg_t * job_specs) { long job_id, time_min; long pn_min_cpus, pn_min_memory, pn_min_tmp_disk, min_cpus; - long time_limit, priority, contiguous, acctg_freq; + long time_limit, priority, contiguous; long kill_on_node_fail, shared, immediate, wait_all_nodes; long cpus_per_task, requeue, num_tasks, overcommit; long ntasks_per_node, ntasks_per_socket, ntasks_per_core; @@ -2625,13 +2679,11 @@ void dump_job_desc(job_desc_msg_t * job_specs) (long) job_specs->num_tasks : -1L; overcommit = (job_specs->overcommit != (uint8_t) NO_VAL) ? (long) job_specs->overcommit : -1L; - acctg_freq = (job_specs->acctg_freq != (uint16_t) NO_VAL) ? - (long) job_specs->acctg_freq : -1L; debug3(" mail_type=%u mail_user=%s nice=%d num_tasks=%ld " - "open_mode=%u overcommit=%ld acctg_freq=%ld", + "open_mode=%u overcommit=%ld acctg_freq=%s", job_specs->mail_type, job_specs->mail_user, (int)job_specs->nice - NICE_OFFSET, num_tasks, - job_specs->open_mode, overcommit, acctg_freq); + job_specs->open_mode, overcommit, job_specs->acctg_freq); slurm_make_time_str(&job_specs->begin_time, buf, sizeof(buf)); cpus_per_task = (job_specs->cpus_per_task != (uint16_t) NO_VAL) ? @@ -5072,7 +5124,7 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, detail_ptr->argv = job_desc->argv; job_desc->argv = (char **) NULL; /* nothing left to free */ job_desc->argc = 0; /* nothing left to free */ - detail_ptr->acctg_freq = job_desc->acctg_freq; + detail_ptr->acctg_freq = xstrdup(job_desc->acctg_freq); detail_ptr->nice = job_desc->nice; detail_ptr->open_mode = job_desc->open_mode; detail_ptr->min_cpus = job_desc->min_cpus; @@ -10450,7 +10502,7 @@ _copy_job_record_to_job_desc(struct job_record *job_ptr) job_desc = xmalloc(sizeof(job_desc_msg_t)); job_desc->account = xstrdup(job_ptr->account); - job_desc->acctg_freq = details->acctg_freq; + job_desc->acctg_freq = xstrdup(details->acctg_freq); job_desc->alloc_node = xstrdup(job_ptr->alloc_node); /* Since the allocating salloc or srun is not expected to exist * when this checkpointed job is restarted, do not save these: diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 9c3eb90da68..36e3d050817 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -1165,7 +1165,7 @@ extern batch_job_launch_msg_t *build_launch_job_msg(struct job_record *job_ptr) launch_msg_ptr->nodes = xstrdup(job_ptr->nodes); launch_msg_ptr->overcommit = job_ptr->details->overcommit; launch_msg_ptr->open_mode = job_ptr->details->open_mode; - launch_msg_ptr->acctg_freq = job_ptr->details->acctg_freq; + launch_msg_ptr->acctg_freq = xstrdup(job_ptr->details->acctg_freq); launch_msg_ptr->cpus_per_task = job_ptr->details->cpus_per_task; launch_msg_ptr->pn_min_memory = job_ptr->details->pn_min_memory; launch_msg_ptr->restart_cnt = job_ptr->restart_cnt; diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index a1e24c25e18..f87b5bc5141 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -528,7 +528,7 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) conf_ptr->health_check_node_state = conf->health_check_node_state; conf_ptr->health_check_program = xstrdup(conf->health_check_program); - conf_ptr->job_acct_gather_freq = conf->job_acct_gather_freq; + conf_ptr->job_acct_gather_freq = xstrdup(conf->job_acct_gather_freq); conf_ptr->job_acct_gather_type = xstrdup(conf->job_acct_gather_type); conf_ptr->job_ckpt_dir = xstrdup(conf->job_ckpt_dir); @@ -3851,7 +3851,7 @@ int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid, launch_msg_ptr->std_err = xstrdup(job_desc_msg->std_err); launch_msg_ptr->std_in = xstrdup(job_desc_msg->std_in); launch_msg_ptr->std_out = xstrdup(job_desc_msg->std_out); - launch_msg_ptr->acctg_freq = job_desc_msg->acctg_freq; + launch_msg_ptr->acctg_freq = xstrdup(job_desc_msg->acctg_freq); launch_msg_ptr->open_mode = job_desc_msg->open_mode; launch_msg_ptr->work_dir = xstrdup(job_desc_msg->work_dir); launch_msg_ptr->argc = job_desc_msg->argc; diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index e5c44f7f22f..013efce7898 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -404,7 +404,7 @@ struct feature_record { /* job_details - specification of a job's constraints, * can be purged after initiation */ struct job_details { - uint16_t acctg_freq; /* accounting polling interval */ + char *acctg_freq; /* accounting polling interval */ uint32_t argc; /* count of argv elements */ char **argv; /* arguments for a batch job script */ time_t begin_time; /* start at this time (srun --begin), diff --git a/src/slurmd/common/slurmstepd_init.c b/src/slurmd/common/slurmstepd_init.c index a38f58d5d76..d3def654f8d 100644 --- a/src/slurmd/common/slurmstepd_init.c +++ b/src/slurmd/common/slurmstepd_init.c @@ -58,7 +58,7 @@ extern void pack_slurmd_conf_lite(slurmd_conf_t *conf, Buf buffer) packstr(conf->logfile, buffer); packstr(conf->task_prolog, buffer); packstr(conf->task_epilog, buffer); - pack16(conf->job_acct_gather_freq, buffer); + packstr(conf->job_acct_gather_freq, buffer); packstr(conf->job_acct_gather_type, buffer); pack16(conf->propagate_prio, buffer); pack32(conf->debug_flags, buffer); @@ -90,7 +90,8 @@ extern int unpack_slurmd_conf_lite_no_alloc(slurmd_conf_t *conf, Buf buffer) safe_unpackstr_xmalloc(&conf->logfile, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&conf->task_prolog, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&conf->task_epilog, &uint32_tmp, buffer); - safe_unpack16(&conf->job_acct_gather_freq, buffer); + safe_unpackstr_xmalloc(&conf->job_acct_gather_freq, &uint32_tmp, + buffer); safe_unpackstr_xmalloc(&conf->job_acct_gather_type, &uint32_tmp, buffer); safe_unpack16(&conf->propagate_prio, buffer); diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index dbbb826e0fa..06c50e6b439 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -887,8 +887,9 @@ _read_config(void) conf->debug_flags = cf->debug_flags; conf->propagate_prio = cf->propagate_prio_process; - conf->job_acct_gather_freq = cf->job_acct_gather_freq; + _free_and_set(&conf->job_acct_gather_freq, + xstrdup(cf->job_acct_gather_freq)); _free_and_set(&conf->acct_gather_energy_type, xstrdup(cf->acct_gather_energy_type)); _free_and_set(&conf->acct_gather_profile_type, diff --git a/src/slurmd/slurmd/slurmd.h b/src/slurmd/slurmd/slurmd.h index cd3bb4c9a74..3f3025324e3 100644 --- a/src/slurmd/slurmd/slurmd.h +++ b/src/slurmd/slurmd/slurmd.h @@ -133,8 +133,8 @@ typedef struct slurmd_config { uint16_t slurmd_timeout; /* SlurmdTimeout */ uid_t slurm_user_id; /* UID that slurmctld runs as */ pthread_mutex_t config_mutex; /* lock for slurmd_config access */ - uint16_t job_acct_gather_freq; - char *job_acct_gather_type; /* job accounting gather type */ + char *job_acct_gather_freq; + char *job_acct_gather_type; /* job accounting gather type */ char *acct_gather_energy_type; /* */ char *acct_gather_profile_type; /* */ uint16_t use_pam; diff --git a/src/srun/libsrun/allocate.c b/src/srun/libsrun/allocate.c index b90b96f9b39..4fabd66a719 100644 --- a/src/srun/libsrun/allocate.c +++ b/src/srun/libsrun/allocate.c @@ -593,8 +593,8 @@ job_desc_msg_create_from_opts (void) j->argv = (char **) xmalloc(sizeof(char *) * 2); j->argv[0] = xstrdup(opt.argv[0]); } - if (opt.acctg_freq >= 0) - j->acctg_freq = opt.acctg_freq; + if (opt.acctg_freq) + j->acctg_freq = xstrdup(opt.acctg_freq); j->reservation = opt.reservation; j->wckey = opt.wckey; diff --git a/src/srun/libsrun/opt.c b/src/srun/libsrun/opt.c index 9c5014bab07..d8d6a908c5e 100644 --- a/src/srun/libsrun/opt.c +++ b/src/srun/libsrun/opt.c @@ -511,7 +511,7 @@ static void _opt_default() opt.pty = false; opt.open_mode = 0; - opt.acctg_freq = -1; + opt.acctg_freq = NULL; opt.cpu_freq = NO_VAL; opt.reservation = NULL; opt.wckey = NULL; @@ -1509,8 +1509,8 @@ static void _set_options(const int argc, char **argv) } break; case LONG_OPT_ACCTG_FREQ: - opt.acctg_freq = _get_int(optarg, "acctg-freq", - false); + xfree(opt.acctg_freq); + opt.acctg_freq = xstrdup(optarg); break; case LONG_OPT_CPU_FREQ: if (cpu_freq_verify_param(optarg, &opt.cpu_freq)) diff --git a/src/srun/libsrun/opt.h b/src/srun/libsrun/opt.h index 4fb4eda4629..5cb901a7f51 100644 --- a/src/srun/libsrun/opt.h +++ b/src/srun/libsrun/opt.h @@ -220,7 +220,8 @@ typedef struct srun_options { uint16_t mail_type; /* --mail-type */ char *mail_user; /* --mail-user */ uint8_t open_mode; /* --open-mode=append|truncate */ - int acctg_freq; /* --acctg-freq=secs */ + char *acctg_freq; /* --acctg-freq=<type1>=<freq1>,*/ + /* <type2>=<freq2>,... */ uint32_t cpu_freq; /* --cpu_freq=kilohertz */ bool pty; /* --pty */ char *restart_dir; /* --restart */ -- GitLab