From f64a84bf4285b4bdb1ef3e4dcd5d7631df3818a8 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@schedmd.com> Date: Wed, 8 May 2013 16:03:07 -0700 Subject: [PATCH] change profile from a char * to a uint32_t --- slurm/slurm.h.in | 23 +++++-- src/common/slurm_acct_gather_profile.c | 84 ++++++++++++++++++++++++++ src/common/slurm_acct_gather_profile.h | 10 +++ src/common/slurm_protocol_defs.c | 3 - src/common/slurm_protocol_defs.h | 2 +- src/common/slurm_protocol_pack.c | 14 ++--- src/salloc/opt.c | 21 ++++--- src/salloc/opt.h | 2 +- src/salloc/salloc.c | 2 +- src/sbatch/opt.c | 20 +++--- src/sbatch/opt.h | 2 +- src/slurmctld/job_mgr.c | 19 +++--- src/slurmctld/slurmctld.h | 2 +- src/slurmd/slurmstepd/slurmstepd_job.c | 2 +- src/slurmd/slurmstepd/slurmstepd_job.h | 2 +- src/srun/libsrun/opt.c | 18 +++--- src/srun/libsrun/opt.h | 2 +- 17 files changed, 170 insertions(+), 58 deletions(-) diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 2abc4ea875b..bb66f9da896 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -408,6 +408,19 @@ enum select_node_cnt { * base partition count */ }; +enum acct_gather_profile_info { + ACCT_GATHER_PROFILE_DIR, /* Give directory profiling is stored */ + ACCT_GATHER_PROFILE_DEFAULT /* What is being collected for profiling */ +}; + +#define ACCT_GATHER_PROFILE_NOT_SET 0x00000000 +#define ACCT_GATHER_PROFILE_NONE 0x00000001 +#define ACCT_GATHER_PROFILE_ENERGY 0x00000002 +#define ACCT_GATHER_PROFILE_TASK 0x00000004 +#define ACCT_GATHER_PROFILE_LUSTRE 0x00000008 +#define ACCT_GATHER_PROFILE_NETWORK 0x00000010 +#define ACCT_GATHER_PROFILE_ALL 0xffffffff + /* jobacct data types */ enum jobacct_data_type { JOBACCT_DATA_TOTAL, /* data-> jobacctinfo_t * */ @@ -1008,7 +1021,7 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ uint32_t priority; /* relative priority of the job, * explicitly set only for user root, * 0 == held (don't initiate) */ - char *profile; /* Level of acct_gather_profile {all | none} */ + 
uint32_t profile; /* Level of acct_gather_profile {all | none} */ char *qos; /* Quality of Service */ char *resp_host; /* NOTE: Set by slurmctld */ char *req_nodes; /* comma separated list of required nodes @@ -1151,7 +1164,7 @@ typedef struct job_info { time_t pre_sus_time; /* time job ran prior to last suspend */ uint32_t priority; /* relative priority of the job, * 0=held, 1=required nodes DOWN/DRAINED */ - char *profile; /* Level of acct_gather_profile {all | none} */ + uint32_t profile; /* Level of acct_gather_profile {all | none} */ char *qos; /* Quality of Service */ char *req_nodes; /* comma separated list of required nodes */ int *req_node_inx; /* required list index pairs into node_table: @@ -1319,7 +1332,7 @@ typedef struct { char *gres; /* generic resources needed */ char *name; /* name of the job step */ char *network; /* network use spec */ - char *profile; /* Level of acct_gather_profile {all | none} */ + uint32_t profile; /* Level of acct_gather_profile {all | none} */ uint8_t no_kill; /* 1 if no kill on node failure */ uint32_t min_nodes; /* minimum number of nodes required by job, * default=0 */ @@ -1366,7 +1379,7 @@ typedef struct { bool multi_prog; uint32_t slurmd_debug; /* remote slurmd debug level */ bool parallel_debug; - char *profile; /* Level of acct_gather_profile {all | none} */ + uint32_t profile; /* Level of acct_gather_profile {all | none} */ char *task_prolog; char *task_epilog; uint16_t cpu_bind_type; /* use cpu_bind_type_t */ @@ -1435,7 +1448,7 @@ typedef struct { uint32_t cpu_freq; /* requested cpu frequency */ uint32_t num_tasks; /* number of tasks */ char *partition; /* name of assigned partition */ - char *profile; /* Level of acct_gather_profile {all | none} */ + uint32_t profile; /* Level of acct_gather_profile {all | none} */ char *resv_ports; /* ports allocated for MPI */ time_t run_time; /* net run time (factor out time suspended) */ dynamic_plugin_data_t *select_jobinfo; /* opaque data type, diff --git 
a/src/common/slurm_acct_gather_profile.c b/src/common/slurm_acct_gather_profile.c
index 0a43c6c36d4..dce4a5cd1df 100644
--- a/src/common/slurm_acct_gather_profile.c
+++ b/src/common/slurm_acct_gather_profile.c
@@ -144,6 +144,111 @@ extern int acct_gather_profile_fini(void)
 	return rc;
 }
 
+/*
+ * Translate a profile bitmask into a comma separated list of series names.
+ * The string is built in a static buffer which the next call overwrites
+ * (DO NOT free).
+ */
+extern char *acct_gather_profile_to_string(uint32_t profile)
+{
+	static char profile_str[128];
+
+	profile_str[0] = '\0';
+	if (profile == ACCT_GATHER_PROFILE_NOT_SET)
+		strcat(profile_str, "NotSet");
+	else if (profile == ACCT_GATHER_PROFILE_NONE)
+		strcat(profile_str, "None");
+	else {
+		if (profile & ACCT_GATHER_PROFILE_ENERGY)
+			strcat(profile_str, "Energy");
+		if (profile & ACCT_GATHER_PROFILE_LUSTRE) {
+			if (profile_str[0])
+				strcat(profile_str, ",");
+			strcat(profile_str, "Lustre");
+		}
+		if (profile & ACCT_GATHER_PROFILE_NETWORK) {
+			if (profile_str[0])
+				strcat(profile_str, ",");
+			strcat(profile_str, "Network");
+		}
+		if (profile & ACCT_GATHER_PROFILE_TASK) {
+			if (profile_str[0])
+				strcat(profile_str, ",");
+			strcat(profile_str, "Task");
+		}
+	}
+	return profile_str;
+}
+
+/*
+ * Translate a string of profile keywords into a bitmask.  Keywords are
+ * located with slurm_strcasestr(); "none" is tested first, then "all", and
+ * only otherwise are the individual series bits accumulated.  A NULL string
+ * yields ACCT_GATHER_PROFILE_NOT_SET.
+ */
+extern uint32_t acct_gather_profile_from_string(char *profile_str)
+{
+	uint32_t profile = ACCT_GATHER_PROFILE_NOT_SET;
+
+	if (!profile_str) {
+		/* nothing requested, leave as ACCT_GATHER_PROFILE_NOT_SET */
+	} else if (slurm_strcasestr(profile_str, "none"))
+		profile = ACCT_GATHER_PROFILE_NONE;
+	else if (slurm_strcasestr(profile_str, "all"))
+		profile = ACCT_GATHER_PROFILE_ALL;
+	else {
+		if (slurm_strcasestr(profile_str, "energy"))
+			profile |= ACCT_GATHER_PROFILE_ENERGY;
+		if (slurm_strcasestr(profile_str, "task"))
+			profile |= ACCT_GATHER_PROFILE_TASK;
+
+		if (slurm_strcasestr(profile_str, "lustre"))
+			profile |= ACCT_GATHER_PROFILE_LUSTRE;
+
+		if (slurm_strcasestr(profile_str, "network"))
+			profile |= ACCT_GATHER_PROFILE_NETWORK;
+	}
+
+	return profile;
+}
+
+/* Translate a single series bit to its name, "Unknown" if not recognized */
+extern char *acct_gather_profile_series_to_string(uint32_t series)
+{
+	if (series == ACCT_GATHER_PROFILE_ENERGY)
+		return "Energy";
+	else if (series == ACCT_GATHER_PROFILE_TASK)
+		return "Task";
+	else if (series == ACCT_GATHER_PROFILE_LUSTRE)
+		return "Lustre";
+	else if (series == ACCT_GATHER_PROFILE_NETWORK)
+		return "Network";
+
+	return "Unknown";
+}
+
+/*
+ * Translate a single series name (case insensitive, whole string) to its
+ * bit.  Returns ACCT_GATHER_PROFILE_NOT_SET for NULL or an unknown name.
+ */
+extern uint32_t acct_gather_profile_series_from_string(char *series_str)
+{
+	if (!series_str)
+		return ACCT_GATHER_PROFILE_NOT_SET;
+
+	/* strcasecmp() returns 0 on a match, so test for equality */
+	if (!strcasecmp(series_str, "energy"))
+		return ACCT_GATHER_PROFILE_ENERGY;
+	else if (!strcasecmp(series_str, "task"))
+		return ACCT_GATHER_PROFILE_TASK;
+	else if (!strcasecmp(series_str, "lustre"))
+		return ACCT_GATHER_PROFILE_LUSTRE;
+	else if (!strcasecmp(series_str, "network"))
+		return ACCT_GATHER_PROFILE_NETWORK;
+
+	return ACCT_GATHER_PROFILE_NOT_SET;
+}
+
 extern void acct_gather_profile_g_conf_options(s_p_options_t **full_options,
 					       int *full_options_cnt)
 {
diff --git a/src/common/slurm_acct_gather_profile.h b/src/common/slurm_acct_gather_profile.h
index 24aae19e5a6..71be774080c 100644
--- a/src/common/slurm_acct_gather_profile.h
+++ b/src/common/slurm_acct_gather_profile.h
@@ -75,6 +75,16 @@ extern int acct_gather_profile_init(void);
  * Unload the plugin
  */
 extern int acct_gather_profile_fini(void);
+
+/* translate uint32_t profile to string (DO NOT free) */
+extern char *acct_gather_profile_to_string(uint32_t profile);
+
+/* translate string of words to uint32_t filled in with bits set to profile */
+extern uint32_t acct_gather_profile_from_string(char *profile_str);
+
+extern char *acct_gather_profile_series_to_string(uint32_t series);
+extern uint32_t acct_gather_profile_series_from_string(char *series_str);
+
 /*
  * Define plugin local conf for acct_gather.conf
  *
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index e57db262ef6..39b523b044e 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -415,7 +415,6 @@ extern void slurm_free_job_desc_msg(job_desc_msg_t * msg)
 		xfree(msg->network);
 		xfree(msg->std_out);
 		xfree(msg->partition);
-		xfree(msg->profile);
 		xfree(msg->ramdiskimage);
 		xfree(msg->req_nodes);
 		xfree(msg->reservation);
@@ -501,7 +500,6 @@ extern void
slurm_free_job_info_members(job_info_t * job) xfree(job->node_inx); xfree(job->nodes); xfree(job->partition); - xfree(job->profile); xfree(job->qos); xfree(job->req_node_inx); xfree(job->req_nodes); @@ -730,7 +728,6 @@ extern void slurm_free_launch_tasks_request_msg(launch_tasks_request_msg_t * msg xfree(msg->ofname); xfree(msg->efname); - xfree(msg->profile); xfree(msg->task_prolog); xfree(msg->task_epilog); xfree(msg->complete_nodelist); diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 1178fd5cdca..4df81c7ed84 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -706,7 +706,7 @@ typedef struct launch_tasks_request_msg { uint16_t *io_port; /* array of available client IO listen ports */ /********** END "normal" IO only options **********/ - char *profile; + uint32_t profile; char *task_prolog; char *task_epilog; diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 9e518af7ab1..cc41ae4e9e7 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -4435,7 +4435,6 @@ _unpack_job_info_members(job_info_t * job, Buf buffer, job->ntasks_per_node = (uint16_t)NO_VAL; -//<<<<<<< slurm_protocol_pack.c nlk temp remove if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { safe_unpack32(&job->array_job_id, buffer); safe_unpack16(&job->array_task_id, buffer); @@ -4443,6 +4442,7 @@ _unpack_job_info_members(job_info_t * job, Buf buffer, safe_unpack32(&job->job_id, buffer); safe_unpack32(&job->user_id, buffer); safe_unpack32(&job->group_id, buffer); + safe_unpack32(&job->profile, buffer); safe_unpack16(&job->job_state, buffer); safe_unpack16(&job->batch_flag, buffer); @@ -4473,7 +4473,6 @@ _unpack_job_info_members(job_info_t * job, Buf buffer, safe_unpackstr_xmalloc(&job->gres, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->batch_host, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->batch_script, &uint32_tmp, buffer); - 
safe_unpackstr_xmalloc(&job->profile, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->qos, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->licenses, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->state_desc, &uint32_tmp, buffer); @@ -6312,8 +6311,6 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, { /* load the data values */ if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { -//SMD if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { -//RBS: other half of job_desc_msg. I don't think we have to replicate, but we do have to use 2_4 pack16(job_desc_ptr->contiguous, buffer); pack16(job_desc_ptr->task_dist, buffer); pack16(job_desc_ptr->kill_on_node_fail, buffer); @@ -6335,7 +6332,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, packstr(job_desc_ptr->account, buffer); packstr(job_desc_ptr->comment, buffer); pack16(job_desc_ptr->nice, buffer); - packstr(job_desc_ptr->profile, buffer); + pack32(job_desc_ptr->profile, buffer); packstr(job_desc_ptr->qos, buffer); pack8(job_desc_ptr->open_mode, buffer); @@ -6806,8 +6803,7 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, safe_unpackstr_xmalloc(&job_desc_ptr->comment, &uint32_tmp, buffer); safe_unpack16(&job_desc_ptr->nice, buffer); - safe_unpackstr_xmalloc(&job_desc_ptr->profile, &uint32_tmp, - buffer); + safe_unpack32(&job_desc_ptr->profile, buffer); safe_unpackstr_xmalloc(&job_desc_ptr->qos, &uint32_tmp, buffer); @@ -7525,7 +7521,7 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, for (i = 0; i < msg->num_io_port; i++) pack16(msg->io_port[i], buffer); } - packstr(msg->profile, buffer); + pack32(msg->profile, buffer); packstr(msg->task_prolog, buffer); packstr(msg->task_epilog, buffer); pack16(msg->slurmd_debug, buffer); @@ -7782,7 +7778,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** buffer); } } - safe_unpackstr_xmalloc(&msg->profile, &uint32_tmp, buffer); + safe_unpack32(&msg->profile, buffer); 
safe_unpackstr_xmalloc(&msg->task_prolog, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&msg->task_epilog, &uint32_tmp, buffer); safe_unpack16(&msg->slurmd_debug, buffer); diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 286d74a2311..35b6ebef445 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -83,6 +83,7 @@ #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_resource_info.h" #include "src/common/slurm_rlimits_info.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/uid.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" @@ -114,6 +115,7 @@ #define OPT_SIGNAL 0x15 #define OPT_KILL_CMD 0x16 #define OPT_TIME_VAL 0x17 +#define OPT_PROFILE 0x18 /* generic getopt_long flags, integers and *not* valid characters */ #define LONG_OPT_CPU_BIND 0x101 @@ -305,7 +307,7 @@ static void _opt_default() opt.time_min = NO_VAL; opt.time_min_str = NULL; opt.partition = NULL; - opt.profile = NULL; + opt.profile = ACCT_GATHER_PROFILE_NOT_SET; opt.job_name = NULL; opt.jobid = NO_VAL; @@ -401,7 +403,7 @@ env_vars_t env_vars[] = { {"SALLOC_NO_ROTATE", OPT_NO_ROTATE, NULL, NULL }, {"SALLOC_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, {"SALLOC_PARTITION", OPT_STRING, &opt.partition, NULL }, - {"SALLOC_PROFILE", OPT_STRING, &opt.profile, NULL }, + {"SALLOC_PROFILE", OPT_PROFILE, NULL, NULL }, {"SALLOC_QOS", OPT_STRING, &opt.qos, NULL }, {"SALLOC_RESERVATION", OPT_STRING, &opt.reservation, NULL }, {"SALLOC_SIGNAL", OPT_SIGNAL, NULL, NULL }, @@ -567,7 +569,9 @@ _process_env_var(env_vars_t *e, const char *val) case OPT_TIME_VAL: opt.wait4switch = time_str2secs(val); break; - + case OPT_PROFILE: + opt.profile = acct_gather_profile_from_string((char *)val); + break; default: /* do nothing */ break; @@ -670,7 +674,7 @@ void set_options(const int argc, char **argv) {"ntasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE}, {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET}, {"qos", required_argument, 0, 
LONG_OPT_QOS}, - {"profile", optional_argument, 0, LONG_OPT_PROFILE}, + {"profile", required_argument, 0, LONG_OPT_PROFILE}, {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, {"reboot", no_argument, 0, LONG_OPT_REBOOT}, {"reservation", required_argument, 0, LONG_OPT_RESERVATION}, @@ -1013,8 +1017,7 @@ void set_options(const int argc, char **argv) opt.jobid = _get_int(optarg, "jobid"); break; case LONG_OPT_PROFILE: - xfree(opt.profile); - opt.profile = xstrdup(optarg); + opt.profile = acct_gather_profile_from_string(optarg); break; case LONG_OPT_COMMENT: xfree(opt.comment); @@ -1579,7 +1582,8 @@ static bool _opt_verify(void) } if (opt.profile) - setenvfs("SLURM_PROFILE=%s", opt.profile); + setenvfs("SLURM_PROFILE=%s", + acct_gather_profile_to_string(opt.profile)); return verified; } @@ -1764,7 +1768,8 @@ static void _opt_list(void) if (opt.gres != NULL) info("gres : %s", opt.gres); info("network : %s", opt.network); - info("profile : `%s'", opt.profile); + info("profile : `%s'", + acct_gather_profile_to_string(opt.profile)); info("qos : %s", opt.qos); str = print_constraints(); info("constraints : %s", str); diff --git a/src/salloc/opt.h b/src/salloc/opt.h index 1780b86fe8e..fd6ec17ac3e 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -94,7 +94,7 @@ typedef struct salloc_options { int time_min; /* --min-time (int minutes) */ char *time_min_str; /* --min-time (string) */ char *partition; /* --partition=n, -p n */ - char *profile; /* --profile=[all | none} */ + uint32_t profile; /* --profile=[all | none} */ enum task_dist_states distribution; /* --distribution=, -m dist */ uint32_t plane_size; /* lllp distribution -> plane_size for diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 3059402cc60..351f01f02ce 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -614,7 +614,7 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->immediate = 1; desc->name = xstrdup(opt.job_name); desc->reservation = 
xstrdup(opt.reservation); - desc->profile = xstrdup(opt.profile); + desc->profile = opt.profile; desc->wckey = xstrdup(opt.wckey); if (opt.req_switch >= 0) desc->req_switch = opt.req_switch; diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index c3b29def74f..b623a7e0394 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -82,6 +82,7 @@ #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_resource_info.h" #include "src/common/slurm_rlimits_info.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/uid.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" @@ -116,6 +117,7 @@ #define OPT_CLUSTERS 0x18 #define OPT_TIME_VAL 0x19 #define OPT_ARRAY_INX 0x20 +#define OPT_PROFILE 0x21 /* generic getopt_long flags, integers and *not* valid characters */ #define LONG_OPT_PROPAGATE 0x100 @@ -362,7 +364,7 @@ static void _opt_default() opt.euid = (uid_t) -1; opt.egid = (gid_t) -1; - opt.profile = NULL; /* acct_gather_profile selection */ + opt.profile = ACCT_GATHER_PROFILE_NOT_SET; opt.propagate = NULL; /* propagate specific rlimits */ opt.ifname = xstrdup("/dev/null"); @@ -474,7 +476,7 @@ env_vars_t env_vars[] = { {"SBATCH_OPEN_MODE", OPT_OPEN_MODE, NULL, NULL }, {"SBATCH_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, {"SBATCH_PARTITION", OPT_STRING, &opt.partition, NULL }, - {"SBATCH_PROFILE", OPT_STRING, &opt.profile, NULL }, + {"SBATCH_PROFILE", OPT_PROFILE, NULL, NULL }, {"SBATCH_QOS", OPT_STRING, &opt.qos, NULL }, {"SBATCH_RAMDISK_IMAGE", OPT_STRING, &opt.ramdiskimage, NULL }, {"SBATCH_REQUEUE", OPT_REQUEUE, NULL, NULL }, @@ -658,6 +660,9 @@ _process_env_var(env_vars_t *e, const char *val) case OPT_TIME_VAL: opt.wait4switch = time_str2secs(val); break; + case OPT_PROFILE: + opt.profile = acct_gather_profile_from_string((char *)val); + break; default: /* do nothing */ break; @@ -746,7 +751,7 @@ static struct option long_options[] = { {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET}, {"open-mode", 
required_argument, 0, LONG_OPT_OPEN_MODE}, {"propagate", optional_argument, 0, LONG_OPT_PROPAGATE}, - {"profile", optional_argument, 0, LONG_OPT_PROFILE}, + {"profile", required_argument, 0, LONG_OPT_PROFILE}, {"qos", required_argument, 0, LONG_OPT_QOS}, {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, {"reboot", no_argument, 0, LONG_OPT_REBOOT}, @@ -1487,8 +1492,7 @@ static void _set_options(int argc, char **argv) opt.requeue = 1; break; case LONG_OPT_PROFILE: - xfree(opt.profile); - opt.profile = xstrdup(optarg); + opt.profile = acct_gather_profile_from_string(optarg); break; case LONG_OPT_COMMENT: xfree(opt.comment); @@ -2445,7 +2449,8 @@ static bool _opt_verify(void) setenvfs("SLURM_JOB_DEPENDENCY=%s", opt.dependency); if (opt.profile) - setenvfs("SLURM_PROFILE=%s", opt.profile); + setenvfs("SLURM_PROFILE=%s", + acct_gather_profile_to_string(opt.profile)); if (opt.acctg_freq >= 0) @@ -2722,7 +2727,8 @@ static void _opt_list(void) opt.jobid_set ? "(set)" : "(default)"); info("partition : %s", opt.partition == NULL ? 
"default" : opt.partition); - info("profile : `%s'", opt.profile); + info("profile : `%s'", + acct_gather_profile_to_string(opt.profile)); info("job name : `%s'", opt.job_name); info("reservation : `%s'", opt.reservation); info("wckey : `%s'", opt.wckey); diff --git a/src/sbatch/opt.h b/src/sbatch/opt.h index 796e42af4a6..ccbdd6b23af 100644 --- a/src/sbatch/opt.h +++ b/src/sbatch/opt.h @@ -96,7 +96,7 @@ typedef struct sbatch_options { int time_min; /* --min-time (int minutes) */ char *time_min_str; /* --min-time (string) */ char *partition; /* --partition=n, -p n */ - char *profile; /* --profile=[all | none} */ + uint32_t profile; /* --profile=[all | none} */ enum task_dist_states distribution; /* --distribution=, -m dist */ uint32_t plane_size; /* lllp distribution -> plane_size for diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 4f4fafa932f..45b26de860c 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -789,6 +789,7 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer) pack32(dump_job_ptr->qos_id, buffer); pack32(dump_job_ptr->req_switch, buffer); pack32(dump_job_ptr->wait4switch, buffer); + pack32(dump_job_ptr->profile, buffer); pack_time(dump_job_ptr->preempt_time, buffer); pack_time(dump_job_ptr->start_time, buffer); @@ -844,7 +845,6 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer) packstr(dump_job_ptr->network, buffer); packstr(dump_job_ptr->licenses, buffer); packstr(dump_job_ptr->mail_user, buffer); - packstr(dump_job_ptr->profile, buffer); packstr(dump_job_ptr->resv_name, buffer); packstr(dump_job_ptr->batch_host, buffer); @@ -893,6 +893,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) uint32_t next_step_id, total_cpus, total_nodes = 0, cpu_cnt; uint32_t resv_id, spank_job_env_size = 0, qos_id, derived_ec = 0; uint32_t array_job_id = 0, req_switch = 0, wait4switch = 0; + uint32_t profile = ACCT_GATHER_PROFILE_NOT_SET; time_t start_time, end_time, 
suspend_time, pre_sus_time, tot_sus_time; time_t preempt_time = 0; time_t resize_time = 0, now = time(NULL); @@ -908,7 +909,6 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) uint16_t limit_set_time = 0, limit_set_qos = 0; char *nodes = NULL, *partition = NULL, *name = NULL, *resp_host = NULL; char *account = NULL, *network = NULL, *mail_user = NULL; - char *profile = NULL; char *comment = NULL, *nodes_completing = NULL, *alloc_node = NULL; char *licenses = NULL, *state_desc = NULL, *wckey = NULL; char *resv_name = NULL, *gres = NULL, *batch_host = NULL; @@ -966,6 +966,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) safe_unpack32(&qos_id, buffer); safe_unpack32(&req_switch, buffer); safe_unpack32(&wait4switch, buffer); + safe_unpack32(&profile, buffer); safe_unpack_time(&preempt_time, buffer); safe_unpack_time(&start_time, buffer); @@ -1036,7 +1037,6 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) safe_unpackstr_xmalloc(&network, &name_len, buffer); safe_unpackstr_xmalloc(&licenses, &name_len, buffer); safe_unpackstr_xmalloc(&mail_user, &name_len, buffer); - safe_unpackstr_xmalloc(&profile, &name_len, buffer); safe_unpackstr_xmalloc(&resv_name, &name_len, buffer); safe_unpackstr_xmalloc(&batch_host, &name_len, buffer); @@ -1466,9 +1466,6 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) xfree(job_ptr->mail_user); job_ptr->mail_user = mail_user; mail_user = NULL; /* reused, nothing left to free */ - xfree(job_ptr->profile); - job_ptr->profile = profile; - profile = NULL; /* reused, nothing left to free */ xfree(job_ptr->name); /* in case duplicate record */ job_ptr->name = name; name = NULL; /* reused, nothing left to free */ @@ -1540,6 +1537,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) job_ptr->limit_set_qos = limit_set_qos; job_ptr->req_switch = req_switch; job_ptr->wait4switch = wait4switch; + job_ptr->profile = profile; /* This needs to always to 
initialized to "true". The select plugin will deal with it every time it goes through the logic if req_switch or wait4switch are set. @@ -2773,7 +2771,7 @@ struct job_record *_job_rec_copy(struct job_record *job_ptr) job_ptr_new->node_bitmap_cg = bit_copy(job_ptr->node_bitmap_cg); job_ptr_new->nodes_completing = xstrdup(job_ptr->nodes_completing); job_ptr_new->partition = xstrdup(job_ptr->partition); - job_ptr_new->profile = xstrdup(job_ptr->profile); + job_ptr_new->profile = job_ptr->profile; job_ptr_new->part_ptr_list = part_list_copy(job_ptr->part_ptr_list); if (job_ptr->prio_factors) { i = sizeof(priority_factors_object_t); @@ -4989,8 +4987,8 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, return error_code; job_ptr->partition = xstrdup(job_desc->partition); - if (job_desc->profile) - job_ptr->profile = xstrdup(job_desc->profile); + if (job_desc->profile != ACCT_GATHER_PROFILE_NOT_SET) + job_ptr->profile = job_desc->profile; if (job_desc->job_id != NO_VAL) { /* already confirmed unique */ job_ptr->job_id = job_desc->job_id; @@ -5563,7 +5561,6 @@ static void _list_delete_job(void *job_entry) xfree(job_ptr->nodes); xfree(job_ptr->nodes_completing); xfree(job_ptr->partition); - xfree(job_ptr->profile); FREE_NULL_LIST(job_ptr->part_ptr_list); xfree(job_ptr->priority_array); slurm_destroy_priority_factors_object(job_ptr->prio_factors); @@ -5817,6 +5814,7 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer, pack32(dump_job_ptr->job_id, buffer); pack32(dump_job_ptr->user_id, buffer); pack32(dump_job_ptr->group_id, buffer); + pack32(dump_job_ptr->profile, buffer); pack16(dump_job_ptr->job_state, buffer); pack16(dump_job_ptr->batch_flag, buffer); @@ -5886,7 +5884,6 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer, } else { packnull(buffer); } - packstr(dump_job_ptr->profile, buffer); assoc_mgr_lock(&locks); if (assoc_mgr_qos_list) { diff --git a/src/slurmctld/slurmctld.h 
b/src/slurmctld/slurmctld.h index 3020dab6224..6324a3cbbf2 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -583,7 +583,7 @@ struct job_record { uint32_t *priority_array; /* partition based priority */ priority_factors_object_t *prio_factors; /* cached value used * by sprio command */ - char *profile; /* Acct_gather_profile option */ + uint32_t profile; /* Acct_gather_profile option */ uint32_t qos_id; /* quality of service id */ void *qos_ptr; /* pointer to the quality of * service record used for diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c index 84985646874..54762b5b75c 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.c +++ b/src/slurmd/slurmstepd/slurmstepd_job.c @@ -281,7 +281,7 @@ job_create(launch_tasks_request_msg_t *msg) job->buffered_stdio = msg->buffered_stdio; job->labelio = msg->labelio; - job->profile = xstrdup(msg->profile); + job->profile = msg->profile; job->task_prolog = xstrdup(msg->task_prolog); job->task_epilog = xstrdup(msg->task_epilog); diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h index 0a754592db3..41d30918d49 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.h +++ b/src/slurmd/slurmstepd/slurmstepd_job.h @@ -147,7 +147,7 @@ typedef struct slurmd_job { bool run_prolog; /* true if need to run prolog */ bool user_managed_io; time_t timelimit; /* time at which job must stop */ - char *profile; /* Level of acct_gather_profile */ + uint32_t profile; /* Level of acct_gather_profile */ char *task_prolog; /* per-task prolog */ char *task_epilog; /* per-task epilog */ struct passwd *pwd; /* saved passwd struct for user job */ diff --git a/src/srun/libsrun/opt.c b/src/srun/libsrun/opt.c index db2beaed183..9c5014bab07 100644 --- a/src/srun/libsrun/opt.c +++ b/src/srun/libsrun/opt.c @@ -82,6 +82,7 @@ #include "src/common/slurm_protocol_interface.h" #include "src/common/slurm_rlimits_info.h" #include 
"src/common/slurm_resource_info.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/uid.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" @@ -119,6 +120,7 @@ #define OPT_SIGNAL 0x17 #define OPT_TIME_VAL 0x18 #define OPT_CPU_FREQ 0x19 +#define OPT_PROFILE 0x20 /* generic getopt_long flags, integers and *not* valid characters */ #define LONG_OPT_HELP 0x100 @@ -488,7 +490,7 @@ static void _opt_default() opt.egid = (gid_t) -1; opt.propagate = NULL; /* propagate specific rlimits */ - opt.profile = NULL; /* acct_gather_profile selection */ + opt.profile = ACCT_GATHER_PROFILE_NOT_SET; opt.prolog = slurm_get_srun_prolog(); opt.epilog = slurm_get_srun_epilog(); @@ -583,7 +585,7 @@ env_vars_t env_vars[] = { {"SLURM_OPEN_MODE", OPT_OPEN_MODE, NULL, NULL }, {"SLURM_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, {"SLURM_PARTITION", OPT_STRING, &opt.partition, NULL }, -{"SLURM_PROFILE", OPT_STRING, &opt.profile, NULL }, +{"SLURM_PROFILE", OPT_PROFILE, NULL, NULL }, {"SLURM_PROLOG", OPT_STRING, &opt.prolog, NULL }, {"SLURM_QOS", OPT_STRING, &opt.qos, NULL }, {"SLURM_RAMDISK_IMAGE", OPT_STRING, &opt.ramdiskimage, NULL }, @@ -764,7 +766,9 @@ _process_env_var(env_vars_t *e, const char *val) case OPT_TIME_VAL: opt.wait4switch = time_str2secs(val); break; - + case OPT_PROFILE: + opt.profile = acct_gather_profile_from_string((char *)val); + break; default: /* do nothing */ break; @@ -888,7 +892,7 @@ static void _set_options(const int argc, char **argv) {"ntasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE}, {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET}, {"open-mode", required_argument, 0, LONG_OPT_OPEN_MODE}, - {"profile", optional_argument, 0, LONG_OPT_PROFILE}, + {"profile", required_argument, 0, LONG_OPT_PROFILE}, {"prolog", required_argument, 0, LONG_OPT_PROLOG}, {"propagate", optional_argument, 0, LONG_OPT_PROPAGATE}, {"pty", no_argument, 0, LONG_OPT_PTY}, @@ -1518,8 +1522,7 @@ static void _set_options(const int 
argc, char **argv) opt.wckey = xstrdup(optarg); break; case LONG_OPT_PROFILE: - xfree(opt.profile); - opt.profile = xstrdup(optarg); + opt.profile = acct_gather_profile_from_string(optarg); break; case LONG_OPT_RESERVATION: xfree(opt.reservation); @@ -2276,7 +2279,8 @@ static void _opt_list(void) opt.jobid_set ? "(set)" : "(default)"); info("partition : %s", opt.partition == NULL ? "default" : opt.partition); - info("profile : `%s'", opt.profile); + info("profile : `%s'", + acct_gather_profile_to_string(opt.profile)); info("job name : `%s'", opt.job_name); info("reservation : `%s'", opt.reservation); info("wckey : `%s'", opt.wckey); diff --git a/src/srun/libsrun/opt.h b/src/srun/libsrun/opt.h index 862d29561ff..4fb4eda4629 100644 --- a/src/srun/libsrun/opt.h +++ b/src/srun/libsrun/opt.h @@ -173,7 +173,7 @@ typedef struct srun_options { bool parallel_debug; /* srun controlled by debugger */ bool debugger_test; /* --debugger-test */ bool test_only; /* --test-only */ - char *profile; /* --profile=[all | none} */ + uint32_t profile; /* --profile=[all | none} */ char *propagate; /* --propagate[=RLIMIT_CORE,...]*/ char *task_epilog; /* --task-epilog= */ char *task_prolog; /* --task-prolog= */ -- GitLab