From c3c0663c6d0f9670050f815de7b00aafb05efe2e Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Tue, 18 Jan 2011 23:43:30 +0000
Subject: [PATCH] Added new configuration parameter MaxJobId. Once reached,
 job id values restart at FirstJobId.

---
 NEWS                             |   2 +
 RELEASE_NOTES                    |   6 +
 doc/html/configurator.html.in    |   1 +
 doc/man/man5/slurm.conf.5        |  10 +
 slurm/slurm.h.in                 |   1 +
 src/api/config_info.c            |   7 +
 src/common/read_config.c         |   7 +-
 src/common/read_config.h         |   1 +
 src/common/slurm_protocol_pack.c | 478 ++++++++++++++++++++++++++++++-
 src/slurmctld/job_mgr.c          |  12 +-
 src/slurmctld/proc_req.c         |   1 +
 11 files changed, 515 insertions(+), 11 deletions(-)

diff --git a/NEWS b/NEWS
index 26d2ac209d6..3dd8df2cf05 100644
--- a/NEWS
+++ b/NEWS
@@ -16,6 +16,8 @@ documents those changes that are of interest to users and admins.
    a batch job's script.
 -- Add ability to create reservations or partitions and submit batch jobs
    using sview. Also add the ability to delete reservations and partitions.
+ -- Added new configuration parameter MaxJobId. Once reached, job id values
+    restart at FirstJobId.
 
 * Changes in SLURM 2.2.1
 ========================

diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 4acf5578316..8ce2525bb7f 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -37,6 +37,9 @@ CONFIGURATION FILE CHANGES (see "man slurm.conf" for details)
 
 * DebugFlags of Frontend added
 
+* Added new configuration parameter MaxJobId. Use with FirstJobId to limit
+  the range of job ID values.
+
 COMMAND CHANGES (see man pages for details)
 ===========================================
 
@@ -68,6 +71,9 @@
 front_end_info_t
 	entirely new structure
 job_info_t
 	batch_host	name of the host running the batch script
+slurm_ctl_conf
+	max_job_id	maximum supported job id before starting over
+			with first_job_id
 slurm_step_layout
 	front_end	name of front end host running the step

diff --git a/doc/html/configurator.html.in b/doc/html/configurator.html.in
index 4e41421cff9..39a6772a0ce 100644
--- a/doc/html/configurator.html.in
+++ b/doc/html/configurator.html.in
@@ -152,6 +152,7 @@ function displayfile()
    get_field("Epilog",document.config.epilog) + "<br>" +
    "#PrologSlurmctld= <br>" +
    "#FirstJobId=1 <br>" +
+   "#MaxJobId=999999 <br>" +
    "#GresTypes= <br>" +
    "#GroupUpdateForce=0 <br>" +
    "#GroupUpdateTime=600 <br>" +

diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index ca353a1568b..e2614172b54 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -486,6 +486,7 @@ specific requested value. Job id values generated will incremented by 1
 for each subsequent job. This may be used to provide a meta\-scheduler
 with a job id space which is disjoint from the interactive jobs.
 The default value is 1.
+Also see \fBMaxJobId\fR.
 
 .TP
 \fBGetEnvTimeout\fR
@@ -737,6 +738,15 @@ jobs will fail. The default value is 10000 jobs. This value may not
 be reset via "scontrol reconfig". It only takes effect upon restart of
 the slurmctld daemon.
 
+.TP
+\fBMaxJobId\fR
+The maximum job id to be used for jobs submitted to SLURM without a
+specific requested value. Job id values generated will be incremented by 1
+for each subsequent job. This may be used to provide a meta\-scheduler
+with a job id space which is disjoint from the interactive jobs.
+The default value is 4294901760 (0xffff0000).
+Also see \fBFirstJobId\fR.
+
 .TP
 \fBMaxMemPerCPU\fR
 Maximum real memory size available per allocated CPU in MegaBytes. 
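The FirstJobId/MaxJobId pair documented above bounds the ids that slurmctld
assigns on its own: generated ids start at FirstJobId, increment by one, and
wrap back to FirstJobId upon reaching MaxJobId, so MaxJobId itself is never
assigned automatically. A minimal slurm.conf sketch, with hypothetical values
chosen only for illustration:

    # Leave ids 1..999999 to an external meta-scheduler; slurmctld
    # generates ids 1000000..1999999 and then wraps around.
    FirstJobId=1000000
    MaxJobId=2000000

Explicitly requested ids outside this window remain available, which is how
the disjoint meta-scheduler id space described in the man page is achieved.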
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index 234d4426689..a814de1e6f6 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -1760,6 +1760,7 @@ typedef struct slurm_ctl_conf {
 	char *licenses;		/* licenses available on this cluster */
 	char *mail_prog;	/* pathname of mail program */
 	uint32_t max_job_cnt;	/* maximum number of active jobs */
+	uint32_t max_job_id;	/* maximum job id before using first_job_id */
 	uint32_t max_mem_per_cpu; /* maximum MB memory per allocated CPU */
 	uint16_t max_tasks_per_node; /* maximum tasks per node */
 	uint16_t min_job_age;	/* COMPLETED jobs over this age (secs)

diff --git a/src/api/config_info.c b/src/api/config_info.c
index e52d499e2bf..125f29540dd 100644
--- a/src/api/config_info.c
+++ b/src/api/config_info.c
@@ -538,6 +538,13 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr)
 	key_pair->value = xstrdup(tmp_str);
 	list_append(ret_list, key_pair);
 
+	snprintf(tmp_str, sizeof(tmp_str), "%u",
+		 slurm_ctl_conf_ptr->max_job_id);
+	key_pair = xmalloc(sizeof(config_key_pair_t));
+	key_pair->name = xstrdup("MaxJobId");
+	key_pair->value = xstrdup(tmp_str);
+	list_append(ret_list, key_pair);
+
 	key_pair = xmalloc(sizeof(config_key_pair_t));
 	list_append(ret_list, key_pair);
 	key_pair->name = xstrdup("MaxMemPerCPU");

diff --git a/src/common/read_config.c b/src/common/read_config.c
index db15d904aac..1dc532d30cf 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -205,6 +205,7 @@ s_p_options_t slurm_conf_options[] = {
 	{"Licenses", S_P_STRING},
 	{"MailProg", S_P_STRING},
 	{"MaxJobCount", S_P_UINT32},
+	{"MaxJobId", S_P_UINT32},
 	{"MaxMemPerCPU", S_P_UINT32},
 	{"MaxMemPerNode", S_P_UINT32},
 	{"MaxTasksPerNode", S_P_UINT16},
@@ -1780,7 +1781,7 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr)
 	xfree (ctl_conf_ptr->epilog);
 	ctl_conf_ptr->epilog_msg_time = (uint32_t) NO_VAL;
 	ctl_conf_ptr->fast_schedule = (uint16_t) NO_VAL;
-	ctl_conf_ptr->first_job_id = (uint32_t) NO_VAL;
+	ctl_conf_ptr->first_job_id = NO_VAL;
 	ctl_conf_ptr->get_env_timeout = 0;
 	xfree(ctl_conf_ptr->gres_plugins);
 	ctl_conf_ptr->group_info = (uint16_t) NO_VAL;
@@ -1805,6 +1806,7 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr)
 	xfree (ctl_conf_ptr->licenses);
 	xfree (ctl_conf_ptr->mail_prog);
 	ctl_conf_ptr->max_job_cnt = (uint16_t) NO_VAL;
+	ctl_conf_ptr->max_job_id = NO_VAL;
 	ctl_conf_ptr->max_mem_per_cpu = 0;
 	ctl_conf_ptr->min_job_age = (uint16_t) NO_VAL;
 	xfree (ctl_conf_ptr->mpi_default);
@@ -2355,6 +2357,9 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 	if (!s_p_get_uint32(&conf->max_job_cnt, "MaxJobCount", hashtbl))
 		conf->max_job_cnt = DEFAULT_MAX_JOB_COUNT;
 
+	if (!s_p_get_uint32(&conf->max_job_id, "MaxJobId", hashtbl))
+		conf->max_job_id = DEFAULT_MAX_JOB_ID;
+
 	if (s_p_get_uint32(&conf->max_mem_per_cpu,
 			   "MaxMemPerCPU", hashtbl)) {
 		conf->max_mem_per_cpu |= MEM_PER_CPU;

diff --git a/src/common/read_config.h b/src/common/read_config.h
index d5b6af06331..f1d3c7d129b 100644
--- a/src/common/read_config.h
+++ b/src/common/read_config.h
@@ -87,6 +87,7 @@ extern char *default_plugstack;
 #define DEFAULT_KILL_WAIT 30
 #define DEFAULT_MAIL_PROG "/bin/mail"
 #define 
DEFAULT_MAX_JOB_COUNT 10000 +#define DEFAULT_MAX_JOB_ID 0xffff0000 #define DEFAULT_MEM_PER_CPU 0 #define DEFAULT_MAX_MEM_PER_CPU 0 #define DEFAULT_MIN_JOB_AGE 300 diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 8a24769f655..bf3ce13be50 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -54,6 +54,7 @@ #include "src/common/slurm_accounting_storage.h" #include "src/common/slurm_jobacct_gather.h" #include "src/common/pack.h" +#include "src/common/read_config.h" #include "src/common/slurm_auth.h" #include "src/common/slurm_cred.h" #include "src/common/slurm_protocol_api.h" @@ -4064,7 +4065,7 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, uint16_t uint16_tmp; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); - if(protocol_version >= SLURM_2_2_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { pack_time(build_ptr->last_update, buffer); pack16(build_ptr->accounting_storage_enforce, buffer); @@ -4136,6 +4137,7 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, packstr(build_ptr->mail_prog, buffer); pack32(build_ptr->max_job_cnt, buffer); + pack32(build_ptr->max_job_id, buffer); pack32(build_ptr->max_mem_per_cpu, buffer); pack16(build_ptr->max_tasks_per_node, buffer); pack16(build_ptr->min_job_age, buffer); @@ -4258,7 +4260,201 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack16(build_ptr->z_16, buffer); pack32(build_ptr->z_32, buffer); packstr(build_ptr->z_char, buffer); - } else if(protocol_version >= SLURM_2_1_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) { + pack_time(build_ptr->last_update, buffer); + + pack16(build_ptr->accounting_storage_enforce, buffer); + packstr(build_ptr->accounting_storage_backup_host, buffer); + packstr(build_ptr->accounting_storage_host, buffer); + packstr(build_ptr->accounting_storage_loc, buffer); + pack32(build_ptr->accounting_storage_port, buffer); + packstr(build_ptr->accounting_storage_type, buffer); + packstr(build_ptr->accounting_storage_user, buffer); + + packstr(build_ptr->authtype, buffer); + + packstr(build_ptr->backup_addr, buffer); + packstr(build_ptr->backup_controller, buffer); + pack16(build_ptr->batch_start_timeout, buffer); + pack_time(build_ptr->boot_time, buffer); + + packstr(build_ptr->checkpoint_type, buffer); + packstr(build_ptr->cluster_name, buffer); + pack16(build_ptr->complete_wait, buffer); + packstr(build_ptr->control_addr, buffer); + packstr(build_ptr->control_machine, buffer); + packstr(build_ptr->crypto_type, buffer); + + pack32(build_ptr->def_mem_per_cpu, buffer); + pack32(build_ptr->debug_flags, buffer); + pack16(build_ptr->disable_root_jobs, buffer); + + pack16(build_ptr->enforce_part_limits, buffer); + packstr(build_ptr->epilog, buffer); + pack32(build_ptr->epilog_msg_time, buffer); + packstr(build_ptr->epilog_slurmctld, buffer); + + pack16(build_ptr->fast_schedule, buffer); + pack32(build_ptr->first_job_id, buffer); + + pack16(build_ptr->get_env_timeout, buffer); + packstr(build_ptr->gres_plugins, buffer); + pack16(build_ptr->group_info, buffer); + + pack32(build_ptr->hash_val, buffer); + + pack16(build_ptr->health_check_interval, buffer); + packstr(build_ptr->health_check_program, buffer); + + pack16(build_ptr->inactive_limit, buffer); + + pack16(build_ptr->job_acct_gather_freq, buffer); + packstr(build_ptr->job_acct_gather_type, buffer); + + packstr(build_ptr->job_ckpt_dir, buffer); + + 
packstr(build_ptr->job_comp_host, buffer); + packstr(build_ptr->job_comp_loc, buffer); + pack32((uint32_t)build_ptr->job_comp_port, buffer); + packstr(build_ptr->job_comp_type, buffer); + packstr(build_ptr->job_comp_user, buffer); + + packstr(build_ptr->job_credential_private_key, buffer); + packstr(build_ptr->job_credential_public_certificate, buffer); + pack16(build_ptr->job_file_append, buffer); + pack16(build_ptr->job_requeue, buffer); + packstr(build_ptr->job_submit_plugins, buffer); + + pack16(build_ptr->kill_on_bad_exit, buffer); + pack16(build_ptr->kill_wait, buffer); + + packstr(build_ptr->licenses, buffer); + + packstr(build_ptr->mail_prog, buffer); + pack32(build_ptr->max_job_cnt, buffer); + pack32(build_ptr->max_mem_per_cpu, buffer); + pack16(build_ptr->max_tasks_per_node, buffer); + pack16(build_ptr->min_job_age, buffer); + packstr(build_ptr->mpi_default, buffer); + packstr(build_ptr->mpi_params, buffer); + pack16(build_ptr->msg_timeout, buffer); + + pack32(build_ptr->next_job_id, buffer); + packstr(build_ptr->node_prefix, buffer); + + pack16(build_ptr->over_time_limit, buffer); + + packstr(build_ptr->plugindir, buffer); + packstr(build_ptr->plugstack, buffer); + pack16(build_ptr->preempt_mode, buffer); + packstr(build_ptr->preempt_type, buffer); + + pack32(build_ptr->priority_decay_hl, buffer); + pack32(build_ptr->priority_calc_period, buffer); + pack16(build_ptr->priority_favor_small, buffer); + pack32(build_ptr->priority_max_age, buffer); + pack16(build_ptr->priority_reset_period, buffer); + packstr(build_ptr->priority_type, buffer); + pack32(build_ptr->priority_weight_age, buffer); + pack32(build_ptr->priority_weight_fs, buffer); + pack32(build_ptr->priority_weight_js, buffer); + pack32(build_ptr->priority_weight_part, buffer); + pack32(build_ptr->priority_weight_qos, buffer); + + pack16(build_ptr->private_data, buffer); + packstr(build_ptr->proctrack_type, buffer); + packstr(build_ptr->prolog, buffer); + packstr(build_ptr->prolog_slurmctld, buffer); + pack16(build_ptr->propagate_prio_process, buffer); + packstr(build_ptr->propagate_rlimits, buffer); + packstr(build_ptr->propagate_rlimits_except, buffer); + + packstr(build_ptr->resume_program, buffer); + pack16(build_ptr->resume_rate, buffer); + pack16(build_ptr->resume_timeout, buffer); + pack16(build_ptr->resv_over_run, buffer); + pack16(build_ptr->ret2service, buffer); + + packstr(build_ptr->salloc_default_command, buffer); + packstr(build_ptr->sched_params, buffer); + pack16(build_ptr->schedport, buffer); + pack16(build_ptr->schedrootfltr, buffer); + packstr(build_ptr->sched_logfile, buffer); + pack16(build_ptr->sched_log_level, buffer); + pack16(build_ptr->sched_time_slice, buffer); + packstr(build_ptr->schedtype, buffer); + packstr(build_ptr->select_type, buffer); + if(build_ptr->select_conf_key_pairs) + count = list_count(build_ptr->select_conf_key_pairs); + + pack32(count, buffer); + if(count && count != NO_VAL) { + ListIterator itr = list_iterator_create( + (List)build_ptr->select_conf_key_pairs); + config_key_pair_t *key_pair = NULL; + while((key_pair = list_next(itr))) { + pack_config_key_pair(key_pair, + protocol_version, buffer); + } + list_iterator_destroy(itr); + } + count = NO_VAL; + + pack16(build_ptr->select_type_param, buffer); + + packstr(build_ptr->slurm_conf, buffer); + pack32(build_ptr->slurm_user_id, buffer); + packstr(build_ptr->slurm_user_name, buffer); + pack32(build_ptr->slurmd_user_id, buffer); + packstr(build_ptr->slurmd_user_name, buffer); + + pack16(build_ptr->slurmctld_debug, buffer); + 
packstr(build_ptr->slurmctld_logfile, buffer); + packstr(build_ptr->slurmctld_pidfile, buffer); + pack32(build_ptr->slurmctld_port, buffer); + pack16(build_ptr->slurmctld_port_count, buffer); + pack16(build_ptr->slurmctld_timeout, buffer); + + pack16(build_ptr->slurmd_debug, buffer); + packstr(build_ptr->slurmd_logfile, buffer); + packstr(build_ptr->slurmd_pidfile, buffer); + if(!(cluster_flags & CLUSTER_FLAG_MULTSD)) + pack32(build_ptr->slurmd_port, buffer); + + packstr(build_ptr->slurmd_spooldir, buffer); + pack16(build_ptr->slurmd_timeout, buffer); + packstr(build_ptr->srun_epilog, buffer); + packstr(build_ptr->srun_prolog, buffer); + packstr(build_ptr->state_save_location, buffer); + packstr(build_ptr->suspend_exc_nodes, buffer); + packstr(build_ptr->suspend_exc_parts, buffer); + packstr(build_ptr->suspend_program, buffer); + pack16(build_ptr->suspend_rate, buffer); + pack32(build_ptr->suspend_time, buffer); + pack16(build_ptr->suspend_timeout, buffer); + packstr(build_ptr->switch_type, buffer); + + packstr(build_ptr->task_epilog, buffer); + packstr(build_ptr->task_prolog, buffer); + packstr(build_ptr->task_plugin, buffer); + pack16(build_ptr->task_plugin_param, buffer); + packstr(build_ptr->tmp_fs, buffer); + packstr(build_ptr->topology_plugin, buffer); + pack16(build_ptr->track_wckey, buffer); + pack16(build_ptr->tree_width, buffer); + + pack16(build_ptr->use_pam, buffer); + packstr(build_ptr->unkillable_program, buffer); + pack16(build_ptr->unkillable_timeout, buffer); + packstr(build_ptr->version, buffer); + pack16(build_ptr->vsize_factor, buffer); + + pack16(build_ptr->wait_time, buffer); + + pack16(build_ptr->z_16, buffer); + pack32(build_ptr->z_32, buffer); + packstr(build_ptr->z_char, buffer); + } else if (protocol_version >= SLURM_2_1_PROTOCOL_VERSION) { pack_time(build_ptr->last_update, buffer); pack16(build_ptr->accounting_storage_enforce, buffer); @@ -4471,7 +4667,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, build_ptr->hash_val = NO_VAL; /* load the data values */ - if(protocol_version >= SLURM_2_2_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { /* unpack timestamp of snapshot */ safe_unpack_time(&build_ptr->last_update, buffer); @@ -4574,6 +4770,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpackstr_xmalloc(&build_ptr->mail_prog, &uint32_tmp, buffer); safe_unpack32(&build_ptr->max_job_cnt, buffer); + safe_unpack32(&build_ptr->max_job_id, buffer); safe_unpack32(&build_ptr->max_mem_per_cpu, buffer); safe_unpack16(&build_ptr->max_tasks_per_node, buffer); safe_unpack16(&build_ptr->min_job_age, buffer); @@ -4645,7 +4842,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpackstr_xmalloc(&build_ptr->select_type, &uint32_tmp, buffer); safe_unpack32(&count, buffer); - if(count != NO_VAL) { + if (count != NO_VAL) { List tmp_list = list_create(destroy_config_key_pair); config_key_pair_t *object = NULL; int i; @@ -4685,7 +4882,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, buffer); safe_unpackstr_xmalloc(&build_ptr->slurmd_pidfile, &uint32_tmp, buffer); - if(!(cluster_flags & CLUSTER_FLAG_MULTSD)) + if (!(cluster_flags & CLUSTER_FLAG_MULTSD)) safe_unpack32(&build_ptr->slurmd_port, buffer); safe_unpackstr_xmalloc(&build_ptr->slurmd_spooldir, @@ -4738,7 +4935,275 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpack32(&build_ptr->z_32, buffer); safe_unpackstr_xmalloc(&build_ptr->z_char, 
&uint32_tmp, buffer); - } else if(protocol_version >= SLURM_2_1_PROTOCOL_VERSION) { + } else if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) { + /* unpack timestamp of snapshot */ + safe_unpack_time(&build_ptr->last_update, buffer); + + safe_unpack16(&build_ptr->accounting_storage_enforce, buffer); + safe_unpackstr_xmalloc( + &build_ptr->accounting_storage_backup_host, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->accounting_storage_host, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->accounting_storage_loc, + &uint32_tmp, buffer); + safe_unpack32(&build_ptr->accounting_storage_port, buffer); + safe_unpackstr_xmalloc(&build_ptr->accounting_storage_type, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->accounting_storage_user, + &uint32_tmp, buffer); + + safe_unpackstr_xmalloc(&build_ptr->authtype, + &uint32_tmp, buffer); + + safe_unpackstr_xmalloc(&build_ptr->backup_addr, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->backup_controller, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->batch_start_timeout, buffer); + safe_unpack_time(&build_ptr->boot_time, buffer); + + safe_unpackstr_xmalloc(&build_ptr->checkpoint_type, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->cluster_name, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->complete_wait, buffer); + safe_unpackstr_xmalloc(&build_ptr->control_addr, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->control_machine, + &uint32_tmp,buffer); + safe_unpackstr_xmalloc(&build_ptr->crypto_type, &uint32_tmp, + buffer); + + safe_unpack32(&build_ptr->def_mem_per_cpu, buffer); + safe_unpack32(&build_ptr->debug_flags, buffer); + safe_unpack16(&build_ptr->disable_root_jobs, buffer); + + safe_unpack16(&build_ptr->enforce_part_limits, buffer); + safe_unpackstr_xmalloc(&build_ptr->epilog, &uint32_tmp, + buffer); + safe_unpack32(&build_ptr->epilog_msg_time, buffer); + safe_unpackstr_xmalloc(&build_ptr->epilog_slurmctld, + &uint32_tmp, buffer); + + safe_unpack16(&build_ptr->fast_schedule, buffer); + safe_unpack32(&build_ptr->first_job_id, buffer); + + safe_unpack16(&build_ptr->get_env_timeout, buffer); + safe_unpackstr_xmalloc(&build_ptr->gres_plugins, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->group_info, buffer); + + safe_unpack32(&build_ptr->hash_val, buffer); + + safe_unpack16(&build_ptr->health_check_interval, buffer); + safe_unpackstr_xmalloc(&build_ptr->health_check_program, + &uint32_tmp, buffer); + + safe_unpack16(&build_ptr->inactive_limit, buffer); + + safe_unpack16(&build_ptr->job_acct_gather_freq, buffer); + safe_unpackstr_xmalloc(&build_ptr->job_acct_gather_type, + &uint32_tmp, buffer); + + safe_unpackstr_xmalloc(&build_ptr->job_ckpt_dir, + &uint32_tmp, buffer); + + safe_unpackstr_xmalloc(&build_ptr->job_comp_host, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->job_comp_loc, + &uint32_tmp, buffer); + safe_unpack32(&build_ptr->job_comp_port, buffer); + safe_unpackstr_xmalloc(&build_ptr->job_comp_type, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->job_comp_user, + &uint32_tmp, buffer); + + safe_unpackstr_xmalloc(&build_ptr->job_credential_private_key, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr-> + job_credential_public_certificate, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->job_file_append, buffer); + safe_unpack16(&build_ptr->job_requeue, buffer); + safe_unpackstr_xmalloc(&build_ptr->job_submit_plugins, + &uint32_tmp, buffer); + + safe_unpack16(&build_ptr->kill_on_bad_exit, buffer); + 
safe_unpack16(&build_ptr->kill_wait, buffer); + + safe_unpackstr_xmalloc(&build_ptr->licenses, + &uint32_tmp, buffer); + + safe_unpackstr_xmalloc(&build_ptr->mail_prog, + &uint32_tmp, buffer); + safe_unpack32(&build_ptr->max_job_cnt, buffer); + build_ptr->max_job_id = DEFAULT_MAX_JOB_ID; + safe_unpack32(&build_ptr->max_mem_per_cpu, buffer); + safe_unpack16(&build_ptr->max_tasks_per_node, buffer); + safe_unpack16(&build_ptr->min_job_age, buffer); + safe_unpackstr_xmalloc(&build_ptr->mpi_default, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->mpi_params, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->msg_timeout, buffer); + + safe_unpack32(&build_ptr->next_job_id, buffer); + safe_unpackstr_xmalloc(&build_ptr->node_prefix, + &uint32_tmp, buffer); + + safe_unpack16(&build_ptr->over_time_limit, buffer); + + safe_unpackstr_xmalloc(&build_ptr->plugindir, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->plugstack, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->preempt_mode, buffer); + safe_unpackstr_xmalloc(&build_ptr->preempt_type, + &uint32_tmp, buffer); + + safe_unpack32(&build_ptr->priority_decay_hl, buffer); + safe_unpack32(&build_ptr->priority_calc_period, buffer); + safe_unpack16(&build_ptr->priority_favor_small, buffer); + safe_unpack32(&build_ptr->priority_max_age, buffer); + safe_unpack16(&build_ptr->priority_reset_period, buffer); + safe_unpackstr_xmalloc(&build_ptr->priority_type, &uint32_tmp, + buffer); + safe_unpack32(&build_ptr->priority_weight_age, buffer); + safe_unpack32(&build_ptr->priority_weight_fs, buffer); + safe_unpack32(&build_ptr->priority_weight_js, buffer); + safe_unpack32(&build_ptr->priority_weight_part, buffer); + safe_unpack32(&build_ptr->priority_weight_qos, buffer); + + safe_unpack16(&build_ptr->private_data, buffer); + safe_unpackstr_xmalloc(&build_ptr->proctrack_type, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&build_ptr->prolog, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&build_ptr->prolog_slurmctld, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->propagate_prio_process, buffer); + safe_unpackstr_xmalloc(&build_ptr->propagate_rlimits, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->propagate_rlimits_except, + &uint32_tmp, buffer); + + safe_unpackstr_xmalloc(&build_ptr->resume_program, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->resume_rate, buffer); + safe_unpack16(&build_ptr->resume_timeout, buffer); + safe_unpack16(&build_ptr->resv_over_run, buffer); + safe_unpack16(&build_ptr->ret2service, buffer); + + safe_unpackstr_xmalloc(&build_ptr->salloc_default_command, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->sched_params, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->schedport, buffer); + safe_unpack16(&build_ptr->schedrootfltr, buffer); + safe_unpackstr_xmalloc(&build_ptr->sched_logfile, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->sched_log_level, buffer); + safe_unpack16(&build_ptr->sched_time_slice, buffer); + safe_unpackstr_xmalloc(&build_ptr->schedtype, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->select_type, + &uint32_tmp, buffer); + safe_unpack32(&count, buffer); + if (count != NO_VAL) { + List tmp_list = list_create(destroy_config_key_pair); + config_key_pair_t *object = NULL; + int i; + for(i=0; i<count; i++) { + if(unpack_config_key_pair( + (void *)&object, protocol_version, + buffer) + == SLURM_ERROR) + goto unpack_error; + list_append(tmp_list, object); + } + build_ptr->select_conf_key_pairs = (void *)tmp_list; + } + + 
safe_unpack16(&build_ptr->select_type_param, buffer); + + safe_unpackstr_xmalloc(&build_ptr->slurm_conf, + &uint32_tmp, buffer); + safe_unpack32(&build_ptr->slurm_user_id, buffer); + safe_unpackstr_xmalloc(&build_ptr->slurm_user_name, + &uint32_tmp, buffer); + safe_unpack32(&build_ptr->slurmd_user_id, buffer); + safe_unpackstr_xmalloc(&build_ptr->slurmd_user_name, + &uint32_tmp, buffer); + + safe_unpack16(&build_ptr->slurmctld_debug, buffer); + safe_unpackstr_xmalloc(&build_ptr->slurmctld_logfile, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->slurmctld_pidfile, + &uint32_tmp, buffer); + safe_unpack32(&build_ptr->slurmctld_port, buffer); + safe_unpack16(&build_ptr->slurmctld_port_count, buffer); + safe_unpack16(&build_ptr->slurmctld_timeout, buffer); + + safe_unpack16(&build_ptr->slurmd_debug, buffer); + safe_unpackstr_xmalloc(&build_ptr->slurmd_logfile, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&build_ptr->slurmd_pidfile, &uint32_tmp, + buffer); + if (!(cluster_flags & CLUSTER_FLAG_MULTSD)) + safe_unpack32(&build_ptr->slurmd_port, buffer); + + safe_unpackstr_xmalloc(&build_ptr->slurmd_spooldir, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->slurmd_timeout, buffer); + + safe_unpackstr_xmalloc(&build_ptr->srun_epilog, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->srun_prolog, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->state_save_location, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->suspend_exc_nodes, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->suspend_exc_parts, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->suspend_program, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->suspend_rate, buffer); + safe_unpack32(&build_ptr->suspend_time, buffer); + safe_unpack16(&build_ptr->suspend_timeout, buffer); + safe_unpackstr_xmalloc(&build_ptr->switch_type, + &uint32_tmp, buffer); + + safe_unpackstr_xmalloc(&build_ptr->task_epilog, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->task_prolog, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&build_ptr->task_plugin, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->task_plugin_param, buffer); + safe_unpackstr_xmalloc(&build_ptr->tmp_fs, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&build_ptr->topology_plugin, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->track_wckey, buffer); + safe_unpack16(&build_ptr->tree_width, buffer); + + safe_unpack16(&build_ptr->use_pam, buffer); + safe_unpackstr_xmalloc(&build_ptr->unkillable_program, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->unkillable_timeout, buffer); + safe_unpackstr_xmalloc(&build_ptr->version, + &uint32_tmp, buffer); + safe_unpack16(&build_ptr->vsize_factor, buffer); + + safe_unpack16(&build_ptr->wait_time, buffer); + + safe_unpack16(&build_ptr->z_16, buffer); + safe_unpack32(&build_ptr->z_32, buffer); + safe_unpackstr_xmalloc(&build_ptr->z_char, &uint32_tmp, + buffer); + } else if (protocol_version >= SLURM_2_1_PROTOCOL_VERSION) { uint16_t max_job_cnt; char *tmp_str = NULL; /* unpack timestamp of snapshot */ @@ -4844,6 +5309,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, safe_unpackstr_xmalloc(&build_ptr->mail_prog, &uint32_tmp, buffer); max_job_cnt = MIN(build_ptr->max_job_cnt, 0xfffe); + build_ptr->max_job_id = DEFAULT_MAX_JOB_ID; safe_unpack16(&max_job_cnt, buffer); safe_unpack32(&build_ptr->max_mem_per_cpu, buffer); safe_unpack16(&build_ptr->max_tasks_per_node, buffer); diff --git a/src/slurmctld/job_mgr.c 
b/src/slurmctld/job_mgr.c
index f34604d5961..d811fcfe542 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -5947,7 +5947,7 @@ extern uint32_t get_next_job_id(void)
 	job_id_sequence = MAX(job_id_sequence, slurmctld_conf.first_job_id);
 	next_id = job_id_sequence + 1;
-	if (next_id >= MIN_NOALLOC_JOBID)
+	if (next_id >= slurmctld_conf.max_job_id)
 		next_id = slurmctld_conf.first_job_id;
 	return next_id;
 }
 
@@ -5958,6 +5958,7 @@
  */
 static void _set_job_id(struct job_record *job_ptr)
 {
+	int i;
 	uint32_t new_id;
 
 	job_id_sequence = MAX(job_id_sequence, slurmctld_conf.first_job_id);
@@ -5969,15 +5970,18 @@ static void _set_job_id(struct job_record *job_ptr)
 		fatal("_set_job_id: partition not set");
 
 	/* Insure no conflict in job id if we roll over 32 bits */
-	while (1) {
-		if (++job_id_sequence >= MIN_NOALLOC_JOBID)
+	for (i = 0; i < 1000; i++) {
+		if (++job_id_sequence >= slurmctld_conf.max_job_id)
 			job_id_sequence = slurmctld_conf.first_job_id;
 		new_id = job_id_sequence;
 		if (find_job_record(new_id) == NULL) {
 			job_ptr->job_id = new_id;
-			break;
+			return;
 		}
 	}
+	fatal("We have exhausted our supply of valid job id values. "
+	      "FirstJobId=%u MaxJobId=%u", slurmctld_conf.first_job_id,
+	      slurmctld_conf.max_job_id);
 }

diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 53b937d03c1..4a3bdafab60 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -507,6 +507,7 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr)
 	conf_ptr->mail_prog           = xstrdup(conf->mail_prog);
 	conf_ptr->max_job_cnt         = conf->max_job_cnt;
+	conf_ptr->max_job_id          = conf->max_job_id;
 	conf_ptr->max_mem_per_cpu     = conf->max_mem_per_cpu;
 	conf_ptr->max_tasks_per_node  = conf->max_tasks_per_node;
 	conf_ptr->min_job_age         = conf->min_job_age;
-- 
GitLab
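As background for the job_mgr.c change, the sketch below models the bounded
wraparound search that _set_job_id performs after this patch: advance a
sequence number, wrap from MaxJobId back to FirstJobId, and abandon the
search after 1000 occupied ids rather than looping forever. It is a
standalone illustration, not slurmctld code: first_job_id, max_job_id,
job_id_sequence, and id_in_use() are hypothetical stand-ins (id_in_use()
plays the role of find_job_record()), and the values mirror the defaults
for FirstJobId and MaxJobId.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    static uint32_t first_job_id = 1;           /* FirstJobId default */
    static uint32_t max_job_id = 0xffff0000;    /* MaxJobId default */
    static uint32_t job_id_sequence = 0;        /* last id handed out */

    /* Stand-in for find_job_record(): report whether an id is taken. */
    static bool id_in_use(uint32_t id)
    {
        (void) id;
        return false;   /* model: every id is free */
    }

    /* Advance the sequence, wrapping from max_job_id back to
     * first_job_id; give up after 1000 occupied ids instead of
     * spinning forever when the id space is exhausted. */
    static uint32_t next_job_id(void)
    {
        int i;

        if (job_id_sequence < first_job_id)
            job_id_sequence = first_job_id;
        for (i = 0; i < 1000; i++) {
            if (++job_id_sequence >= max_job_id)
                job_id_sequence = first_job_id;
            if (!id_in_use(job_id_sequence))
                return job_id_sequence;
        }
        fprintf(stderr, "job id space exhausted: FirstJobId=%u "
                "MaxJobId=%u\n", first_job_id, max_job_id);
        exit(1);
    }

    int main(void)
    {
        printf("%u\n", next_job_id());  /* 2 with the defaults */
        printf("%u\n", next_job_id());  /* 3 */
        return 0;
    }

Capping the probe count is a deliberate trade-off: if 1000 consecutive ids
are all in use, the search stops rather than scanning the whole window,
keeping the worst case bounded at the cost of declaring exhaustion early.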