config_info.c 18.19 KiB
/****************************************************************************\
* config_info.c - get/print the system configuration information of slurm
*****************************************************************************
* Copyright (C) 2002-2007 The Regents of the University of California.
* Copyright (C) 2008 Lawrence Livermore National Security.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Morris Jette <jette1@llnl.gov> and Kevin Tew <tew1@llnl.gov>.
* LLNL-CODE-402394.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.llnl.gov/linux/slurm/>.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <errno.h>
#include <stdio.h>
#include <slurm/slurm.h>
#include "src/api/job_info.h"
#include "src/common/parse_time.h"
#include "src/common/slurm_auth.h"
#include "src/common/slurm_protocol_api.h"
/*
 * slurm_api_version - Report the SLURM API version as one packed number.
 *	The SLURM_VERSION_NUM, SLURM_VERSION_MAJOR, SLURM_VERSION_MINOR, and
 *	SLURM_VERSION_MICRO macros are the intended way to work with the
 *	returned value.
 * RET the value of SLURM_API_VERSION, converted to long
 */
extern long slurm_api_version (void)
{
	const long api_version = SLURM_API_VERSION;

	return api_version;
}
/*
 * _select_info - translate a select_type_param value into a printable name
 * IN select_type_param - value to translate; compared against
 *	SELECT_TYPE_INFO_NONE and the CR_* constants
 * RET pointer to a string literal naming the value, or "unknown" when the
 *	value matches none of the known constants. The string is read-only
 *	and must not be modified or freed by the caller.
 */
static const char *
_select_info(uint16_t select_type_param)
{
	switch (select_type_param) {
	case SELECT_TYPE_INFO_NONE:
		return "NONE";
	case CR_CPU:
		return "CR_CPU";
	case CR_SOCKET:
		return "CR_SOCKET";
	case CR_CORE:
		return "CR_CORE";
	case CR_MEMORY:
		return "CR_MEMORY";
	case CR_SOCKET_MEMORY:
		return "CR_SOCKET_MEMORY";
	case CR_CORE_MEMORY:
		return "CR_CORE_MEMORY";
	case CR_CPU_MEMORY:
		return "CR_CPU_MEMORY";
	default:
		return "unknown";
	}
}
/*
 * _task_plugin_param - translate a task_plugin_param value into a printable
 *	name
 * IN task_plugin_param - value to translate; compared against the
 *	TASK_PARAM_* constants
 * RET pointer to a string literal naming the value, or "unknown" when the
 *	value matches none of the known constants. The string is read-only
 *	and must not be modified or freed by the caller.
 */
static const char *_task_plugin_param(uint16_t task_plugin_param)
{
	switch (task_plugin_param) {
	case TASK_PARAM_NONE:
		return "none";
	case TASK_PARAM_CPUSETS:
		return "cpusets";
	case TASK_PARAM_SCHED:
		return "sched";
	default:
		return "unknown";
	}
}
/*
 * _conf_str - make a possibly-NULL string safe to pass to a "%s" conversion
 * IN str - string to print, may be NULL
 * RET str itself, or the literal "(null)" when str is NULL
 * NOTE: Passing a NULL pointer for "%s" is undefined behavior in ISO C.
 *	"(null)" is the text glibc prints in that situation, so output on
 *	glibc systems is unchanged by going through this helper.
 */
static const char *_conf_str(const char *str)
{
	return str ? str : "(null)";
}

/*
 * slurm_print_ctl_conf - output the contents of slurm control configuration
 *	message as loaded using slurm_load_ctl_conf
 * IN out - file to write to (used without a NULL check)
 * IN slurm_ctl_conf_ptr - slurm control configuration pointer; if NULL the
 *	function returns without writing anything
 * NOTE: One "Name = value" line is written per reported field. String
 *	fields that are NULL are written as "(null)".
 */
void slurm_print_ctl_conf ( FILE* out,
			    slurm_ctl_conf_info_msg_t * slurm_ctl_conf_ptr )
{
	char time_str[32], tmp_str[128];

	if ( slurm_ctl_conf_ptr == NULL )
		return ;

	slurm_make_time_str ((time_t *)&slurm_ctl_conf_ptr->last_update,
			     time_str, sizeof(time_str));
	fprintf(out, "Configuration data as of %s\n", time_str);
	fprintf(out, "AccountingStorageEnforce = %u\n",
		slurm_ctl_conf_ptr->accounting_storage_enforce);
	fprintf(out, "AccountingStorageHost = %s\n",
		_conf_str(slurm_ctl_conf_ptr->accounting_storage_host));
	fprintf(out, "AccountingStorageLoc = %s\n",
		_conf_str(slurm_ctl_conf_ptr->accounting_storage_loc));
	fprintf(out, "AccountingStoragePass = %s\n",
		_conf_str(slurm_ctl_conf_ptr->accounting_storage_pass));
	fprintf(out, "AccountingStoragePort = %u\n",
		slurm_ctl_conf_ptr->accounting_storage_port);
	fprintf(out, "AccountingStorageType = %s\n",
		_conf_str(slurm_ctl_conf_ptr->accounting_storage_type));
	fprintf(out, "AccountingStorageUser = %s\n",
		_conf_str(slurm_ctl_conf_ptr->accounting_storage_user));
	fprintf(out, "AuthType = %s\n",
		_conf_str(slurm_ctl_conf_ptr->authtype));
	fprintf(out, "BackupAddr = %s\n",
		_conf_str(slurm_ctl_conf_ptr->backup_addr));
	fprintf(out, "BackupController = %s\n",
		_conf_str(slurm_ctl_conf_ptr->backup_controller));
	slurm_make_time_str ((time_t *)&slurm_ctl_conf_ptr->boot_time,
			     time_str, sizeof(time_str));
	fprintf(out, "BOOT_TIME = %s\n",
		time_str);
	fprintf(out, "CacheGroups = %u\n",
		slurm_ctl_conf_ptr->cache_groups);
	fprintf(out, "CheckpointType = %s\n",
		_conf_str(slurm_ctl_conf_ptr->checkpoint_type));
	fprintf(out, "ClusterName = %s\n",
		_conf_str(slurm_ctl_conf_ptr->cluster_name));
	fprintf(out, "ControlAddr = %s\n",
		_conf_str(slurm_ctl_conf_ptr->control_addr));
	fprintf(out, "ControlMachine = %s\n",
		_conf_str(slurm_ctl_conf_ptr->control_machine));
	fprintf(out, "CryptoType = %s\n",
		_conf_str(slurm_ctl_conf_ptr->crypto_type));

	/*
	 * The MEM_PER_CPU bit in def_mem_per_task selects the per-CPU label
	 * and is masked off before the value is printed. Without that bit a
	 * non-zero value is printed as a per-node limit, and zero is
	 * reported as UNLIMITED.
	 */
	if (slurm_ctl_conf_ptr->def_mem_per_task & MEM_PER_CPU) {
		fprintf(out, "DefMemPerCPU = %u\n",
			slurm_ctl_conf_ptr->def_mem_per_task &
			(~MEM_PER_CPU));
	} else if (slurm_ctl_conf_ptr->def_mem_per_task) {
		fprintf(out, "DefMemPerNode = %u\n",
			slurm_ctl_conf_ptr->def_mem_per_task);
	} else
		fprintf(out, "DefMemPerCPU = UNLIMITED\n");

	if (slurm_ctl_conf_ptr->disable_root_jobs)
		fprintf(out, "DisableRootJobs = YES\n");
	else
		fprintf(out, "DisableRootJobs = NO\n");
#if 0
/* Add in Slurm v1.4 */
	if (slurm_ctl_conf_ptr->enforce_part_limits)
		fprintf(out, "EnforcePartLimits = YES\n");
	else
		fprintf(out, "EnforcePartLimits = NO\n");
#endif
	fprintf(out, "Epilog = %s\n",
		_conf_str(slurm_ctl_conf_ptr->epilog));
	fprintf(out, "EpilogMsgTime = %u\n",
		slurm_ctl_conf_ptr->epilog_msg_time);
	fprintf(out, "FastSchedule = %u\n",
		slurm_ctl_conf_ptr->fast_schedule);
	fprintf(out, "FirstJobId = %u\n",
		slurm_ctl_conf_ptr->first_job_id);
	fprintf(out, "GetEnvTimeout = %u\n",
		slurm_ctl_conf_ptr->get_env_timeout);
	fprintf(out, "HealthCheckInterval = %u\n",
		slurm_ctl_conf_ptr->health_check_interval);
	fprintf(out, "HealthCheckProgram = %s\n",
		_conf_str(slurm_ctl_conf_ptr->health_check_program));
#ifdef HAVE_XCPU
	fprintf(out, "HAVE_XCPU = %d\n", HAVE_XCPU);
#endif
	fprintf(out, "InactiveLimit = %u\n",
		slurm_ctl_conf_ptr->inactive_limit);
	fprintf(out, "JobAcctGatherFrequency = %u\n",
		slurm_ctl_conf_ptr->job_acct_gather_freq);
	fprintf(out, "JobAcctGatherType = %s\n",
		_conf_str(slurm_ctl_conf_ptr->job_acct_gather_type));
	fprintf(out, "JobCompHost = %s\n",
		_conf_str(slurm_ctl_conf_ptr->job_comp_host));
	fprintf(out, "JobCompLoc = %s\n",
		_conf_str(slurm_ctl_conf_ptr->job_comp_loc));
	fprintf(out, "JobCompPass = %s\n",
		_conf_str(slurm_ctl_conf_ptr->job_comp_pass));
	fprintf(out, "JobCompPort = %u\n",
		slurm_ctl_conf_ptr->job_comp_port);
	fprintf(out, "JobCompType = %s\n",
		_conf_str(slurm_ctl_conf_ptr->job_comp_type));
	fprintf(out, "JobCompUser = %s\n",
		_conf_str(slurm_ctl_conf_ptr->job_comp_user));
	fprintf(out, "JobCredentialPrivateKey = %s\n",
		_conf_str(slurm_ctl_conf_ptr->job_credential_private_key));
	fprintf(out, "JobCredentialPublicCertificate = %s\n",
		_conf_str(slurm_ctl_conf_ptr->
			  job_credential_public_certificate));
	fprintf(out, "JobFileAppend = %u\n",
		slurm_ctl_conf_ptr->job_file_append);
	fprintf(out, "JobRequeue = %u\n",
		slurm_ctl_conf_ptr->job_requeue);
	fprintf(out, "KillWait = %u\n",
		slurm_ctl_conf_ptr->kill_wait);
	fprintf(out, "Licenses = %s\n",
		_conf_str(slurm_ctl_conf_ptr->licenses));
	fprintf(out, "MailProg = %s\n",
		_conf_str(slurm_ctl_conf_ptr->mail_prog));
	fprintf(out, "MaxJobCount = %u\n",
		slurm_ctl_conf_ptr->max_job_cnt);

	/* Same MEM_PER_CPU flag handling as for def_mem_per_task above */
	if (slurm_ctl_conf_ptr->max_mem_per_task & MEM_PER_CPU) {
		fprintf(out, "MaxMemPerCPU = %u\n",
			slurm_ctl_conf_ptr->max_mem_per_task &
			(~MEM_PER_CPU));
	} else if (slurm_ctl_conf_ptr->max_mem_per_task) {
		fprintf(out, "MaxMemPerNode = %u\n",
			slurm_ctl_conf_ptr->max_mem_per_task);
	} else
		fprintf(out, "MaxMemPerCPU = UNLIMITED\n");

	fprintf(out, "MessageTimeout = %u\n",
		slurm_ctl_conf_ptr->msg_timeout);
	fprintf(out, "MinJobAge = %u\n",
		slurm_ctl_conf_ptr->min_job_age);
	fprintf(out, "MpiDefault = %s\n",
		_conf_str(slurm_ctl_conf_ptr->mpi_default));
#ifdef MULTIPLE_SLURMD
	fprintf(out, "MULTIPLE_SLURMD = %d\n", MULTIPLE_SLURMD);
#endif
	fprintf(out, "NEXT_JOB_ID = %u\n",
		slurm_ctl_conf_ptr->next_job_id);
	fprintf(out, "PluginDir = %s\n",
		_conf_str(slurm_ctl_conf_ptr->plugindir));
	fprintf(out, "PlugStackConfig = %s\n",
		_conf_str(slurm_ctl_conf_ptr->plugstack));
	private_data_string(slurm_ctl_conf_ptr->private_data,
			    tmp_str, sizeof(tmp_str));
	fprintf(out, "PrivateData = %s\n", tmp_str);
	fprintf(out, "ProctrackType = %s\n",
		_conf_str(slurm_ctl_conf_ptr->proctrack_type));
	fprintf(out, "Prolog = %s\n",
		_conf_str(slurm_ctl_conf_ptr->prolog));
	fprintf(out, "PropagatePrioProcess = %u\n",
		slurm_ctl_conf_ptr->propagate_prio_process);
	fprintf(out, "PropagateResourceLimits = %s\n",
		_conf_str(slurm_ctl_conf_ptr->propagate_rlimits));
	fprintf(out, "PropagateResourceLimitsExcept = %s\n",
		_conf_str(slurm_ctl_conf_ptr->propagate_rlimits_except));
	fprintf(out, "ResumeProgram = %s\n",
		_conf_str(slurm_ctl_conf_ptr->resume_program));
	fprintf(out, "ResumeRate = %u\n",
		slurm_ctl_conf_ptr->resume_rate);
	fprintf(out, "ReturnToService = %u\n",
		slurm_ctl_conf_ptr->ret2service);
	fprintf(out, "SchedulerParameters = %s\n",
		_conf_str(slurm_ctl_conf_ptr->sched_params));
	fprintf(out, "SchedulerPort = %u\n",
		slurm_ctl_conf_ptr->schedport);
	fprintf(out, "SchedulerRootFilter = %u\n",
		slurm_ctl_conf_ptr->schedrootfltr);
	fprintf(out, "SchedulerTimeSlice = %u\n",
		slurm_ctl_conf_ptr->sched_time_slice);
	fprintf(out, "SchedulerType = %s\n",
		_conf_str(slurm_ctl_conf_ptr->schedtype));
	fprintf(out, "SelectType = %s\n",
		_conf_str(slurm_ctl_conf_ptr->select_type));
	/* SelectTypeParameters is only reported when it is non-zero */
	if (slurm_ctl_conf_ptr->select_type_param) {
		fprintf(out, "SelectTypeParameters = %s\n",
			_select_info(slurm_ctl_conf_ptr->
				     select_type_param));
	}
	fprintf(out, "SlurmUser = %s(%u)\n",
		_conf_str(slurm_ctl_conf_ptr->slurm_user_name),
		slurm_ctl_conf_ptr->slurm_user_id);
	fprintf(out, "SlurmctldDebug = %u\n",
		slurm_ctl_conf_ptr->slurmctld_debug);
	fprintf(out, "SlurmctldLogFile = %s\n",
		_conf_str(slurm_ctl_conf_ptr->slurmctld_logfile));
	fprintf(out, "SlurmctldPidFile = %s\n",
		_conf_str(slurm_ctl_conf_ptr->slurmctld_pidfile));
	fprintf(out, "SlurmctldPort = %u\n",
		slurm_ctl_conf_ptr->slurmctld_port);
	fprintf(out, "SlurmctldTimeout = %u\n",
		slurm_ctl_conf_ptr->slurmctld_timeout);
	fprintf(out, "SlurmdDebug = %u\n",
		slurm_ctl_conf_ptr->slurmd_debug);
	fprintf(out, "SlurmdLogFile = %s\n",
		_conf_str(slurm_ctl_conf_ptr->slurmd_logfile));
	fprintf(out, "SlurmdPidFile = %s\n",
		_conf_str(slurm_ctl_conf_ptr->slurmd_pidfile));
#ifndef MULTIPLE_SLURMD
	/* SlurmdPort is left out of the report in MULTIPLE_SLURMD builds */
	fprintf(out, "SlurmdPort = %u\n",
		slurm_ctl_conf_ptr->slurmd_port);
#endif
	fprintf(out, "SlurmdSpoolDir = %s\n",
		_conf_str(slurm_ctl_conf_ptr->slurmd_spooldir));
	fprintf(out, "SlurmdTimeout = %u\n",
		slurm_ctl_conf_ptr->slurmd_timeout);
	fprintf(out, "SLURM_CONFIG_FILE = %s\n",
		_conf_str(slurm_ctl_conf_ptr->slurm_conf));
	fprintf(out, "SLURM_VERSION = %s\n", SLURM_VERSION);
	fprintf(out, "SrunEpilog = %s\n",
		_conf_str(slurm_ctl_conf_ptr->srun_epilog));
	fprintf(out, "SrunProlog = %s\n",
		_conf_str(slurm_ctl_conf_ptr->srun_prolog));
	fprintf(out, "StateSaveLocation = %s\n",
		_conf_str(slurm_ctl_conf_ptr->state_save_location));
	fprintf(out, "SuspendExcNodes = %s\n",
		_conf_str(slurm_ctl_conf_ptr->suspend_exc_nodes));
	fprintf(out, "SuspendExcParts = %s\n",
		_conf_str(slurm_ctl_conf_ptr->suspend_exc_parts));
	fprintf(out, "SuspendProgram = %s\n",
		_conf_str(slurm_ctl_conf_ptr->suspend_program));
	fprintf(out, "SuspendRate = %u\n",
		slurm_ctl_conf_ptr->suspend_rate);
	/*
	 * Printed as the stored value minus one, as a signed number.
	 * NOTE(review): presumably the value is stored offset by one so
	 * that "disabled" shows up as -1 -- confirm against the code that
	 * fills in suspend_time.
	 */
	fprintf(out, "SuspendTime = %d\n",
		((int)slurm_ctl_conf_ptr->suspend_time - 1));
	fprintf(out, "SwitchType = %s\n",
		_conf_str(slurm_ctl_conf_ptr->switch_type));
	fprintf(out, "TaskEpilog = %s\n",
		_conf_str(slurm_ctl_conf_ptr->task_epilog));
	fprintf(out, "TaskPlugin = %s\n",
		_conf_str(slurm_ctl_conf_ptr->task_plugin));
	fprintf(out, "TaskPluginParam = %s\n",
		_task_plugin_param(slurm_ctl_conf_ptr->task_plugin_param));
	fprintf(out, "TaskProlog = %s\n",
		_conf_str(slurm_ctl_conf_ptr->task_prolog));
	fprintf(out, "TmpFS = %s\n",
		_conf_str(slurm_ctl_conf_ptr->tmp_fs));
	fprintf(out, "TreeWidth = %u\n",
		slurm_ctl_conf_ptr->tree_width);
	fprintf(out, "UsePam = %u\n",
		slurm_ctl_conf_ptr->use_pam);
	fprintf(out, "UnkillableStepProgram = %s\n",
		_conf_str(slurm_ctl_conf_ptr->unkillable_program));
	fprintf(out, "UnkillableStepTimeout = %u\n",
		slurm_ctl_conf_ptr->unkillable_timeout);
	fprintf(out, "WaitTime = %u\n",
		slurm_ctl_conf_ptr->wait_time);
}
/*
 * slurm_load_ctl_conf - issue RPC to get slurm control configuration
 *	information if changed since update_time
 * IN update_time - time of current configuration data
 * OUT confp - place to store slurm control configuration pointer; written
 *	only when the controller replies with RESPONSE_BUILD_INFO
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 * NOTE: free the response using slurm_free_ctl_conf
 */
int
slurm_load_ctl_conf (time_t update_time, slurm_ctl_conf_t **confp)
{
	slurm_msg_t request;
	slurm_msg_t response;
	last_update_msg_t update_req;
	int rc;

	slurm_msg_t_init(&request);
	slurm_msg_t_init(&response);

	/* Build the REQUEST_BUILD_INFO message carrying the caller's time */
	update_req.last_update = update_time;
	request.msg_type = REQUEST_BUILD_INFO;
	request.data     = &update_req;

	if (slurm_send_recv_controller_msg(&request, &response) < 0)
		return SLURM_ERROR;

	if (response.msg_type == RESPONSE_BUILD_INFO) {
		/* Hand the decoded configuration over to the caller */
		*confp = (slurm_ctl_conf_info_msg_t *) response.data;
	} else if (response.msg_type == RESPONSE_SLURM_RC) {
		/*
		 * The controller sent a bare return code; copy it out before
		 * releasing the message. A zero code falls through to the
		 * success return below without touching *confp.
		 */
		rc = ((return_code_msg_t *) response.data)->return_code;
		slurm_free_return_code_msg(response.data);
		/*
		 * NOTE(review): slurm_seterrno_ret is presumably a macro
		 * that sets errno and returns from this function -- confirm
		 * against its definition.
		 */
		if (rc)
			slurm_seterrno_ret(rc);
	} else {
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
	}

	return SLURM_PROTOCOL_SUCCESS;
}
/*
 * slurm_load_slurmd_status - issue RPC to get the status of slurmd
 *	daemon on this machine
 * OUT slurmd_status_ptr - place to store slurmd status information; written
 *	only when slurmd replies with RESPONSE_SLURMD_STATUS
 * RET 0 or -1 on error
 * NOTE: free the response using slurm_free_slurmd_status()
 */
extern int
slurm_load_slurmd_status(slurmd_status_t **slurmd_status_ptr)
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);

	/*
	 *  Set request message address to slurmd on localhost
	 */
	slurm_set_addr(&req_msg.address, (uint16_t)slurm_get_slurmd_port(),
		       "localhost");

	req_msg.msg_type = REQUEST_DAEMON_STATUS;
	req_msg.data     = NULL;

	rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);

	/* A reply without an authentication credential is treated as failure */
	if ((rc != 0) || !resp_msg.auth_cred) {
		error("slurm_load_slurmd_status: %m");
		if (resp_msg.auth_cred)
			g_slurm_auth_destroy(resp_msg.auth_cred);
		return SLURM_ERROR;
	}

	/*
	 * The guard above returned if auth_cred was NULL, so the credential
	 * can be released unconditionally here.
	 */
	g_slurm_auth_destroy(resp_msg.auth_cred);

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURMD_STATUS:
		*slurmd_status_ptr = (slurmd_status_t *) resp_msg.data;
		break;
	case RESPONSE_SLURM_RC:
		/* Copy the return code out before freeing the message */
		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
		slurm_free_return_code_msg(resp_msg.data);
		if (rc)
			slurm_seterrno_ret(rc);
		break;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
		break;
	}

	return SLURM_PROTOCOL_SUCCESS;
}
/*
 * _status_str - make a possibly-NULL string safe to pass to a "%s" conversion
 * IN str - string to print, may be NULL
 * RET str itself, or the literal "(null)" when str is NULL
 * NOTE: Passing a NULL pointer for "%s" is undefined behavior in ISO C.
 *	"(null)" is the text glibc prints in that situation, so output on
 *	glibc systems is unchanged by going through this helper.
 */
static const char *_status_str(const char *str)
{
	return str ? str : "(null)";
}

/*
 * slurm_print_slurmd_status - output the contents of slurmd status
 *	message as loaded using slurm_load_slurmd_status
 * IN out - file to write to (used without a NULL check)
 * IN slurmd_status_ptr - slurmd status pointer; if NULL the function
 *	returns without writing anything
 * NOTE: String fields that are NULL are written as "(null)".
 */
void slurm_print_slurmd_status (FILE* out,
				slurmd_status_t * slurmd_status_ptr)
{
	char time_str[32];

	if (slurmd_status_ptr == NULL )
		return ;

	fprintf(out, "Active Steps = %s\n",
		_status_str(slurmd_status_ptr->step_list));

	fprintf(out, "Actual CPUs = %u\n",
		slurmd_status_ptr->actual_cpus);
	fprintf(out, "Actual sockets = %u\n",
		slurmd_status_ptr->actual_sockets);
	fprintf(out, "Actual cores = %u\n",
		slurmd_status_ptr->actual_cores);
	fprintf(out, "Actual threads per core = %u\n",
		slurmd_status_ptr->actual_threads);
	fprintf(out, "Actual real memory = %u MB\n",
		slurmd_status_ptr->actual_real_mem);
	fprintf(out, "Actual temp disk space = %u MB\n",
		slurmd_status_ptr->actual_tmp_disk);

	slurm_make_time_str ((time_t *)&slurmd_status_ptr->booted,
			     time_str, sizeof(time_str));
	fprintf(out, "Boot time = %s\n", time_str);

	fprintf(out, "Hostname = %s\n",
		_status_str(slurmd_status_ptr->hostname));

	/* A zero timestamp is reported as NONE instead of being formatted */
	if (slurmd_status_ptr->last_slurmctld_msg) {
		slurm_make_time_str ((time_t *)
				     &slurmd_status_ptr->last_slurmctld_msg,
				     time_str, sizeof(time_str));
		fprintf(out, "Last slurmctld msg time = %s\n", time_str);
	} else
		fprintf(out, "Last slurmctld msg time = NONE\n");

	fprintf(out, "Slurmd PID = %u\n",
		slurmd_status_ptr->pid);
	fprintf(out, "Slurmd Debug = %u\n",
		slurmd_status_ptr->slurmd_debug);
	fprintf(out, "Slurmd Logfile = %s\n",
		_status_str(slurmd_status_ptr->slurmd_logfile));
	fprintf(out, "Version = %s\n",
		_status_str(slurmd_status_ptr->version));
}