/*****************************************************************************\
 *  proc_req.c - process incoming messages to slurmctld
 *****************************************************************************
 *  Copyright (C) 2002-2007 The Regents of the University of California.
 *  Copyright (C) 2008-2011 Lawrence Livermore National Security.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Morris Jette <jette@llnl.gov>, et. al.
 *  This file is part of SLURM, a resource management program.
 *  For details, see <http://slurm.schedmd.com/>.
 *  Please also read the included file: DISCLAIMER.
 *  SLURM is free software; you can redistribute it and/or modify it under
 *  the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *  In addition, as a special exception, the copyright holders give permission
 *  to link the code of portions of this program with the OpenSSL library under
 *  certain conditions as described in each individual source file, and
 *  distribute linked combinations including the two. You must obey the GNU
 *  General Public License in all respects for all of the code used other than
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
 *  exception to your version of the file(s), but you are not obligated to do
 *  so. If you do not wish to do so, delete this exception statement from your
 *  version.  If you delete this exception statement from all source files in
 *  the program, then also delete it here.
 *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *  You should have received a copy of the GNU General Public License along
 *  with SLURM; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
\*****************************************************************************/

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#ifdef WITH_PTHREADS
#  include <pthread.h>
#endif				/* WITH_PTHREADS */

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "slurm/slurm_errno.h"
#include "src/common/checkpoint.h"
#include "src/common/daemonize.h"
#include "src/common/fd.h"
#include "src/common/gres.h"
#include "src/common/hostlist.h"
#include "src/common/log.h"
#include "src/common/macros.h"
#include "src/common/node_select.h"
#include "src/common/pack.h"
#include "src/common/read_config.h"
#include "src/common/slurm_auth.h"
#include "src/common/slurm_cred.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/switch.h"
#include "src/common/xstring.h"
#include "src/common/slurm_ext_sensors.h"
#include "src/slurmctld/agent.h"
#include "src/slurmctld/front_end.h"
#include "src/slurmctld/gang.h"
#include "src/slurmctld/job_scheduler.h"
#include "src/slurmctld/licenses.h"
#include "src/slurmctld/locks.h"
#include "src/slurmctld/proc_req.h"
#include "src/slurmctld/read_config.h"
#include "src/slurmctld/reservation.h"
#include "src/slurmctld/sched_plugin.h"
#include "src/slurmctld/slurmctld.h"
#include "src/slurmctld/srun_comm.h"
#include "src/slurmctld/state_save.h"
#include "src/slurmctld/trigger_mgr.h"
#include "src/plugins/select/bluegene/bg_enums.h"
static pthread_mutex_t throttle_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t throttle_cond = PTHREAD_COND_INITIALIZER;

static void         _fill_ctld_conf(slurm_ctl_conf_t * build_ptr);
static void         _kill_job_on_msg_fail(uint32_t job_id);
static int          _launch_batch_step(job_desc_msg_t *job_desc_msg,
				       uid_t uid, uint32_t *step_id);
static int          _make_step_cred(struct step_record *step_rec,
				    slurm_cred_t **slurm_cred);
static void         _throttle_fini(int *active_rpc_cnt);
static void         _throttle_start(int *active_rpc_cnt);

inline static void  _slurm_rpc_accounting_first_reg(slurm_msg_t *msg);
inline static void  _slurm_rpc_accounting_register_ctld(slurm_msg_t *msg);
inline static void  _slurm_rpc_accounting_update_msg(slurm_msg_t *msg);
inline static void  _slurm_rpc_allocate_resources(slurm_msg_t * msg);
inline static void  _slurm_rpc_checkpoint(slurm_msg_t * msg);
inline static void  _slurm_rpc_checkpoint_comp(slurm_msg_t * msg);
inline static void  _slurm_rpc_checkpoint_task_comp(slurm_msg_t * msg);
inline static void  _slurm_rpc_delete_partition(slurm_msg_t * msg);
inline static void  _slurm_rpc_complete_job_allocation(slurm_msg_t * msg);
inline static void  _slurm_rpc_complete_batch_script(slurm_msg_t * msg);
inline static void  _slurm_rpc_dump_conf(slurm_msg_t * msg);
inline static void  _slurm_rpc_dump_front_end(slurm_msg_t * msg);
inline static void  _slurm_rpc_dump_jobs(slurm_msg_t * msg);
inline static void  _slurm_rpc_dump_jobs_user(slurm_msg_t * msg);
inline static void  _slurm_rpc_dump_job_single(slurm_msg_t * msg);
inline static void  _slurm_rpc_dump_nodes(slurm_msg_t * msg);
inline static void  _slurm_rpc_dump_node_single(slurm_msg_t * msg);
inline static void  _slurm_rpc_dump_partitions(slurm_msg_t * msg);
inline static void  _slurm_rpc_end_time(slurm_msg_t * msg);
inline static void  _slurm_rpc_epilog_complete(slurm_msg_t * msg);
inline static void  _slurm_rpc_get_shares(slurm_msg_t *msg);
inline static void  _slurm_rpc_get_topo(slurm_msg_t * msg);
inline static void  _slurm_rpc_get_priority_factors(slurm_msg_t *msg);
inline static void  _slurm_rpc_job_notify(slurm_msg_t * msg);
inline static void  _slurm_rpc_job_ready(slurm_msg_t * msg);
inline static void  _slurm_rpc_job_sbcast_cred(slurm_msg_t * msg);
inline static void  _slurm_rpc_job_step_kill(slurm_msg_t * msg);
inline static void  _slurm_rpc_job_step_create(slurm_msg_t * msg);
inline static void  _slurm_rpc_job_step_get_info(slurm_msg_t * msg);
inline static void  _slurm_rpc_job_will_run(slurm_msg_t * msg);
inline static void  _slurm_rpc_node_registration(slurm_msg_t * msg);
inline static void  _slurm_rpc_block_info(slurm_msg_t * msg);
inline static void  _slurm_rpc_job_alloc_info(slurm_msg_t * msg);
inline static void  _slurm_rpc_job_alloc_info_lite(slurm_msg_t * msg);
inline static void  _slurm_rpc_ping(slurm_msg_t * msg);
inline static void  _slurm_rpc_reboot_nodes(slurm_msg_t * msg);
inline static void  _slurm_rpc_reconfigure_controller(slurm_msg_t * msg);
inline static void  _slurm_rpc_resv_create(slurm_msg_t * msg);
inline static void  _slurm_rpc_resv_update(slurm_msg_t * msg);
inline static void  _slurm_rpc_resv_delete(slurm_msg_t * msg);
inline static void  _slurm_rpc_resv_show(slurm_msg_t * msg);
inline static void  _slurm_rpc_requeue(slurm_msg_t * msg);
inline static void  _slurm_rpc_takeover(slurm_msg_t * msg);
inline static void  _slurm_rpc_set_debug_flags(slurm_msg_t *msg);
inline static void  _slurm_rpc_set_debug_level(slurm_msg_t *msg);
inline static void  _slurm_rpc_set_schedlog_level(slurm_msg_t *msg);
inline static void  _slurm_rpc_shutdown_controller(slurm_msg_t * msg);
inline static void  _slurm_rpc_shutdown_controller_immediate(slurm_msg_t *
							     msg);
inline static void  _slurm_rpc_step_complete(slurm_msg_t * msg);
inline static void  _slurm_rpc_step_layout(slurm_msg_t * msg);
inline static void  _slurm_rpc_step_update(slurm_msg_t * msg);
inline static void  _slurm_rpc_submit_batch_job(slurm_msg_t * msg);
inline static void  _slurm_rpc_suspend(slurm_msg_t * msg);
inline static void  _slurm_rpc_trigger_clear(slurm_msg_t * msg);
inline static void  _slurm_rpc_trigger_get(slurm_msg_t * msg);
inline static void  _slurm_rpc_trigger_set(slurm_msg_t * msg);
inline static void  _slurm_rpc_trigger_pull(slurm_msg_t * msg);
inline static void  _slurm_rpc_update_front_end(slurm_msg_t * msg);
inline static void  _slurm_rpc_update_job(slurm_msg_t * msg);
inline static void  _slurm_rpc_update_node(slurm_msg_t * msg);
inline static void  _slurm_rpc_update_partition(slurm_msg_t * msg);
inline static void  _slurm_rpc_update_block(slurm_msg_t * msg);
inline static void  _slurm_rpc_dump_spank(slurm_msg_t * msg);
inline static void  _slurm_rpc_dump_stats(slurm_msg_t * msg);
inline static void  _update_cred_key(void);

extern diag_stats_t slurmctld_diag_stats;
/*
 * slurmctld_req  - Process an individual RPC request
 * IN/OUT msg - the request message, data associated with the message is freed
 */
void slurmctld_req (slurm_msg_t * msg)
{
	/* Just to validate the cred */
	(void) g_slurm_auth_get_uid(msg->auth_cred, NULL);
	if (g_slurm_auth_errno(msg->auth_cred) != SLURM_SUCCESS) {
		error("Bad authentication: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(msg->auth_cred)));
		return;
	}

	switch (msg->msg_type) {
	case REQUEST_RESOURCE_ALLOCATION:
		_slurm_rpc_allocate_resources(msg);
		slurm_free_job_desc_msg(msg->data);
		break;
	case REQUEST_BUILD_INFO:
		_slurm_rpc_dump_conf(msg);
		slurm_free_last_update_msg(msg->data);
		break;
	case REQUEST_JOB_INFO:
		_slurm_rpc_dump_jobs(msg);
		slurm_free_job_info_request_msg(msg->data);
		break;
	case REQUEST_JOB_USER_INFO:
		_slurm_rpc_dump_jobs_user(msg);
		slurm_free_job_user_id_msg(msg->data);
		break;
	case REQUEST_JOB_INFO_SINGLE:
		_slurm_rpc_dump_job_single(msg);
		slurm_free_job_id_msg(msg->data);
		break;
	case REQUEST_SHARE_INFO:
		_slurm_rpc_get_shares(msg);
		slurm_free_shares_request_msg(msg->data);
		break;
	case REQUEST_PRIORITY_FACTORS:
		_slurm_rpc_get_priority_factors(msg);
		slurm_free_priority_factors_request_msg(msg->data);
		break;
	case REQUEST_JOB_END_TIME:
		_slurm_rpc_end_time(msg);
		slurm_free_job_alloc_info_msg(msg->data);
		break;
	case REQUEST_FRONT_END_INFO:
		_slurm_rpc_dump_front_end(msg);
		slurm_free_front_end_info_request_msg(msg->data);
		break;
	case REQUEST_NODE_INFO:
		_slurm_rpc_dump_nodes(msg);
		slurm_free_node_info_request_msg(msg->data);
		break;
	case REQUEST_NODE_INFO_SINGLE:
		_slurm_rpc_dump_node_single(msg);
		slurm_free_node_info_single_msg(msg->data);
		break;
	case REQUEST_PARTITION_INFO:
		_slurm_rpc_dump_partitions(msg);
		slurm_free_part_info_request_msg(msg->data);
		break;
	case MESSAGE_EPILOG_COMPLETE:
		_slurm_rpc_epilog_complete(msg);
		slurm_free_epilog_complete_msg(msg->data);
		break;
	case REQUEST_CANCEL_JOB_STEP:
		_slurm_rpc_job_step_kill(msg);
		slurm_free_job_step_kill_msg(msg->data);
		break;
	case REQUEST_COMPLETE_JOB_ALLOCATION:
		_slurm_rpc_complete_job_allocation(msg);
		slurm_free_complete_job_allocation_msg(msg->data);
		break;
	case REQUEST_COMPLETE_BATCH_JOB:
	case REQUEST_COMPLETE_BATCH_SCRIPT:
		_slurm_rpc_complete_batch_script(msg);
		slurm_free_complete_batch_script_msg(msg->data);
		break;
	case REQUEST_JOB_STEP_CREATE:
		_slurm_rpc_job_step_create(msg);
		slurm_free_job_step_create_request_msg(msg->data);
		break;
	case REQUEST_JOB_STEP_INFO:
		_slurm_rpc_job_step_get_info(msg);
		slurm_free_job_step_info_request_msg(msg->data);
		break;
	case REQUEST_JOB_WILL_RUN:
		_slurm_rpc_job_will_run(msg);
		slurm_free_job_desc_msg(msg->data);
		break;
	case MESSAGE_NODE_REGISTRATION_STATUS:
		_slurm_rpc_node_registration(msg);
		slurm_free_node_registration_status_msg(msg->data);
		break;
	case REQUEST_JOB_ALLOCATION_INFO:
		_slurm_rpc_job_alloc_info(msg);
		slurm_free_job_alloc_info_msg(msg->data);
		break;
	case REQUEST_JOB_ALLOCATION_INFO_LITE:
		_slurm_rpc_job_alloc_info_lite(msg);
		slurm_free_job_alloc_info_msg(msg->data);
		break;
	case REQUEST_JOB_SBCAST_CRED:
		_slurm_rpc_job_sbcast_cred(msg);
		slurm_free_job_alloc_info_msg(msg->data);
		break;
	case REQUEST_PING:
		_slurm_rpc_ping(msg);
		/* No body to free */
		break;
	case REQUEST_RECONFIGURE:
		_slurm_rpc_reconfigure_controller(msg);
		/* No body to free */
		break;
	case REQUEST_CONTROL:
		_slurm_rpc_shutdown_controller(msg);
		/* No body to free */
		break;
	case REQUEST_TAKEOVER:
		_slurm_rpc_takeover(msg);
		/* No body to free */
		break;
	case REQUEST_SHUTDOWN:
		_slurm_rpc_shutdown_controller(msg);
		slurm_free_shutdown_msg(msg->data);
		break;
	case REQUEST_SHUTDOWN_IMMEDIATE:
		_slurm_rpc_shutdown_controller_immediate(msg);
		/* No body to free */
		break;
	case REQUEST_SUBMIT_BATCH_JOB:
		_slurm_rpc_submit_batch_job(msg);
		slurm_free_job_desc_msg(msg->data);
		break;
	case REQUEST_UPDATE_FRONT_END:
		_slurm_rpc_update_front_end(msg);
		slurm_free_update_front_end_msg(msg->data);
		break;
	case REQUEST_UPDATE_JOB:
		_slurm_rpc_update_job(msg);
		slurm_free_job_desc_msg(msg->data);
		break;
	case REQUEST_UPDATE_NODE:
		_slurm_rpc_update_node(msg);
		slurm_free_update_node_msg(msg->data);
		break;
	case REQUEST_CREATE_PARTITION:
	case REQUEST_UPDATE_PARTITION:
		_slurm_rpc_update_partition(msg);
		slurm_free_update_part_msg(msg->data);
		break;
	case REQUEST_DELETE_PARTITION:
		_slurm_rpc_delete_partition(msg);
		slurm_free_delete_part_msg(msg->data);
		break;
	case REQUEST_CREATE_RESERVATION:
		_slurm_rpc_resv_create(msg);
		slurm_free_resv_desc_msg(msg->data);
		break;
	case REQUEST_UPDATE_RESERVATION:
		_slurm_rpc_resv_update(msg);
		slurm_free_resv_desc_msg(msg->data);
		break;
	case REQUEST_DELETE_RESERVATION:
		_slurm_rpc_resv_delete(msg);
		slurm_free_resv_name_msg(msg->data);
		break;
	case REQUEST_UPDATE_BLOCK:
		_slurm_rpc_update_block(msg);
		break;
	case REQUEST_RESERVATION_INFO:
		_slurm_rpc_resv_show(msg);
		slurm_free_resv_info_request_msg(msg->data);
		break;
	case REQUEST_NODE_REGISTRATION_STATUS:
		error("slurmctld is talking with itself. "
		slurm_send_rc_msg(msg, EINVAL);
		break;
	case REQUEST_CHECKPOINT:
		_slurm_rpc_checkpoint(msg);
		slurm_free_checkpoint_msg(msg->data);
		break;
	case REQUEST_CHECKPOINT_COMP:
		_slurm_rpc_checkpoint_comp(msg);
		slurm_free_checkpoint_comp_msg(msg->data);
		break;
	case REQUEST_CHECKPOINT_TASK_COMP:
		_slurm_rpc_checkpoint_task_comp(msg);
		slurm_free_checkpoint_task_comp_msg(msg->data);
		break;
	case REQUEST_SUSPEND:
		_slurm_rpc_suspend(msg);
		slurm_free_suspend_msg(msg->data);
		break;
	case REQUEST_JOB_REQUEUE:
		_slurm_rpc_requeue(msg);
		slurm_free_job_id_msg(msg->data);
		break;
	case REQUEST_JOB_READY:
		_slurm_rpc_job_ready(msg);
		slurm_free_job_id_msg(msg->data);
		break;
	case REQUEST_BLOCK_INFO:
		_slurm_rpc_block_info(msg);
		slurm_free_block_info_request_msg(msg->data);
		break;
	case REQUEST_STEP_COMPLETE:
		_slurm_rpc_step_complete(msg);
		slurm_free_step_complete_msg(msg->data);
		break;
	case REQUEST_STEP_LAYOUT:
		_slurm_rpc_step_layout(msg);
		slurm_free_job_step_id_msg(msg->data);
		break;
	case REQUEST_UPDATE_JOB_STEP:
		_slurm_rpc_step_update(msg);
		slurm_free_update_step_msg(msg->data);
		break;
	case REQUEST_TRIGGER_SET:
		_slurm_rpc_trigger_set(msg);
		slurm_free_trigger_msg(msg->data);
		break;
	case REQUEST_TRIGGER_GET:
		_slurm_rpc_trigger_get(msg);
		slurm_free_trigger_msg(msg->data);
		break;
	case REQUEST_TRIGGER_CLEAR:
		_slurm_rpc_trigger_clear(msg);
		slurm_free_trigger_msg(msg->data);
		break;
	case REQUEST_TRIGGER_PULL:
		_slurm_rpc_trigger_pull(msg);
		slurm_free_trigger_msg(msg->data);
		break;
	case REQUEST_JOB_NOTIFY:
		_slurm_rpc_job_notify(msg);
		slurm_free_job_notify_msg(msg->data);
		break;
	case REQUEST_SET_DEBUG_FLAGS:
		_slurm_rpc_set_debug_flags(msg);
		slurm_free_set_debug_flags_msg(msg->data);
		break;
	case REQUEST_SET_DEBUG_LEVEL:
		_slurm_rpc_set_debug_level(msg);
		slurm_free_set_debug_level_msg(msg->data);
		break;
	case REQUEST_SET_SCHEDLOG_LEVEL:
		_slurm_rpc_set_schedlog_level(msg);
		slurm_free_set_debug_level_msg(msg->data);
		break;
	case ACCOUNTING_UPDATE_MSG:
		_slurm_rpc_accounting_update_msg(msg);
		slurm_free_accounting_update_msg(msg->data);
		break;
	case ACCOUNTING_FIRST_REG:
		_slurm_rpc_accounting_first_reg(msg);
		/* No body to free */
		break;
	case ACCOUNTING_REGISTER_CTLD:
		_slurm_rpc_accounting_register_ctld(msg);
		slurm_free_reboot_msg(msg->data);
		break;
	case REQUEST_TOPO_INFO:
		_slurm_rpc_get_topo(msg);
		/* No body to free */
		break;
	case REQUEST_SPANK_ENVIRONMENT:
		_slurm_rpc_dump_spank(msg);
		slurm_free_spank_env_request_msg(msg->data);
		break;
	case REQUEST_REBOOT_NODES:
		_slurm_rpc_reboot_nodes(msg);
		/* No body to free */
		break;
	case REQUEST_STATS_INFO:
		_slurm_rpc_dump_stats(msg);
		slurm_free_stats_info_request_msg(msg->data);
		break;
		error("invalid RPC msg_type=%d", msg->msg_type);
/* These functions prevent certain RPCs from keeping the slurmctld write locks
 * constantly set, which can prevent other RPCs and system functions from being
 * processed. For example, a steady stream of batch submissions can prevent
 * squeue from responding or jobs from being scheduled. */
static void _throttle_start(int *active_rpc_cnt)
{
	slurm_mutex_lock(&throttle_mutex);
	while (1) {
		if (*active_rpc_cnt == 0) {
			(*active_rpc_cnt)++;
			break;
		}
		pthread_cond_wait(&throttle_cond, &throttle_mutex);
	}
	slurm_mutex_unlock(&throttle_mutex);
	usleep(1);
}
static void _throttle_fini(int *active_rpc_cnt)
{
	slurm_mutex_lock(&throttle_mutex);
	(*active_rpc_cnt)--;
	pthread_cond_broadcast(&throttle_cond);
	slurm_mutex_unlock(&throttle_mutex);
}
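
/*
 * Illustrative usage sketch (not part of the original handlers; shown here
 * for clarity only): RPC handlers that take the slurmctld write locks wrap
 * the locked region with the throttle calls so that at most one such heavy
 * RPC holds the write locks at a time, e.g.:
 *
 *	static int active_rpc_cnt = 0;
 *	...
 *	_throttle_start(&active_rpc_cnt);
 *	lock_slurmctld(job_write_lock);
 *	... modify job/node state ...
 *	unlock_slurmctld(job_write_lock);
 *	_throttle_fini(&active_rpc_cnt);
 *
 * This mirrors how _slurm_rpc_allocate_resources() below uses the pair.
 */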

/*
 * _fill_ctld_conf - make a copy of current slurm configuration
 *	this is done with locks set so the data can change at other times
 * OUT conf_ptr - place to copy configuration to
 */
void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr)
{
	slurm_ctl_conf_t *conf = slurm_conf_lock();

	memset(conf_ptr, 0, sizeof(slurm_ctl_conf_t));
	conf_ptr->last_update         = time(NULL);
	conf_ptr->accounting_storage_enforce =
		conf->accounting_storage_enforce;
	conf_ptr->accounting_storage_host =
		xstrdup(conf->accounting_storage_host);
	conf_ptr->accounting_storage_loc =
		xstrdup(conf->accounting_storage_loc);
	conf_ptr->accounting_storage_port = conf->accounting_storage_port;
	conf_ptr->accounting_storage_type =
		xstrdup(conf->accounting_storage_type);
	conf_ptr->accounting_storage_user =
		xstrdup(conf->accounting_storage_user);
	conf_ptr->accounting_storage_port = conf->accounting_storage_port;
	conf_ptr->acctng_store_job_comment = conf->acctng_store_job_comment;

	conf_ptr->acct_gather_energy_type =
		xstrdup(conf->acct_gather_energy_type);
	conf_ptr->acct_gather_filesystem_type =
		xstrdup(conf->acct_gather_filesystem_type);
	conf_ptr->acct_gather_infiniband_type =
		xstrdup(conf->acct_gather_infiniband_type);
	conf_ptr->acct_gather_profile_type =
		xstrdup(conf->acct_gather_profile_type);
	conf_ptr->acct_gather_node_freq = conf->acct_gather_node_freq;

	conf_ptr->authtype            = xstrdup(conf->authtype);
	conf_ptr->backup_addr         = xstrdup(conf->backup_addr);
	conf_ptr->backup_controller   = xstrdup(conf->backup_controller);
	conf_ptr->batch_start_timeout = conf->batch_start_timeout;
	conf_ptr->boot_time           = slurmctld_config.boot_time;
	conf_ptr->checkpoint_type     = xstrdup(conf->checkpoint_type);
	conf_ptr->cluster_name        = xstrdup(conf->cluster_name);
	conf_ptr->complete_wait       = conf->complete_wait;
	conf_ptr->control_addr        = xstrdup(conf->control_addr);
	conf_ptr->control_machine     = xstrdup(conf->control_machine);
	conf_ptr->crypto_type         = xstrdup(conf->crypto_type);
	conf_ptr->def_mem_per_cpu     = conf->def_mem_per_cpu;
	conf_ptr->debug_flags         = conf->debug_flags;
	conf_ptr->disable_root_jobs   = conf->disable_root_jobs;
	conf_ptr->dynalloc_port       = conf->dynalloc_port;
	conf_ptr->enforce_part_limits = conf->enforce_part_limits;
	conf_ptr->epilog              = xstrdup(conf->epilog);
	conf_ptr->epilog_msg_time     = conf->epilog_msg_time;
	conf_ptr->epilog_slurmctld    = xstrdup(conf->epilog_slurmctld);
	conf_ptr->ext_sensors_type    = xstrdup(conf->ext_sensors_type);
	conf_ptr->ext_sensors_freq    = conf->ext_sensors_freq;
	conf_ptr->fast_schedule       = conf->fast_schedule;
	conf_ptr->first_job_id        = conf->first_job_id;
	conf_ptr->gres_plugins        = xstrdup(conf->gres_plugins);
	conf_ptr->group_info          = conf->group_info;

	conf_ptr->inactive_limit      = conf->inactive_limit;
	conf_ptr->hash_val            = conf->hash_val;
	conf_ptr->health_check_interval = conf->health_check_interval;
	conf_ptr->health_check_node_state = conf->health_check_node_state;
	conf_ptr->health_check_program = xstrdup(conf->health_check_program);
	conf_ptr->job_acct_gather_freq  = xstrdup(conf->job_acct_gather_freq);
	conf_ptr->job_acct_gather_type  = xstrdup(conf->job_acct_gather_type);
	conf_ptr->job_ckpt_dir        = xstrdup(conf->job_ckpt_dir);
	conf_ptr->job_comp_host       = xstrdup(conf->job_comp_host);
	conf_ptr->job_comp_loc        = xstrdup(conf->job_comp_loc);
	conf_ptr->job_comp_port       = conf->job_comp_port;
	conf_ptr->job_comp_type       = xstrdup(conf->job_comp_type);
	conf_ptr->job_comp_user       = xstrdup(conf->job_comp_user);

	conf_ptr->job_credential_private_key =
		xstrdup(conf->job_credential_private_key);
	conf_ptr->job_credential_public_certificate =
		xstrdup(conf->job_credential_public_certificate);
	conf_ptr->job_file_append     = conf->job_file_append;
	conf_ptr->job_requeue         = conf->job_requeue;
	conf_ptr->job_submit_plugins  = xstrdup(conf->job_submit_plugins);
	conf_ptr->get_env_timeout     = conf->get_env_timeout;
	conf_ptr->keep_alive_time     = conf->keep_alive_time;
	conf_ptr->kill_on_bad_exit    = conf->kill_on_bad_exit;
	conf_ptr->launch_type         = xstrdup(conf->launch_type);
	conf_ptr->licenses            = xstrdup(conf->licenses);
	conf_ptr->licenses_used       = get_licenses_used();
	conf_ptr->mail_prog           = xstrdup(conf->mail_prog);
	conf_ptr->max_array_sz        = conf->max_array_sz;
	conf_ptr->max_job_cnt         = conf->max_job_cnt;
	conf_ptr->max_job_id          = conf->max_job_id;
	conf_ptr->max_mem_per_cpu     = conf->max_mem_per_cpu;
	conf_ptr->max_step_cnt        = conf->max_step_cnt;
	conf_ptr->max_tasks_per_node  = conf->max_tasks_per_node;
	conf_ptr->min_job_age         = conf->min_job_age;
	conf_ptr->mpi_default         = xstrdup(conf->mpi_default);
	conf_ptr->mpi_params          = xstrdup(conf->mpi_params);
	conf_ptr->msg_timeout         = conf->msg_timeout;
	conf_ptr->next_job_id         = get_next_job_id();
	conf_ptr->node_prefix         = xstrdup(conf->node_prefix);

	conf_ptr->over_time_limit     = conf->over_time_limit;

	conf_ptr->plugindir           = xstrdup(conf->plugindir);
	conf_ptr->plugstack           = xstrdup(conf->plugstack);
	conf_ptr->preempt_mode        = conf->preempt_mode;
	conf_ptr->preempt_type        = xstrdup(conf->preempt_type);
	conf_ptr->priority_decay_hl   = conf->priority_decay_hl;
	conf_ptr->priority_calc_period = conf->priority_calc_period;
	conf_ptr->priority_favor_small= conf->priority_favor_small;
	conf_ptr->priority_flags      = conf->priority_flags;
	conf_ptr->priority_max_age    = conf->priority_max_age;
	conf_ptr->priority_reset_period = conf->priority_reset_period;
	conf_ptr->priority_type       = xstrdup(conf->priority_type);
	conf_ptr->priority_weight_age = conf->priority_weight_age;
	conf_ptr->priority_weight_fs  = conf->priority_weight_fs;
	conf_ptr->priority_weight_js  = conf->priority_weight_js;
	conf_ptr->priority_weight_part= conf->priority_weight_part;
	conf_ptr->priority_weight_qos = conf->priority_weight_qos;

	conf_ptr->private_data        = conf->private_data;
	conf_ptr->proctrack_type      = xstrdup(conf->proctrack_type);
	conf_ptr->prolog              = xstrdup(conf->prolog);
	conf_ptr->prolog_slurmctld    = xstrdup(conf->prolog_slurmctld);
	conf_ptr->propagate_prio_process =
		slurmctld_conf.propagate_prio_process;
	conf_ptr->propagate_rlimits   = xstrdup(conf->propagate_rlimits);
	conf_ptr->propagate_rlimits_except = xstrdup(conf->
						     propagate_rlimits_except);
	conf_ptr->reboot_program      = xstrdup(conf->reboot_program);
	conf_ptr->reconfig_flags      = conf->reconfig_flags;
	conf_ptr->resume_program      = xstrdup(conf->resume_program);
	conf_ptr->resume_rate         = conf->resume_rate;
	conf_ptr->resume_timeout      = conf->resume_timeout;
	conf_ptr->resv_epilog         = xstrdup(conf->resv_epilog);
	conf_ptr->resv_over_run       = conf->resv_over_run;
	conf_ptr->resv_prolog         = xstrdup(conf->resv_prolog);
	conf_ptr->ret2service         = conf->ret2service;
	conf_ptr->salloc_default_command = xstrdup(conf->
						   salloc_default_command);
	if (conf->sched_params)
		conf_ptr->sched_params = xstrdup(conf->sched_params);
	else
		conf_ptr->sched_params = slurm_sched_p_get_conf();
	conf_ptr->schedport           = conf->schedport;
	conf_ptr->schedrootfltr       = conf->schedrootfltr;
	conf_ptr->sched_logfile       = xstrdup(conf->sched_logfile);
	conf_ptr->sched_log_level     = conf->sched_log_level;
	conf_ptr->sched_time_slice    = conf->sched_time_slice;
	conf_ptr->schedtype           = xstrdup(conf->schedtype);
	conf_ptr->select_type         = xstrdup(conf->select_type);
	select_g_get_info_from_plugin(SELECT_CONFIG_INFO, NULL,
				      &conf_ptr->select_conf_key_pairs);

	conf_ptr->select_type_param   = conf->select_type_param;
	conf_ptr->slurm_user_id       = conf->slurm_user_id;
	conf_ptr->slurm_user_name     = xstrdup(conf->slurm_user_name);
	conf_ptr->slurmctld_debug     = conf->slurmctld_debug;
	conf_ptr->slurmctld_logfile   = xstrdup(conf->slurmctld_logfile);
	conf_ptr->slurmctld_pidfile   = xstrdup(conf->slurmctld_pidfile);
	conf_ptr->slurmctld_plugstack = xstrdup(conf->slurmctld_plugstack);
	conf_ptr->slurmctld_port      = conf->slurmctld_port;
	conf_ptr->slurmctld_port_count = conf->slurmctld_port_count;
	conf_ptr->slurmctld_timeout   = conf->slurmctld_timeout;
	conf_ptr->slurmd_debug        = conf->slurmd_debug;
	conf_ptr->slurmd_logfile      = xstrdup(conf->slurmd_logfile);
	conf_ptr->slurmd_pidfile      = xstrdup(conf->slurmd_pidfile);
	conf_ptr->slurmd_port         = conf->slurmd_port;
	conf_ptr->slurmd_spooldir     = xstrdup(conf->slurmd_spooldir);
	conf_ptr->slurmd_timeout      = conf->slurmd_timeout;
	conf_ptr->slurmd_user_id      = conf->slurmd_user_id;
	conf_ptr->slurmd_user_name    = xstrdup(conf->slurmd_user_name);
	conf_ptr->slurm_conf          = xstrdup(conf->slurm_conf);
	conf_ptr->srun_prolog         = xstrdup(conf->srun_prolog);
	conf_ptr->srun_epilog         = xstrdup(conf->srun_epilog);
	conf_ptr->state_save_location = xstrdup(conf->state_save_location);
	conf_ptr->suspend_exc_nodes   = xstrdup(conf->suspend_exc_nodes);
	conf_ptr->suspend_exc_parts   = xstrdup(conf->suspend_exc_parts);
	conf_ptr->suspend_program     = xstrdup(conf->suspend_program);
	conf_ptr->suspend_rate        = conf->suspend_rate;
	conf_ptr->suspend_time        = conf->suspend_time;
	conf_ptr->suspend_timeout     = conf->suspend_timeout;
	conf_ptr->switch_type         = xstrdup(conf->switch_type);
	conf_ptr->task_epilog         = xstrdup(conf->task_epilog);
	conf_ptr->task_prolog         = xstrdup(conf->task_prolog);
	conf_ptr->task_plugin         = xstrdup(conf->task_plugin);
	conf_ptr->task_plugin_param   = conf->task_plugin_param;
	conf_ptr->tmp_fs              = xstrdup(conf->tmp_fs);
	conf_ptr->topology_plugin     = xstrdup(conf->topology_plugin);
	conf_ptr->track_wckey         = conf->track_wckey;
	conf_ptr->tree_width          = conf->tree_width;

	conf_ptr->wait_time           = conf->wait_time;

	conf_ptr->use_pam             = conf->use_pam;
	conf_ptr->unkillable_program  = xstrdup(conf->unkillable_program);
	conf_ptr->unkillable_timeout  = conf->unkillable_timeout;
	conf_ptr->version             = xstrdup(SLURM_VERSION_STRING);
	conf_ptr->vsize_factor        = conf->vsize_factor;

	slurm_conf_unlock();
}

/*
 * validate_slurm_user - validate that the uid is authorized to see
 *      privileged data (either user root or SlurmUser)
 * IN uid - user to validate
 * RET true if permitted to run, false otherwise
 */
extern bool validate_slurm_user(uid_t uid)
{
	if ((uid == 0) || (uid == getuid()))
		return true;
	else
		return false;
}

/*
 * validate_super_user - validate that the uid is authorized at the
 *      root, SlurmUser, or SLURMDB_ADMIN_SUPER_USER level
 * IN uid - user to validate
 * RET true if permitted to run, false otherwise
 */
extern bool validate_super_user(uid_t uid)
{
	if ((uid == 0) || (uid == getuid()) ||
	    assoc_mgr_get_admin_level(acct_db_conn, uid) >=
	    SLURMDB_ADMIN_SUPER_USER)
		return true;
	else
		return false;
}

/*
 * validate_operator - validate that the uid is authorized at the
 *      root, SlurmUser, or SLURMDB_ADMIN_OPERATOR level
 * IN uid - user to validate
 * RET true if permitted to run, false otherwise
 */
extern bool validate_operator(uid_t uid)
{
	if ((uid == 0) || (uid == getuid()) ||
	    assoc_mgr_get_admin_level(acct_db_conn, uid) >=
	    SLURMDB_ADMIN_OPERATOR)
		return true;
	else
		return false;
}
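
/*
 * Illustrative sketch (an assumption, abbreviated from the privileged RPC
 * handlers later in this file): a handler validates the caller's uid before
 * modifying controller state, roughly:
 *
 *	uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
 *	if (!validate_super_user(uid)) {
 *		error("Security violation, UPDATE_NODE RPC from uid=%d", uid);
 *		slurm_send_rc_msg(msg, ESLURM_USER_ID_MISSING);
 *		return;
 *	}
 */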

/* _kill_job_on_msg_fail - The request to create a job record succeeded,
 *	but the reply message to srun failed. We kill the job to avoid
 *	leaving it orphaned */
static void _kill_job_on_msg_fail(uint32_t job_id)
{
	/* Locks: Write job, write node */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };

	error("Job allocate response msg send failure, killing JobId=%u",
	      job_id);
	lock_slurmctld(job_write_lock);
	job_complete(job_id, 0, false, false, 0);
	unlock_slurmctld(job_write_lock);
}

/* create a credential for a given job step, return error code */
static int _make_step_cred(struct step_record *step_ptr,
			   slurm_cred_t **slurm_cred)
{
	slurm_cred_arg_t cred_arg;
	struct job_record* job_ptr = step_ptr->job_ptr;
	job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs;
	xassert(job_resrcs_ptr && job_resrcs_ptr->cpus);

	memset(&cred_arg, 0, sizeof(slurm_cred_arg_t));

	cred_arg.jobid    = job_ptr->job_id;
	cred_arg.stepid   = step_ptr->step_id;
	cred_arg.uid      = job_ptr->user_id;

	cred_arg.job_core_bitmap = job_resrcs_ptr->core_bitmap;
	cred_arg.job_hostlist    = job_resrcs_ptr->nodes;
	cred_arg.job_mem_limit   = job_ptr->details->pn_min_memory;
	cred_arg.job_nhosts      = job_resrcs_ptr->nhosts;
	cred_arg.job_gres_list   = job_ptr->gres_list;
	cred_arg.step_gres_list  = step_ptr->gres_list;

	cred_arg.step_core_bitmap = step_ptr->core_bitmap_job;
#ifdef HAVE_FRONT_END
	xassert(job_ptr->batch_host);
	cred_arg.step_hostlist   = job_ptr->batch_host;
#else
	cred_arg.step_hostlist   = step_ptr->step_layout->node_list;
#endif
	if (step_ptr->pn_min_memory)
		cred_arg.step_mem_limit  = step_ptr->pn_min_memory;
	cred_arg.cores_per_socket    = job_resrcs_ptr->cores_per_socket;
	cred_arg.sockets_per_node    = job_resrcs_ptr->sockets_per_node;
	cred_arg.sock_core_rep_count = job_resrcs_ptr->sock_core_rep_count;
	*slurm_cred = slurm_cred_create(slurmctld_config.cred_ctx, &cred_arg);
	if (*slurm_cred == NULL) {
		error("slurm_cred_create error");
		return ESLURM_INVALID_JOB_CREDENTIAL;
	}

	return SLURM_SUCCESS;
}
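
/*
 * Illustrative sketch (an assumption, abbreviated from the step-create
 * path): _slurm_rpc_job_step_create() builds the credential with
 * _make_step_cred() and returns it to srun in the step-create response,
 * roughly:
 *
 *	slurm_cred_t *slurm_cred = NULL;
 *	error_code = _make_step_cred(step_rec, &slurm_cred);
 *	if (error_code == SLURM_SUCCESS) {
 *		job_step_resp.cred = slurm_cred;
 *		resp.msg_type = RESPONSE_JOB_STEP_CREATE;
 *		resp.data = &job_step_resp;
 *		slurm_send_node_msg(msg->conn_fd, &resp);
 *	}
 */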

/* _slurm_rpc_allocate_resources:  process RPC to allocate resources for
 *	a job */
static void _slurm_rpc_allocate_resources(slurm_msg_t * msg)
{
	static int active_rpc_cnt = 0;
	int error_code = SLURM_SUCCESS;
	slurm_msg_t response_msg;
	job_desc_msg_t *job_desc_msg = (job_desc_msg_t *) msg->data;
	resource_allocation_response_msg_t alloc_msg;
	/* Locks: Read config, write job, write node, read partition */
	slurmctld_lock_t job_write_lock = {
		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
	uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
	int immediate = job_desc_msg->immediate;
	bool do_unlock = false;
	bool job_waiting = false;
	struct job_record *job_ptr = NULL;
	DEF_TIMERS;
	uint16_t port;	/* dummy value */
	slurm_addr_t resp_addr;

	START_TIMER;
	if ((uid != job_desc_msg->user_id) && (!validate_slurm_user(uid))) {
		error_code = ESLURM_USER_ID_MISSING;
		error("Security violation, RESOURCE_ALLOCATE from uid=%d",
		      uid);
	}
	debug2("sched: Processing RPC: REQUEST_RESOURCE_ALLOCATION from uid=%d",
	       uid);

	/* do RPC call */
	if ((job_desc_msg->alloc_node == NULL) ||
	    (job_desc_msg->alloc_node[0] == '\0')) {
		error_code = ESLURM_INVALID_NODE_NAME;
		error("REQUEST_RESOURCE_ALLOCATE lacks alloc_node from uid=%d",
		      uid);
	}
	if (error_code == SLURM_SUCCESS)
		error_code = validate_job_create_req(job_desc_msg);
#ifdef HAVE_CRAY
	/*
	 * Catch attempts to nest salloc sessions. It is not possible to use an
	 * ALPS session which has the same alloc_sid, it fails even if PAGG
	 * container IDs are used.
	 */
	if (allocated_session_in_use(job_desc_msg)) {
		error_code = ESLURM_RESERVATION_BUSY;
		error("attempt to nest ALPS allocation on %s:%d by uid=%d",
			job_desc_msg->alloc_node, job_desc_msg->alloc_sid, uid);
	}
#endif
	slurm_get_peer_addr(msg->conn_fd, &resp_addr);
	job_desc_msg->resp_host = xmalloc(16);
	slurm_get_ip_str(&resp_addr, &port, job_desc_msg->resp_host, 16);
	dump_job_desc(job_desc_msg);
	if (error_code == SLURM_SUCCESS) {
		do_unlock = true;
		_throttle_start(&active_rpc_cnt);
		lock_slurmctld(job_write_lock);
		error_code = job_allocate(job_desc_msg, immediate,
					  false, NULL,
					  true, uid, &job_ptr);
		/* unlock after finished using the job structure data */
		END_TIMER2("_slurm_rpc_allocate_resources");
	}

	/* return result */
	if ((error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) ||
	    (error_code == ESLURM_RESERVATION_NOT_USABLE) ||
	    (error_code == ESLURM_QOS_THRES) ||
	    (error_code == ESLURM_NODE_NOT_AVAIL) ||
	    (error_code == ESLURM_JOB_HELD))
		job_waiting = true;

	if ((error_code == SLURM_SUCCESS) ||
	    ((immediate == 0) && job_waiting)) {
		info("sched: _slurm_rpc_allocate_resources JobId=%u "
		     "NodeList=%s %s",job_ptr->job_id,
		     job_ptr->nodes, TIME_STR);
		if (job_ptr->job_resrcs && job_ptr->job_resrcs->cpu_array_cnt) {
			alloc_msg.num_cpu_groups = job_ptr->job_resrcs->
						   cpu_array_cnt;
			alloc_msg.cpu_count_reps = xmalloc(sizeof(uint32_t) *
						   job_ptr->job_resrcs->
						   cpu_array_cnt);
			memcpy(alloc_msg.cpu_count_reps,
			       job_ptr->job_resrcs->cpu_array_reps,
			       (sizeof(uint32_t) * job_ptr->job_resrcs->
				cpu_array_cnt));
			alloc_msg.cpus_per_node  = xmalloc(sizeof(uint16_t) *
						   job_ptr->job_resrcs->
						   cpu_array_cnt);
			memcpy(alloc_msg.cpus_per_node,
			       job_ptr->job_resrcs->cpu_array_value,
			       (sizeof(uint16_t) * job_ptr->job_resrcs->
				cpu_array_cnt));
		} else {
			alloc_msg.num_cpu_groups = 0;
			alloc_msg.cpu_count_reps = NULL;
			alloc_msg.cpus_per_node  = NULL;
		}
		alloc_msg.error_code     = error_code;
		alloc_msg.job_id         = job_ptr->job_id;
		alloc_msg.node_cnt       = job_ptr->node_cnt;
		alloc_msg.node_list      = xstrdup(job_ptr->nodes);
		alloc_msg.alias_list     = xstrdup(job_ptr->alias_list);
		alloc_msg.select_jobinfo =
			select_g_select_jobinfo_copy(job_ptr->select_jobinfo);
		if (job_ptr->details) {
			alloc_msg.pn_min_memory = job_ptr->details->
						  pn_min_memory;
		} else {
			alloc_msg.pn_min_memory = 0;
		}
		unlock_slurmctld(job_write_lock);
		_throttle_fini(&active_rpc_cnt);
		slurm_msg_t_init(&response_msg);
		response_msg.msg_type = RESPONSE_RESOURCE_ALLOCATION;
		response_msg.data = &alloc_msg;
		if (slurm_send_node_msg(msg->conn_fd, &response_msg) < 0)
			_kill_job_on_msg_fail(job_ptr->job_id);
		xfree(alloc_msg.cpu_count_reps);
		xfree(alloc_msg.cpus_per_node);
		xfree(alloc_msg.node_list);
		xfree(alloc_msg.alias_list);
		select_g_select_jobinfo_free(alloc_msg.select_jobinfo);
		schedule_job_save();	/* has own locks */
		schedule_node_save();	/* has own locks */
	} else {	/* allocate error */
		if (do_unlock) {
			unlock_slurmctld(job_write_lock);
			_throttle_fini(&active_rpc_cnt);
		}
		info("_slurm_rpc_allocate_resources: %s ",
		     slurm_strerror(error_code));
		slurm_send_rc_msg(msg, error_code);
	}
}

/* _slurm_rpc_dump_conf - process RPC for Slurm configuration information */
static void _slurm_rpc_dump_conf(slurm_msg_t * msg)
{
	slurm_msg_t response_msg;
	last_update_msg_t *last_time_msg = (last_update_msg_t *) msg->data;
	slurm_ctl_conf_info_msg_t config_tbl;
	/* Locks: Read config, partition */
	slurmctld_lock_t config_read_lock = {
		READ_LOCK, NO_LOCK, NO_LOCK, READ_LOCK };
	uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
	DEF_TIMERS;

	START_TIMER;
	debug2("Processing RPC: REQUEST_BUILD_INFO from uid=%d",
	       uid);
	lock_slurmctld(config_read_lock);

	/* check to see if configuration data has changed */
	if ((last_time_msg->last_update - 1) >= slurmctld_conf.last_update) {
		unlock_slurmctld(config_read_lock);
		debug2("_slurm_rpc_dump_conf, no change");
		slurm_send_rc_msg(msg, SLURM_NO_CHANGE_IN_DATA);
	} else {
		_fill_ctld_conf(&config_tbl);
		unlock_slurmctld(config_read_lock);
		END_TIMER2("_slurm_rpc_dump_conf");

		/* init response_msg structure */
		slurm_msg_t_init(&response_msg);
		response_msg.protocol_version = msg->protocol_version;
		response_msg.address = msg->address;
		response_msg.msg_type = RESPONSE_BUILD_INFO;