-
Morris Jette authored
Error message changed from Missing time limit to Job submission rejected due to missing time limit bug 438
Morris Jette authoredError message changed from Missing time limit to Job submission rejected due to missing time limit bug 438
slurm_errno.c 16.84 KiB
/*****************************************************************************\
* slurm_errno.c - error codes and functions for slurm
******************************************************************************
* Copyright (C) 2002-2007 The Regents of the University of California.
* Copyright (C) 2008-2010 Lawrence Livermore National Security.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Jim Garlick <garlick@llnl.gov>, et. al.
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
/* This implementation relies on "overloading" the libc errno by
* partitioning its domain into system (<1000) and SLURM (>=1000) values.
* SLURM API functions should call slurm_seterrno() to set errno to a value.
* API users should call slurm_strerror() to convert all errno values to
* their description strings.
*/
#if HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include "slurm/slurm_errno.h"
#include "src/common/slurm_jobcomp.h"
#include "src/common/switch.h"
/* Type for error string table entries */
typedef struct {
int xe_number;
char *xe_message;
} slurm_errtab_t;
/* Add new error values to slurm/slurm_errno.h, and their descriptions to this table */
static slurm_errtab_t slurm_errtab[] = {
{0, "No error"},
{-1, "Unspecified error"},
{EINPROGRESS, "Operation now in progress"},
/* General Message error codes */
{ SLURM_UNEXPECTED_MSG_ERROR,
"Unexpected message received" },
{ SLURM_COMMUNICATIONS_CONNECTION_ERROR,
"Communication connection failure" },
{ SLURM_COMMUNICATIONS_SEND_ERROR,
"Message send failure" },
{ SLURM_COMMUNICATIONS_RECEIVE_ERROR,
"Message receive failure" },
{ SLURM_COMMUNICATIONS_SHUTDOWN_ERROR,
"Communication shutdown failure" },
{ SLURM_PROTOCOL_VERSION_ERROR,
"Protocol version has changed, re-link your code" },
{ SLURM_PROTOCOL_IO_STREAM_VERSION_ERROR,
"I/O stream version number error" },
{ SLURM_PROTOCOL_AUTHENTICATION_ERROR,
"Protocol authentication error" },
{ SLURM_PROTOCOL_INSANE_MSG_LENGTH,
"Insane message length" },
{ SLURM_MPI_PLUGIN_NAME_INVALID,
"Invalid MPI plugin name" },
{ SLURM_MPI_PLUGIN_PRELAUNCH_SETUP_FAILED,
"MPI plugin's pre-launch setup failed" },
{ SLURM_PLUGIN_NAME_INVALID,
"Plugin initialization failed" },
{ SLURM_UNKNOWN_FORWARD_ADDR,
"Can't find an address, check slurm.conf" },
/* communication failures to/from slurmctld */
{ SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR,
"Unable to contact slurm controller (connect failure)" },
{ SLURMCTLD_COMMUNICATIONS_SEND_ERROR,
"Unable to contact slurm controller (send failure)" },
{ SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR,
"Unable to contact slurm controller (receive failure)" },
{ SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR,
"Unable to contact slurm controller (shutdown failure)"},
/* _info.c/communcation layer RESPONSE_SLURM_RC message codes */
{ SLURM_NO_CHANGE_IN_DATA, /* Not really an error */
"Data has not changed since time specified" },
/* slurmctld error codes */
{ ESLURM_INVALID_PARTITION_NAME,
"Invalid partition name specified" },
{ ESLURM_DEFAULT_PARTITION_NOT_SET,
"No partition specified or system default partition" },
{ ESLURM_ACCESS_DENIED,
"Access/permission denied" },
{ ESLURM_JOB_MISSING_REQUIRED_PARTITION_GROUP,
"User's group not permitted to use this partition" },
{ ESLURM_REQUESTED_NODES_NOT_IN_PARTITION,
"Requested nodes not in this partition" },
{ ESLURM_TOO_MANY_REQUESTED_CPUS,
"More processors requested than permitted" },
{ ESLURM_INVALID_NODE_COUNT,
"Node count specification invalid" },
{ ESLURM_ERROR_ON_DESC_TO_RECORD_COPY,
"Unable to create job record, try again" },
{ ESLURM_JOB_MISSING_SIZE_SPECIFICATION,
"Job size specification needs to be provided" },
{ ESLURM_JOB_SCRIPT_MISSING,
"Job script not specified" },
{ ESLURM_USER_ID_MISSING,
"Invalid user id" },
{ ESLURM_DUPLICATE_JOB_ID,
"Duplicate job id" },
{ ESLURM_PATHNAME_TOO_LONG,
"Pathname of a file, directory or other parameter too long" },
{ ESLURM_NOT_TOP_PRIORITY,
"Immediate execution impossible, insufficient priority" },
{ ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE,
"Requested node configuration is not available" },
{ ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE,
"Requested partition configuration not available now" },
{ ESLURM_NODES_BUSY,
"Requested nodes are busy" },
{ ESLURM_INVALID_JOB_ID,
"Invalid job id specified" },
{ ESLURM_INVALID_NODE_NAME,
"Invalid node name specified" },
{ ESLURM_WRITING_TO_FILE,
"I/O error writing script/environment to file" },
{ ESLURM_TRANSITION_STATE_NO_UPDATE,
"Job can not be altered now, try again later" },
{ ESLURM_ALREADY_DONE,
"Job/step already completing or completed" },
{ ESLURM_INTERCONNECT_FAILURE,
"Error configuring interconnect" },
{ ESLURM_BAD_DIST,
"Task distribution specification invalid" },
{ ESLURM_JOB_PENDING,
"Job is pending execution" },
{ ESLURM_BAD_TASK_COUNT,
"Task count specification invalid" },
{ ESLURM_INVALID_JOB_CREDENTIAL,
"Error generating job credential" },
{ ESLURM_IN_STANDBY_MODE,
"Slurm backup controller in standby mode" },
{ ESLURM_INVALID_NODE_STATE,
"Invalid node state specified" },
{ ESLURM_INVALID_FEATURE,
"Invalid feature specification" },
{ ESLURM_INVALID_AUTHTYPE_CHANGE,
"AuthType change requires restart of all SLURM daemons and "
"commands to take effect"},
{ ESLURM_INVALID_CHECKPOINT_TYPE_CHANGE,
"CheckpointType change requires restart of all SLURM daemons "
"to take effect" },
{ ESLURM_INVALID_CRYPTO_TYPE_CHANGE,
"CryptoType change requires restart of all SLURM daemons "
"to take effect" },
{ ESLURM_INVALID_SCHEDTYPE_CHANGE,
"SchedulerType change requires restart of the slurmctld daemon "
"to take effect" },
{ ESLURM_INVALID_SELECTTYPE_CHANGE,
"SelectType change requires restart of the slurmctld daemon "
"to take effect" },
{ ESLURM_INVALID_SWITCHTYPE_CHANGE,
"SwitchType change requires restart of all SLURM daemons and "
"jobs to take effect" },
{ ESLURM_FRAGMENTATION,
"Immediate execution impossible, "
"resources too fragmented for allocation" },
{ ESLURM_NOT_SUPPORTED,
"Requested operation not supported on this system" },
{ ESLURM_DISABLED,
"Requested operation is presently disabled" },
{ ESLURM_DEPENDENCY,
"Job dependency problem" },
{ ESLURM_BATCH_ONLY,
"Only batch jobs are accepted or processed" },
{ ESLURM_TASKDIST_ARBITRARY_UNSUPPORTED,
"Current SwitchType does not permit arbitrary task distribution"},
{ ESLURM_TASKDIST_REQUIRES_OVERCOMMIT,
"Requested more tasks than available processors" },
{ ESLURM_JOB_HELD,
"Job is in held state, pending scheduler release" },
{ ESLURM_INVALID_TASK_MEMORY,
"Memory required by task is not available" },
{ ESLURM_INVALID_ACCOUNT,
"Invalid account or account/partition combination specified"},
{ ESLURM_INVALID_PARENT_ACCOUNT,
"Invalid parent account specified" },
{ ESLURM_SAME_PARENT_ACCOUNT,
"Account already child of parent account specified" },
{ ESLURM_INVALID_QOS,
"Job has invalid qos" },
{ ESLURM_INVALID_WCKEY,
"Job has invalid wckey" },
{ ESLURM_INVALID_LICENSES,
"Invalid license specification" },
{ ESLURM_NEED_RESTART,
"The node configuration changes that were made require restart "
"of the slurmctld daemon to take effect"},
{ ESLURM_ACCOUNTING_POLICY,
"Job violates accounting/QOS policy (job submit limit, user's "
"size and/or time limits)"},
{ ESLURM_INVALID_TIME_LIMIT,
"Requested time limit is invalid (missing or exceeds some limit)"},
{ ESLURM_RESERVATION_ACCESS,
"Access denied to requested reservation" },
{ ESLURM_RESERVATION_INVALID,
"Requested reservation is invalid" },
{ ESLURM_INVALID_TIME_VALUE,
"Invalid time specified" },
{ ESLURM_RESERVATION_BUSY,
"Requested reservation is in use" },
{ ESLURM_RESERVATION_NOT_USABLE,
"Requested reservation not usable now" },
{ ESLURM_RESERVATION_OVERLAP,
"Requested reservation overlaps with another reservation" },
{ ESLURM_PORTS_BUSY,
"Required ports are in use" },
{ ESLURM_PORTS_INVALID,
"Requires more ports than can be reserved" },
{ ESLURM_PROLOG_RUNNING,
"SlurmctldProlog is still running" },
{ ESLURM_NO_STEPS,
"Job steps can not be run on this cluster" },
{ ESLURM_INVALID_BLOCK_STATE,
"Invalid block state specified" },
{ ESLURM_INVALID_BLOCK_LAYOUT,
"Functionality not available with current block layout mode"},
{ ESLURM_INVALID_BLOCK_NAME,
"Invalid block name specified" },
{ ESLURM_QOS_PREEMPTION_LOOP,
"QOS Preemption loop detected" },
{ ESLURM_NODE_NOT_AVAIL,
"Required node not available (down or drained)" },
{ ESLURM_INVALID_CPU_COUNT,
"CPU count specification invalid" },
{ ESLURM_PARTITION_NOT_AVAIL,
"Required partition not available (inactive or drain)"},
{ ESLURM_CIRCULAR_DEPENDENCY,
"Circular job dependency" },
{ ESLURM_INVALID_GRES,
"Invalid generic resource (gres) specification" },
{ ESLURM_JOB_NOT_PENDING,
"Job is no longer pending execution" },
{ ESLURM_QOS_THRES,
"Requested account has breached requested QOS usage threshold"},
{ ESLURM_PARTITION_IN_USE,
"Partition is in use" },
{ ESLURM_STEP_LIMIT,
"Step limit reached for this job" },
{ ESLURM_JOB_SUSPENDED,
"Job is current suspended, requested operation disabled" },
{ ESLURM_CAN_NOT_START_IMMEDIATELY,
"Job can not start immediately" },
{ ESLURM_INTERCONNECT_BUSY,
"Switch resources currently not available" },
{ ESLURM_RESERVATION_EMPTY,
"Reservation request lacks users or accounts" },
{ ESLURM_INVALID_ARRAY,
"Invalid job array specification" },
/* slurmd error codes */
{ ESLRUMD_PIPE_ERROR_ON_TASK_SPAWN,
"Pipe error on task spawn" },
{ ESLURMD_KILL_TASK_FAILED,
"Kill task failed" },
{ ESLURMD_UID_NOT_FOUND,
"User not found on host" },
{ ESLURMD_GID_NOT_FOUND,
"Group ID not found on host" },
{ ESLURMD_INVALID_ACCT_FREQ,
"Invalid accounting frequency requested" },
{ ESLURMD_INVALID_JOB_CREDENTIAL,
"Invalid job credential" },
{ ESLURMD_CREDENTIAL_REVOKED,
"Job credential revoked" },
{ ESLURMD_CREDENTIAL_EXPIRED,
"Job credential expired" },
{ ESLURMD_CREDENTIAL_REPLAYED,
"Job credential replayed" },
{ ESLURMD_CREATE_BATCH_DIR_ERROR,
"Slurmd could not create a batch directory or file" },
{ ESLURMD_MODIFY_BATCH_DIR_ERROR,
"Slurmd could not chown or chmod a batch directory" },
{ ESLURMD_CREATE_BATCH_SCRIPT_ERROR,
"Slurmd could not create a batch script" },
{ ESLURMD_MODIFY_BATCH_SCRIPT_ERROR,
"Slurmd could not chown or chmod a batch script" },
{ ESLURMD_SETUP_ENVIRONMENT_ERROR,
"Slurmd could not set up environment for batch job" },
{ ESLURMD_SHARED_MEMORY_ERROR,
"Slurmd shared memory error" },
{ ESLURMD_SET_UID_OR_GID_ERROR,
"Slurmd could not set UID or GID" },
{ ESLURMD_SET_SID_ERROR,
"Slurmd could not set session ID" },
{ ESLURMD_CANNOT_SPAWN_IO_THREAD,
"Slurmd could not spawn I/O thread" },
{ ESLURMD_FORK_FAILED,
"Slurmd could not fork job" },
{ ESLURMD_EXECVE_FAILED,
"Slurmd could not execve job" },
{ ESLURMD_IO_ERROR,
"Slurmd could not connect IO" },
{ ESLURMD_PROLOG_FAILED,
"Job prolog failed" },
{ ESLURMD_EPILOG_FAILED,
"Job epilog failed" },
{ ESLURMD_SESSION_KILLED,
"Session manager killed" },
{ ESLURMD_TOOMANYSTEPS,
"Too many job steps on node" },
{ ESLURMD_STEP_EXISTS,
"Job step already exists" },
{ ESLURMD_JOB_NOTRUNNING,
"Job step not running" },
{ ESLURMD_STEP_SUSPENDED,
"Job step is suspended" },
{ ESLURMD_STEP_NOTSUSPENDED,
"Job step is not currently suspended" },
/* slurmd errors in user batch job */
{ ESCRIPT_CHDIR_FAILED,
"unable to change directory to work directory" },
{ ESCRIPT_OPEN_OUTPUT_FAILED,
"cound not open output file" },
{ ESCRIPT_NON_ZERO_RETURN,
"Script terminated with non-zero exit code" },
/* socket specific SLURM communications error */
{ SLURM_PROTOCOL_SOCKET_IMPL_ZERO_RECV_LENGTH,
"Received zero length message" },
{ SLURM_PROTOCOL_SOCKET_IMPL_NEGATIVE_RECV_LENGTH,
"Received message length < 0" },
{ SLURM_PROTOCOL_SOCKET_IMPL_NOT_ALL_DATA_SENT,
"Failed to send entire message" },
{ ESLURM_PROTOCOL_INCOMPLETE_PACKET,
"Header lengths are longer than data received" },
{ SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT,
"Socket timed out on send/recv operation" },
{ SLURM_PROTOCOL_SOCKET_ZERO_BYTES_SENT,
"Zero Bytes were transmitted or received" },
/* slurm_auth errors */
{ ESLURM_AUTH_CRED_INVALID,
"Invalid authentication credential" },
{ ESLURM_AUTH_FOPEN_ERROR,
"Failed to open authentication public key" },
{ ESLURM_AUTH_NET_ERROR,
"Failed to connect to authentication agent" },
/* accounting errors */
{ ESLURM_DB_CONNECTION,
"Unable to connect to database" },
{ ESLURM_JOBS_RUNNING_ON_ASSOC,
"Job(s) active, cancel job(s) before remove" },
{ ESLURM_CLUSTER_DELETED,
"Cluster deleted, commit/rollback immediately" },
{ ESLURM_ONE_CHANGE,
"Can only change one at a time" },
{ ESLURM_BAD_NAME,
"Unacceptable name given. (No '.' in name allowed)" },
/* plugin and custom errors */
{ ESLURM_MISSING_TIME_LIMIT,
"Job submission rejected due to missing time limit" }
};
/*
* Linear search through table of errno values and strings,
* returns NULL on error, string on success.
*/
static char *_lookup_slurm_api_errtab(int errnum)
{
char *res = NULL;
int i;
for (i = 0; i < sizeof(slurm_errtab) / sizeof(slurm_errtab_t); i++) {
if (slurm_errtab[i].xe_number == errnum) {
res = slurm_errtab[i].xe_message;
break;
}
}
if ((res == NULL) &&
(errnum >= ESLURM_JOBCOMP_MIN) &&
(errnum <= ESLURM_JOBCOMP_MAX))
res = g_slurm_jobcomp_strerror(errnum);
#if 0
/* If needed, re-locate slurmctld/sched_plugin.[ch] into common */
if ((res == NULL) &&
(errnum >= ESLURM_SCHED_MIN) &&
(errnum <= ESLURM_SCHED_MAX))
res = sched_strerror(errnum);
#endif
if ((res == NULL) &&
(errnum >= ESLURM_SWITCH_MIN) &&
(errnum <= ESLURM_SWITCH_MAX))
res = switch_strerror(errnum);
return res;
}
/*
* Return string associated with error (SLURM or system).
* Always returns a valid string (because strerror always does).
*/
char *slurm_strerror(int errnum)
{
char *res = _lookup_slurm_api_errtab(errnum);
return (res ? res : strerror(errnum));
}
/*
* Get errno
*/
int slurm_get_errno(void)
{
return errno;
}
/*
* Set errno to the specified value.
*/
void slurm_seterrno(int errnum)
{
#ifdef __set_errno
__set_errno(errnum);
#else
errno = errnum;
#endif
}
/*
* Print "message: error description" on stderr for current errno value.
*/
void slurm_perror(char *msg)
{
fprintf(stderr, "%s: %s\n", msg, slurm_strerror(errno));
}