diff --git a/configure.ac b/configure.ac index be7b4dc2a3958836d74bc5f247e690f14e5f2c20..50baf2814fedf6c63dd5639e670292623b488940 100644 --- a/configure.ac +++ b/configure.ac @@ -103,8 +103,12 @@ CFLAGS="$CFLAGS $PTHREAD_CFLAGS" LIBS="$PTHREAD_LIBS $LIBS" case "$host" in - *-*-aix*) LDFLAGS="$LDFLAGS -Wl,-brtl -Wl,-bexpall " ;; + *-*-aix*) LDFLAGS="$LDFLAGS -Wl,-brtl -Wl,-bexpall" + CMD_LDFLAGS="-Wl,-bgcbypass:1000" # keep all common functions + AC_DEFINE(USE_ALIAS, 0, [Define slurm_ prefix function aliases for plugins]) ;; + *) AC_DEFINE(USE_ALIAS, 1, [Define slurm_ prefix function aliases for plugins]) ;; esac +AC_SUBST(CMD_LDFLAGS) AC_SLURM_SEMAPHORE diff --git a/slurm/bnr.h b/slurm/bnr.h index 25b4b727125f5b9ff84b64076bfc87eb6248206b..ffadeb1e0eb2e9f3f40dd1450ecf64a645eaac27 100644 --- a/slurm/bnr.h +++ b/slurm/bnr.h @@ -13,11 +13,11 @@ typedef int BNR_gid; #define BNR_SUCCESS 0 #define BNR_ERROR 1 -int BNR_Init(BNR_gid *mygid); -int BNR_Put(BNR_gid gid, char *attr, char *val); -int BNR_Fence(BNR_gid gid); -int BNR_Get(BNR_gid gid, char *attr, char *val); -int BNR_Finalize(); -int BNR_Rank(BNR_gid group, int *myrank); -int BNR_Nprocs(BNR_gid group, int *nprocs); +extern int BNR_Init(BNR_gid *mygid); +extern int BNR_Put(BNR_gid gid, char *attr, char *val); +extern int BNR_Fence(BNR_gid gid); +extern int BNR_Get(BNR_gid gid, char *attr, char *val); +extern int BNR_Finalize(); +extern int BNR_Rank(BNR_gid group, int *myrank); +extern int BNR_Nprocs(BNR_gid group, int *nprocs); diff --git a/src/common/cbuf.c b/src/common/cbuf.c index 9aca5a9bc4dbcc84a7a6bbaa6e875473b73b63c8..bfcb23cf6217670f171f1f386736c8201a6b1fc1 100644 --- a/src/common/cbuf.c +++ b/src/common/cbuf.c @@ -1275,7 +1275,7 @@ cbuf_get_fd (void *dstbuf, int *psrcfd, int len) do { n = read(*psrcfd, dstbuf, len); - } while ((n < 0) && (errno == EINTR)); + } while ((n < 0) && ((errno == EINTR) || (errno == EAGAIN))); return(n); } diff --git a/src/common/log.c b/src/common/log.c index 
8f5a18a44b2f3991991f8b9438f98202985367d5..3237f662db67fa7a4770bc66b4837a3aca9aac47 100644 --- a/src/common/log.c +++ b/src/common/log.c @@ -414,6 +414,28 @@ static char *vxstrfmt(const char *fmt, va_list ap) } else xstrcat(buf, "%d"); break; + case 'u': + if (unprocessed == 0) { + snprintf(tmp, sizeof(tmp), "%u", + va_arg(ap, int)); + xstrcat(buf, tmp); + } else + xstrcat(buf, "%u"); + break; + case 'l': + if ((unprocessed == 0) && (*(p+1) == 'u')) { + snprintf(tmp, sizeof(tmp), "%lu", + va_arg(ap, long unsigned)); + xstrcat(buf, tmp); + p++; + } else if ((unprocessed==0) && (*(p+1)=='d')) { + snprintf(tmp, sizeof(tmp), "%ld", + va_arg(ap, long int)); + xstrcat(buf, tmp); + p++; + } else + xstrcat(buf, "%l"); + break; default: /* try to handle the rest */ xstrcatchar(buf, '%'); xstrcatchar(buf, *p); diff --git a/src/common/macros.h b/src/common/macros.h index 45c18c9c50400cafc500ae12b3b2af46c2a3e9b2..d654fa37d0d14e7be486862edfca711547488e28 100644 --- a/src/common/macros.h +++ b/src/common/macros.h @@ -192,17 +192,47 @@ typedef enum {false, true} bool; } \ } _STMT_END +# ifdef PTHREAD_SCOPE_SYSTEM +# define slurm_attr_init(attr) \ + _STMT_START { \ + if (pthread_attr_init(attr)) \ + fatal("pthread_attr_init: %m"); \ + /* we want 1:1 threads if there is a choice */ \ + if (pthread_attr_setscope(attr, PTHREAD_SCOPE_SYSTEM)) \ + error("pthread_attr_setscope: %m"); \ + if (pthread_attr_setstacksize(attr, 1024*1024)) \ + error("pthread_attr_setstacksize: %m"); \ + } _STMT_END +# else +# define slurm_attr_init(attr) \ + _STMT_START { \ + if (pthread_attr_init(attr)) \ + fatal("pthread_attr_init: %m"); \ + if (pthread_attr_setstacksize(attr, 1024*1024)) \ + error("pthread_attr_setstacksize: %m"); \ + } _STMT_END +# endif + #else /* !WITH_PTHREADS */ # define slurm_mutex_init(mutex) # define slurm_mutex_destroy(mutex) # define slurm_mutex_lock(mutex) # define slurm_mutex_unlock(mutex) +# define slurm_attr_init(attr) #endif /* WITH_PTHREADS */ #ifndef strong_alias -# define 
strong_alias(name, aliasname) \ - extern __typeof (name) aliasname __attribute ((alias (#name))) +# if USE_ALIAS +# define strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute ((alias (#name))) +# else + /* dummy function definition, + * confirm "aliasname" is free and waste "name" */ +# define strong_alias(name, aliasname) \ + extern void aliasname(int name) +# endif #endif + #endif /* !_MACROS_H */ diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c index 3a6ddc4ecf2fbbf4b546da4527733fc03774bc3d..a9901160ba7096646d3637c1518165d422205016 100644 --- a/src/common/slurm_cred.c +++ b/src/common/slurm_cred.c @@ -31,8 +31,10 @@ #include <slurm/slurm_errno.h> -#include <stdarg.h> #include <fcntl.h> +#include <stdarg.h> +#include <stdlib.h> +#include <sys/time.h> /* * OpenSSL includes @@ -395,14 +397,21 @@ slurm_cred_faker(slurm_cred_arg_t *arg) cred->ctime = time(NULL); cred->siglen = SLURM_IO_KEY_SIZE; - if ((fd = open("/dev/urandom", O_RDONLY)) < 0) - error ("unable to open /dev/random: %m"); - cred->signature = xmalloc(cred->siglen * sizeof(char)); - read(fd, cred->signature, cred->siglen); - if (close(fd) < 0) - error ("close(/dev/random): %m"); + if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) { + read(fd, cred->signature, cred->siglen); + if (close(fd) < 0) + error ("close(/dev/urandom): %m"); + } else { /* Note: some systems lack this file */ + unsigned int i; + struct timeval tv; + gettimeofday(&tv, NULL); + i = (unsigned int) (tv.tv_sec + tv.tv_usec); + srand((unsigned int) i); + for (i=0; i<cred->siglen; i++) + cred->signature[i] = (rand() & 0xff); + } slurm_mutex_unlock(&cred->mutex); return cred; @@ -966,6 +975,7 @@ _slurm_cred_sign(slurm_cred_ctx_t ctx, slurm_cred_t cred) rc = SLURM_ERROR; } + EVP_MD_CTX_cleanup(&ectx); free_buf(buffer); return rc; diff --git a/src/common/slurm_protocol_interface.h b/src/common/slurm_protocol_interface.h index 
c8f36b3e00246a381b9b94037d3f72d0b0556c8d..b302fa0276532bbc67b6f9c63187fe1b4fd1cd8e 100644 --- a/src/common/slurm_protocol_interface.h +++ b/src/common/slurm_protocol_interface.h @@ -65,7 +65,7 @@ #include "src/common/pack.h" #include "src/common/slurm_protocol_common.h" -#define SLURM_MESSAGE_TIMEOUT_MSEC_STATIC 3000 +#define SLURM_MESSAGE_TIMEOUT_MSEC_STATIC 5000 /****************\ ** Data Types ** diff --git a/src/common/slurm_protocol_socket_implementation.c b/src/common/slurm_protocol_socket_implementation.c index 47ca7ddd17d10f381c5d87f73ad0b0dcac0b819d..e9f15b84eda528e3026823f95fa7049358c815c7 100644 --- a/src/common/slurm_protocol_socket_implementation.c +++ b/src/common/slurm_protocol_socket_implementation.c @@ -252,7 +252,7 @@ int _slurm_send_timeout(slurm_fd fd, char *buf, size_t size, goto done; } if ((rc = poll(&ufds, 1, timeout)) <= 0) { - if ((rc == 0) || (errno == EINTR)) + if ((rc == 0) || (errno == EINTR) || (errno == EAGAIN)) continue; else { debug("_slurm_send_timeout at %d of %d, " @@ -329,7 +329,7 @@ int _slurm_recv_timeout(slurm_fd fd, char *buffer, size_t size, } if ((rc = poll(&ufds, 1, timeout)) <= 0) { - if ((errno == EINTR) || (rc == 0)) + if ((errno == EINTR) || (errno == EAGAIN) || (rc == 0)) continue; else { debug("_slurm_recv_timeout at %d of %d, " diff --git a/src/common/slurm_protocol_util.c b/src/common/slurm_protocol_util.c index 638c2a3da2e4d23b619c73316ec3af636efdea26..cf0da53c6f0e299415ec75d4ae5045abef30f1e1 100644 --- a/src/common/slurm_protocol_util.c +++ b/src/common/slurm_protocol_util.c @@ -62,6 +62,7 @@ void init_header(header_t * header, slurm_msg_type_t msg_type, header->version = SLURM_PROTOCOL_VERSION; header->flags = flags; header->msg_type = msg_type; + header->body_length = 0; /* over-written later */ } /* diff --git a/src/common/slurm_xlator.h b/src/common/slurm_xlator.h index 2774f0b18680002f015d0a49450e94825782fc4d..52b00fb34dff2025213d3b3f37fec15e06c13f65 100644 --- a/src/common/slurm_xlator.h +++ 
b/src/common/slurm_xlator.h @@ -10,7 +10,7 @@ * * All SLURM functions referenced from the switch and auth plugins should * have aliases established. Functions not referenced from the plugins - * need not be aliased. + * need not be aliased. * * To use this header file: * 1. In the module containing the exported function code, establish an @@ -24,6 +24,8 @@ * and remove other slurm header files (they should all be in this header). * This logic will have the plugin link only to the function names with * the "slurm_" prefix. + * + * NOTE: Not all operating systems support this function aliasing (e.g. AIX). ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -51,6 +53,12 @@ #ifndef __SLURM_XLATOR_H__ #define __SLURM_XLATOR_H__ +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#if USE_ALIAS + /* arg_desc.[ch] functions*/ #define arg_count slurm_arg_count #define arg_idx_by_name slurm_arg_idx_by_name @@ -233,6 +241,8 @@ #define xstrdup slurm_xstrdup #define xbasename slurm_xbasename +#endif /* USE_ALIAS */ + /* Include the function definitions after redefining their names. 
*/ #include "src/common/arg_desc.h" #include "src/common/bitstring.h" diff --git a/src/common/xmalloc.c b/src/common/xmalloc.c index 7678fc6d6c0a9779d7e64f0a7150a2555503167b..42eb90a6c3013c3ffbe478c856ee1e559f5cf804 100644 --- a/src/common/xmalloc.c +++ b/src/common/xmalloc.c @@ -264,7 +264,7 @@ void slurm_xfree(void **item, const char *file, int line, const char *func) static void malloc_assert_failed(char *expr, const char *file, int line, const char *caller, const char *func) { - fatal("%s() Error: from %s:%d: %s(): Assertion (%s) failed", + error("%s() Error: from %s:%d: %s(): Assertion (%s) failed", func, file, line, caller, expr); abort(); } diff --git a/src/scancel/Makefile.am b/src/scancel/Makefile.am index ab37c7449bae4b7571b13e85a4cb5bc3df85e4a1..2681e24baeb98a54057c0f4e47428b4e2fd3a48b 100644 --- a/src/scancel/Makefile.am +++ b/src/scancel/Makefile.am @@ -14,6 +14,8 @@ scancel_LDADD = \ noinst_HEADERS = scancel.h scancel_SOURCES = scancel.c opt.c +scancel_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) + force: $(scancel_LDADD) : force @cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/scontrol/Makefile.am b/src/scontrol/Makefile.am index 858c07f1635e6a4169f3f1a363d480e22559639f..781a0b559f2f6ce17177c2c81281cb6e8e5943b8 100644 --- a/src/scontrol/Makefile.am +++ b/src/scontrol/Makefile.am @@ -12,5 +12,7 @@ LDADD = \ $(top_builddir)/src/api/libslurm.la \ $(READLINE_LIBS) - scontrol_SOURCES = scontrol.c + +scontrol_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) + diff --git a/src/sinfo/Makefile.am b/src/sinfo/Makefile.am index 963529ba6af11c437cb2f45399754386ac6ba0c9..78b6bfe91cd677217b38d2f23f78c49ff9448ef6 100644 --- a/src/sinfo/Makefile.am +++ b/src/sinfo/Makefile.am @@ -18,3 +18,5 @@ force: $(sinfo_LDADD) : force @cd `dirname $@` && $(MAKE) `basename $@` +sinfo_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) + diff --git a/src/slurmctld/Makefile.am b/src/slurmctld/Makefile.am index 47d748ad90b5e18b4ed9f2bd88c2b431362b5f2b..23f4356b11b0f86d3d614e26594d3edc904e23be 
100644 --- a/src/slurmctld/Makefile.am +++ b/src/slurmctld/Makefile.am @@ -42,8 +42,7 @@ slurmctld_SOURCES = \ state_save.h \ step_mgr.c - -slurmctld_LDFLAGS = -export-dynamic +slurmctld_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) force: $(slurmctld_LDADD) : force diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c index 9c3a8ca90cd3c1de94cd8167d39e1253f3ba9790..afe7efbcff1ca526fa1411174b28a5a6ad01de2c 100644 --- a/src/slurmctld/agent.c +++ b/src/slurmctld/agent.c @@ -188,15 +188,10 @@ void *agent(void *args) thread_ptr = agent_info_ptr->thread_struct; /* start the watchdog thread */ - if (pthread_attr_init(&attr_wdog)) - fatal("pthread_attr_init error %m"); + slurm_attr_init(&attr_wdog); if (pthread_attr_setdetachstate (&attr_wdog, PTHREAD_CREATE_JOINABLE)) error("pthread_attr_setdetachstate error %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - if (pthread_attr_setscope(&attr_wdog, PTHREAD_SCOPE_SYSTEM)) - error("pthread_attr_setscope error %m"); -#endif while (pthread_create(&thread_wdog, &attr_wdog, _wdog, (void *) agent_info_ptr)) { error("pthread_create error %m"); @@ -222,16 +217,10 @@ void *agent(void *args) * _thread_per_node_rpc() */ task_specific_ptr = _make_task_data(agent_info_ptr, i); - if (pthread_attr_init(&thread_ptr[i].attr)) - fatal("pthread_attr_init error %m"); + slurm_attr_init(&thread_ptr[i].attr); if (pthread_attr_setdetachstate(&thread_ptr[i].attr, PTHREAD_CREATE_DETACHED)) error("pthread_attr_setdetachstate error %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - if (pthread_attr_setscope(&thread_ptr[i].attr, - PTHREAD_SCOPE_SYSTEM)) - error("pthread_attr_setscope error %m"); -#endif while ((rc = pthread_create(&thread_ptr[i].thread, &thread_ptr[i].attr, _thread_per_node_rpc, @@ -856,15 +845,10 @@ void agent_queue_request(agent_arg_t *agent_arg_ptr) if (agent_cnt < MAX_AGENT_CNT) { /* execute now */ pthread_attr_t attr_agent; pthread_t thread_agent; - if (pthread_attr_init(&attr_agent)) - fatal("pthread_attr_init error %m"); + slurm_attr_init(&attr_agent); if 
(pthread_attr_setdetachstate (&attr_agent, PTHREAD_CREATE_DETACHED)) error("pthread_attr_setdetachstate error %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - if (pthread_attr_setscope(&attr_agent, PTHREAD_SCOPE_SYSTEM)) - error("pthread_attr_setscope error %m"); -#endif if (pthread_create(&thread_agent, &attr_agent, agent, (void *) agent_arg_ptr) == 0) return; @@ -896,15 +880,10 @@ static void _spawn_retry_agent(agent_arg_t * agent_arg_ptr) debug2("Spawning RPC agent for msg_type %u", agent_arg_ptr->msg_type); - if (pthread_attr_init(&attr_agent)) - fatal("pthread_attr_init error %m"); + slurm_attr_init(&attr_agent); if (pthread_attr_setdetachstate(&attr_agent, PTHREAD_CREATE_DETACHED)) error("pthread_attr_setdetachstate error %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - if (pthread_attr_setscope(&attr_agent, PTHREAD_SCOPE_SYSTEM)) - error("pthread_attr_setscope error %m"); -#endif while (pthread_create(&thread_agent, &attr_agent, agent, (void *) agent_arg_ptr)) { error("pthread_create error %m"); diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c index 047fc01f53d33f5826c65ec52c540a639ed4f5b9..6c7d7329ae51f706d11b4efbe5bd177840bb399b 100644 --- a/src/slurmctld/backup.c +++ b/src/slurmctld/backup.c @@ -90,13 +90,7 @@ void run_backup(void) /* * create attached thread to process RPCs */ - if (pthread_attr_init(&thread_attr_rpc)) - fatal("pthread_attr_init error %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - /* we want 1:1 threads if there is a choice */ - if (pthread_attr_setscope(&thread_attr_rpc, PTHREAD_SCOPE_SYSTEM)) - error("pthread_attr_setscope error %m"); -#endif + slurm_attr_init(&thread_attr_rpc); if (pthread_create(&slurmctld_config.thread_id_rpc, &thread_attr_rpc, _background_rpc_mgr, NULL)) fatal("pthread_create error %m"); @@ -104,13 +98,7 @@ void run_backup(void) /* * create attached thread for signal handling */ - if (pthread_attr_init(&thread_attr_sig)) - fatal("pthread_attr_init error %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - /* we want 1:1 threads if there is a choice */ 
- if (pthread_attr_setscope(&thread_attr_sig, PTHREAD_SCOPE_SYSTEM)) - error("pthread_attr_setscope error %m"); -#endif + slurm_attr_init(&thread_attr_sig); if (pthread_create(&slurmctld_config.thread_id_sig, &thread_attr_sig, _background_signal_hand, NULL)) fatal("pthread_create %m"); diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 2a4085d935d6984a605ab25bcf0df08723dad3f2..3aa75a0b373a3630e24a0632768fcb7d48ed9171 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -253,14 +253,7 @@ int main(int argc, char *argv[]) slurm_mutex_lock(&slurmctld_config.thread_count_lock); slurmctld_config.server_thread_count++; slurm_mutex_unlock(&slurmctld_config.thread_count_lock); - if (pthread_attr_init(&thread_attr_rpc)) - fatal("pthread_attr_init error %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - /* we want 1:1 threads if there is a choice */ - if (pthread_attr_setscope - (&thread_attr_rpc, PTHREAD_SCOPE_SYSTEM)) - error("pthread_attr_setscope error %m"); -#endif + slurm_attr_init(&thread_attr_rpc); if (pthread_create(&slurmctld_config.thread_id_rpc, &thread_attr_rpc,_slurmctld_rpc_mgr, NULL)) fatal("pthread_create error %m"); @@ -268,14 +261,7 @@ int main(int argc, char *argv[]) /* * create attached thread for signal handling */ - if (pthread_attr_init(&thread_attr_sig)) - fatal("pthread_attr_init error %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - /* we want 1:1 threads if there is a choice */ - if (pthread_attr_setscope(&thread_attr_sig, - PTHREAD_SCOPE_SYSTEM)) - error("pthread_attr_setscope error %m"); -#endif + slurm_attr_init(&thread_attr_sig); if (pthread_create(&slurmctld_config.thread_id_sig, &thread_attr_sig, _slurmctld_signal_hand, NULL)) @@ -284,14 +270,7 @@ int main(int argc, char *argv[]) /* * create attached thread for state save */ - if (pthread_attr_init(&thread_attr_save)) - fatal("pthread_attr_init error %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - /* we want 1:1 threads if there is a choice */ - if 
(pthread_attr_setscope(&thread_attr_save, - PTHREAD_SCOPE_SYSTEM)) - error("pthread_attr_setscope error %m"); -#endif + slurm_attr_init(&thread_attr_save); if (pthread_create(&slurmctld_config.thread_id_save, &thread_attr_save, slurmctld_state_save, NULL)) @@ -492,17 +471,10 @@ static void *_slurmctld_rpc_mgr(void *no_data) debug3("_slurmctld_rpc_mgr pid = %u", getpid()); /* threads to process individual RPC's are detached */ - if (pthread_attr_init(&thread_attr_rpc_req)) - fatal("pthread_attr_init %m"); + slurm_attr_init(&thread_attr_rpc_req); if (pthread_attr_setdetachstate (&thread_attr_rpc_req, PTHREAD_CREATE_DETACHED)) fatal("pthread_attr_setdetachstate %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - /* we want 1:1 threads if there is a choice */ - if (pthread_attr_setscope - (&thread_attr_rpc_req, PTHREAD_SCOPE_SYSTEM)) - error("pthread_attr_setscope error %m"); -#endif /* initialize port for RPCs */ lock_slurmctld(config_read_lock); diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 5382775de8af46231ea2d726ad65c81a38275a7d..dbefae2d8ef3c13ee8c79de14cea3409913569d7 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -809,17 +809,22 @@ void _add_job_hash(struct job_record *job_ptr) struct job_record *find_job_record(uint32_t job_id) { int i; + struct job_record *job_ptr; /* First try to find via hash table */ - if (job_hash[JOB_HASH_INX(job_id)] && - job_hash[JOB_HASH_INX(job_id)]->job_id == job_id) - return job_hash[JOB_HASH_INX(job_id)]; + job_ptr = job_hash[JOB_HASH_INX(job_id)]; + if (job_ptr && (job_ptr->job_id == job_id)) { + xassert (job_ptr->magic == JOB_MAGIC); + return job_ptr; + } /* linear search of overflow hash table overflow */ for (i = 0; i < max_hash_over; i++) { - if (job_hash_over[i] != NULL && - job_hash_over[i]->job_id == job_id) - return job_hash_over[i]; + job_ptr = job_hash_over[i]; + if (job_ptr && (job_ptr->job_id == job_id)) { + xassert (job_ptr->magic == JOB_MAGIC); + return job_ptr; + } } return NULL; @@ 
-1044,8 +1049,12 @@ void dump_job_desc(job_desc_msg_t * job_specs) (long) job_specs->kill_on_node_fail : -1L; task_dist = (job_specs->task_dist != (uint16_t) NO_VAL) ? (long) job_specs->task_dist : -1L; - debug3(" kill_on_node_fail=%ld task_dist=%ld script=%.40s...", - kill_on_node_fail, task_dist, job_specs->script); + if (job_specs->script) /* log has problem with string len & null */ + debug3(" kill_on_node_fail=%ld task_dist=%ld script=%.40s...", + kill_on_node_fail, task_dist, job_specs->script); + else + debug3(" kill_on_node_fail=%ld task_dist=%ld script=%s", + kill_on_node_fail, task_dist, job_specs->script); if (job_specs->argc == 1) debug3(" argv=\"%s\"", diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index a95a6e28489b8c3c9011895a473dc04bc68bdcd5..63a5b3f854f91114b9a4588765c376c035967541 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -42,6 +42,7 @@ #include <fcntl.h> #include "src/common/hostlist.h" +#include "src/common/macros.h" #include "src/common/pack.h" #include "src/common/xassert.h" #include "src/common/xstring.h" @@ -111,7 +112,6 @@ char * bitmap2node_name (bitstr_t *bitmap) hostlist_uniq(hl); hostlist_ranged_string(hl, 8192, buf); hostlist_destroy(hl); - return xstrdup(buf); } @@ -1362,16 +1362,10 @@ void msg_to_slurmd (slurm_msg_type_t msg_type) xfree (kill_agent_args); else { debug ("Spawning agent msg_type=%d", msg_type); - if (pthread_attr_init (&kill_attr_agent)) - fatal ("pthread_attr_init error %m"); + slurm_attr_init (&kill_attr_agent); if (pthread_attr_setdetachstate (&kill_attr_agent, PTHREAD_CREATE_DETACHED)) error ("pthread_attr_setdetachstate error %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - if (pthread_attr_setscope (&kill_attr_agent, - PTHREAD_SCOPE_SYSTEM)) - error ("pthread_attr_setscope error %m"); -#endif while (pthread_create (&kill_thread_agent, &kill_attr_agent, agent, (void *)kill_agent_args)) { error ("pthread_create error %m"); diff --git a/src/slurmctld/ping_nodes.c 
b/src/slurmctld/ping_nodes.c index 729d7f291c9f0764c408637206c0dbe44e000e73..1a3a06c764618b90e1940862a6caab905f9b4bb5 100644 --- a/src/slurmctld/ping_nodes.c +++ b/src/slurmctld/ping_nodes.c @@ -230,16 +230,10 @@ void ping_nodes (void) sizeof(host_str), host_str); debug2 ("Spawning ping agent for %s", host_str); ping_begin(); - if (pthread_attr_init (&ping_attr_agent)) - fatal ("pthread_attr_init error %m"); + slurm_attr_init (&ping_attr_agent); if (pthread_attr_setdetachstate (&ping_attr_agent, PTHREAD_CREATE_DETACHED)) error ("pthread_attr_setdetachstate error %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - if (pthread_attr_setscope (&ping_attr_agent, - PTHREAD_SCOPE_SYSTEM)) - error ("pthread_attr_setscope error %m"); -#endif while (pthread_create (&ping_thread_agent, &ping_attr_agent, agent, (void *)ping_agent_args)) { error ("pthread_create error %m"); @@ -257,16 +251,10 @@ void ping_nodes (void) sizeof(host_str), host_str); debug2 ("Spawning registration agent for %s", host_str); ping_begin(); - if (pthread_attr_init (&reg_attr_agent)) - fatal ("pthread_attr_init error %m"); + slurm_attr_init (&reg_attr_agent); if (pthread_attr_setdetachstate (&reg_attr_agent, PTHREAD_CREATE_DETACHED)) error ("pthread_attr_setdetachstate error %m"); -#ifdef PTHREAD_SCOPE_SYSTEM - if (pthread_attr_setscope (&reg_attr_agent, - PTHREAD_SCOPE_SYSTEM)) - error ("pthread_attr_setscope error %m"); -#endif while (pthread_create (&reg_thread_agent, &reg_attr_agent, agent, (void *)reg_agent_args)) { error ("pthread_create error %m"); diff --git a/src/slurmd/Makefile.am b/src/slurmd/Makefile.am index a2f0169f16341cc2da48a5a45b50428a71042987..f10755ce8100eed3a53dd5325caaebadc13babfe 100644 --- a/src/slurmd/Makefile.am +++ b/src/slurmd/Makefile.am @@ -31,7 +31,7 @@ slurmd_SOURCES = \ ulimits.c ulimits.h \ setproctitle.c setproctitle.h -slurmd_LDFLAGS = -export-dynamic +slurmd_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) force: $(slurmd_LDADD) : force diff --git a/src/slurmd/io.c b/src/slurmd/io.c index 
a28be9c901337a9457c06e4e765f6aab6a433af0..9c1e7865c7730e668438abe942444173907cc4c3 100644 --- a/src/slurmd/io.c +++ b/src/slurmd/io.c @@ -52,6 +52,7 @@ #include "src/common/io_hdr.h" #include "src/common/cbuf.h" #include "src/common/log.h" +#include "src/common/macros.h" #include "src/common/fd.h" #include "src/common/list.h" #include "src/common/xmalloc.h" @@ -254,13 +255,7 @@ io_spawn_handler(slurmd_job_t *job) if (_io_prepare_tasks(job) < 0) return SLURM_FAILURE; - if ((errno = pthread_attr_init(&attr)) != 0) - error("pthread_attr_init: %m"); - -#ifdef PTHREAD_SCOPE_SYSTEM - if ((errno = pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM)) != 0) - error("pthread_attr_setscope: %m"); -#endif + slurm_attr_init(&attr); xassert(_validate_io_list(job->objs)); pthread_create(&job->ioid, &attr, &_io_thr, (void *)job); @@ -1412,7 +1407,8 @@ _task_error(io_obj_t *obj, List objs) if (getsockopt(obj->fd, SOL_SOCKET, SO_ERROR, &err, &size) < 0) error ("getsockopt: %m"); - _update_error_state(t, E_POLL, err); + else + _update_error_state(t, E_POLL, err); _obj_close(obj, objs); return -1; } @@ -1525,8 +1521,7 @@ _client_error(io_obj_t *obj, List objs) if (getsockopt(obj->fd, SOL_SOCKET, SO_ERROR, &err, &size) < 0) error ("getsockopt: %m"); - - if (err != ECONNRESET) /* Do not log connection resets */ + else if (err != ECONNRESET) /* Do not log connection resets */ _update_error_state(io, E_POLL, err); return 0; @@ -1578,7 +1573,13 @@ _update_error_state(struct io_info *io, enum error_type type, int err) { xassert(io != NULL); xassert(io->magic == IO_MAGIC); - xassert(err > 0); + + /* getsockopt(,,SO_ERROR,&err,) returns err value of -1 + * under AIX under some circumstances */ + if (err <= 0) { + error("Unspecified I/O error <task %d>", io->id); + return 0; + } if ( (io->err.e_type == type) && (io->err.e_last == err ) ) { diff --git a/src/slurmd/slurmd.c b/src/slurmd/slurmd.c index 695e80852a0a8d5a46a9deaa55af1f34632ed0bc..5cb0a25cca2a0b8e0657a217c7ccc1162893472c 100644 --- 
a/src/slurmd/slurmd.c +++ b/src/slurmd/slurmd.c @@ -287,12 +287,7 @@ _handle_connection(slurm_fd fd, slurm_addr *cli) arg->fd = fd; arg->cli_addr = cli; - if ((rc = pthread_attr_init(&attr)) != 0) { - error("pthread_attr_init: %s", slurm_strerror(rc)); - xfree(arg); - return; - } - + slurm_attr_init(&attr); rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); if (rc != 0) { errno = rc; diff --git a/src/squeue/Makefile.am b/src/squeue/Makefile.am index e65d0d20cc06e47976ff7ccddd2fe8fb77492abc..bf5f857ed27e5a2d255fbfc49ec7a6f836e2f68f 100644 --- a/src/squeue/Makefile.am +++ b/src/squeue/Makefile.am @@ -14,3 +14,5 @@ LDADD = $(top_builddir)/src/common/libcommon.la \ noinst_HEADERS = squeue.h print.h squeue_SOURCES = squeue.c print.c opts.c sort.c +squeue_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) + diff --git a/src/srun/Makefile.am b/src/srun/Makefile.am index 0ee22cd47682f426c9a2658848585cdfe85e7fa6..35a19e276a652ac4fc95b02a76ff7424b36100a7 100644 --- a/src/srun/Makefile.am +++ b/src/srun/Makefile.am @@ -44,6 +44,8 @@ srun_LDADD = \ $(SRUN_WRAPPER_OBJ) \ $(convenience_libs) +srun_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) + EXTRA_srun_SOURCES = srun.wrapper.c srun_DEPENDENCIES = $(SRUN_WRAPPER_OBJ) diff --git a/src/srun/io.c b/src/srun/io.c index eedee8d41f03eb752762ad3d17bdcdcd092596d8..4535e4d8d586d0129e58f3f7246fa1dc1ee655f4 100644 --- a/src/srun/io.c +++ b/src/srun/io.c @@ -122,7 +122,7 @@ _handle_pollerr(fd_info_t *info) if (getsockopt(fd, SOL_SOCKET, SO_ERROR, (void *)&err, &size) < 0) error("_handle_error_poll: getsockopt: %m"); - if (err) + if (err > 0) debug3("%d: poll error on fd %d: %s", info->taskid, fd, slurm_strerror(err)); else @@ -557,7 +557,7 @@ _read_io_header(int fd, job_t *job, char *host) io_hdr_t hdr; if (cbuf_write_from_fd(cb, fd, size, NULL) < 0) { - error ("Bad stream header read: %m"); + error ("Bad stream header write: %m"); goto fail; } diff --git a/src/srun/msg.c b/src/srun/msg.c index 
c2146899193852661f0541f8023735d2a0121a86..41efc6075ef89c2759a36998674b0bf9a3e73844 100644 --- a/src/srun/msg.c +++ b/src/srun/msg.c @@ -610,12 +610,19 @@ static int _do_poll(job_t *job, struct pollfd *fds, int timeout) { nfds_t nfds = (job->njfds + 1); - int rc; + int rc, to; - while ((rc = poll(fds, nfds, timeout * 1000)) < 0) { + if (timeout > 0) + to = timeout * 1000; + else + to = timeout; + + while ((rc = poll(fds, nfds, to)) < 0) { switch (errno) { + case EAGAIN: case EINTR: continue; case ENOMEM: + case EINVAL: case EFAULT: fatal("poll: %m"); default: error("poll: %m. Continuing..."); continue;