diff --git a/NEWS b/NEWS index 426f9807b7250a5461c59d1e2a63a7fa1a2d3272..a755599e11f506fb6e6e1b75184cbeddac22ebf0 100644 --- a/NEWS +++ b/NEWS @@ -393,6 +393,8 @@ documents those changes that are of interest to users and administrators. -- Fix "squeue --start" to override SQUEUE_FORMAT env variable. -- Restore GRES functionality with select/linear plugin. It was broken in version 14.03.10. + -- Fix possible race condition when attempting to use QOS on a system running + accounting_storage/filetxt. * Changes in Slurm 14.03.10 =========================== diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c index 334dc3456759f3860a1636b68459d89eb179e28e..0822564a61ed9959e749070d890d6b2fc8cd22bf 100644 --- a/src/common/assoc_mgr.c +++ b/src/common/assoc_mgr.c @@ -1135,16 +1135,13 @@ static int _get_assoc_mgr_res_list(void *db_conn, int enforce) static int _get_assoc_mgr_qos_list(void *db_conn, int enforce) { uid_t uid = getuid(); + List new_list = NULL; assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; - assoc_mgr_lock(&locks); - if (assoc_mgr_qos_list) - list_destroy(assoc_mgr_qos_list); - assoc_mgr_qos_list = acct_storage_g_get_qos(db_conn, uid, NULL); + new_list = acct_storage_g_get_qos(db_conn, uid, NULL); - if (!assoc_mgr_qos_list) { - assoc_mgr_unlock(&locks); + if (!new_list) { if (enforce & ACCOUNTING_ENFORCE_ASSOCS) { error("_get_assoc_mgr_qos_list: no list was made."); return SLURM_ERROR; @@ -1153,9 +1150,16 @@ static int _get_assoc_mgr_qos_list(void *db_conn, int enforce) } } + assoc_mgr_lock(&locks); + + FREE_NULL_LIST(assoc_mgr_qos_list); + assoc_mgr_qos_list = new_list; + new_list = NULL; + _post_qos_list(assoc_mgr_qos_list); assoc_mgr_unlock(&locks); + return SLURM_SUCCESS; } @@ -1843,13 +1847,21 @@ extern int assoc_mgr_fill_in_assoc(void *db_conn, if (assoc_pptr) *assoc_pptr = NULL; - /* Call assoc_mgr_refresh_lists instead of just getting the - association list because we need qos and user lists before - the association list can be made. - */ - if (!assoc_mgr_association_list) - if (assoc_mgr_refresh_lists(db_conn) == SLURM_ERROR) - return SLURM_ERROR; + /* Since we might be locked we can't come in here and try to + * get the list since we would need the WRITE_LOCK to do that, + * so just return as this would only happen on a system not + * talking to the database. + */ + if (!assoc_mgr_association_list) { + int rc = SLURM_SUCCESS; + + if (enforce & ACCOUNTING_ENFORCE_QOS) { + error("No Association list available, " + "this should never happen"); + rc = SLURM_ERROR; + } + return rc; + } if ((!assoc_mgr_association_list || !list_count(assoc_mgr_association_list)) @@ -2100,14 +2112,28 @@ extern int assoc_mgr_fill_in_qos(void *db_conn, slurmdb_qos_rec_t *qos, if (qos_pptr) *qos_pptr = NULL; - if (!assoc_mgr_qos_list) - if (_get_assoc_mgr_qos_list(db_conn, enforce) == SLURM_ERROR) - return SLURM_ERROR; if (!locked) assoc_mgr_lock(&locks); - if ((!assoc_mgr_qos_list || !list_count(assoc_mgr_qos_list)) - && !(enforce & ACCOUNTING_ENFORCE_QOS)) { + + /* Since we might be locked we can't come in here and try to + * get the list since we would need the WRITE_LOCK to do that, + * so just return as this would only happen on a system not + * talking to the database. + */ + if (!assoc_mgr_qos_list) { + int rc = SLURM_SUCCESS; + + if (enforce & ACCOUNTING_ENFORCE_QOS) { + error("No QOS list available, " + "this should never happen"); + rc = SLURM_ERROR; + } + if (!locked) + assoc_mgr_unlock(&locks); + return rc; + } else if (!list_count(assoc_mgr_qos_list) + && !(enforce & ACCOUNTING_ENFORCE_QOS)) { if (!locked) assoc_mgr_unlock(&locks); return SLURM_SUCCESS; diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index b4db063d155de83f4bd9774e553b63320c592e2e..86c47f6bd1a072d00e19507a080fafb5fc399b63 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -595,6 +595,16 @@ extern void qos_list_build(char *qos, bitstr_t **qos_bits) /* Lock here to avoid g_qos_count changing under us */ assoc_mgr_lock(&locks); + if (!g_qos_count) { + error("We have no QOS on the system Ignoring invalid " + "Allow/DenyQOS value(s) %s", + qos); + assoc_mgr_unlock(&locks); + FREE_NULL_BITMAP(*qos_bits); + *qos_bits = NULL; + return; + } + tmp_qos_bitstr = bit_alloc(g_qos_count); tmp_qos = xstrdup(qos); one_qos_name = strtok_r(tmp_qos, ",", &name_ptr);