diff --git a/NEWS b/NEWS index 4e92c69dafa35523bae4bc356224c3380f4784d7..2ff7e341292630216d1e028839fbdd3523f7cb79 100644 --- a/NEWS +++ b/NEWS @@ -79,6 +79,8 @@ documents those changes that are of interest to users and administrators. the slurmctld. -- Log if CLOUD node configured without a resume/suspend program or suspend time. + -- MYSQL - Better locking around g_qos_count which was previously unprotected. + -- Correct size of buffer used for jobid2str to avoid truncation. * Changes in Slurm 15.08.6 ========================== diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index 670923b4a353bd519964c4d95490be3df49b1520..d5660d659bf952a8e5579c530e45d2f38ee50cbb 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -222,6 +222,8 @@ static int _set_qos_cnt(mysql_conn_t *mysql_conn) MYSQL_RES *result = NULL; MYSQL_ROW row; char *query = xstrdup_printf("select MAX(id) from %s", qos_table); + assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK, + NO_LOCK, NO_LOCK, NO_LOCK }; if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) { xfree(query); @@ -239,7 +241,9 @@ static int _set_qos_cnt(mysql_conn_t *mysql_conn) possible as an id we add 1 to the total to burn 0 and start at the 1 bit. */ + assoc_mgr_lock(&locks); g_qos_count = slurm_atoul(row[0]) + 1; + assoc_mgr_unlock(&locks); mysql_free_result(result); return SLURM_SUCCESS; diff --git a/src/plugins/accounting_storage/mysql/as_mysql_qos.c b/src/plugins/accounting_storage/mysql/as_mysql_qos.c index 5ea1b51ed330218c05a8c6d6558b321c0b7ce1b2..28867ff5c0fd6cf060cf37da6b4e56b4c05e3284 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_qos.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_qos.c @@ -498,6 +498,9 @@ extern int as_mysql_add_qos(mysql_conn_t *mysql_conn, uint32_t uid, int affect_rows = 0; int added = 0; char *added_preempt = NULL; + uint32_t qos_cnt; + assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK, + NO_LOCK, NO_LOCK, NO_LOCK }; if (check_connection(mysql_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; @@ -505,8 +508,13 @@ extern int as_mysql_add_qos(mysql_conn_t *mysql_conn, uint32_t uid, if (!is_user_min_admin_level(mysql_conn, uid, SLURMDB_ADMIN_SUPER_USER)) return ESLURM_ACCESS_DENIED; + assoc_mgr_lock(&locks); + qos_cnt = g_qos_count; + assoc_mgr_unlock(&locks); + user_name = uid_to_string((uid_t) uid); itr = list_iterator_create(qos_list); + while ((object = list_next(itr))) { if (!object->name || !object->name[0]) { error("We need a qos name to add."); @@ -521,7 +529,7 @@ extern int as_mysql_add_qos(mysql_conn_t *mysql_conn, uint32_t uid, _setup_qos_limits(object, &cols, &vals, &extra, &added_preempt, 1); if (added_preempt) { - object->preempt_bitstr = bit_alloc(g_qos_count); + object->preempt_bitstr = bit_alloc(qos_cnt); bit_unfmt(object->preempt_bitstr, added_preempt+1); xfree(added_preempt); } @@ -612,6 +620,9 @@ extern List as_mysql_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid, char *tmp_char1=NULL, *tmp_char2=NULL; bitstr_t *preempt_bitstr = NULL; char *added_preempt = NULL; + uint32_t qos_cnt; + assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK, + NO_LOCK, NO_LOCK, NO_LOCK }; if (!qos_cond || !qos) { error("we need something to change"); @@ -676,8 +687,13 @@ extern List as_mysql_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid, _setup_qos_limits(qos, &tmp_char1, &tmp_char2, &vals, &added_preempt, 0); + + assoc_mgr_lock(&locks); + qos_cnt = g_qos_count; + assoc_mgr_unlock(&locks); + if (added_preempt) { - preempt_bitstr = bit_alloc(g_qos_count); + preempt_bitstr = bit_alloc(qos_cnt); bit_unfmt(preempt_bitstr, added_preempt+1); xfree(added_preempt); } @@ -779,7 +795,8 @@ extern List as_mysql_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid, char *new_preempt = NULL; bool cleared = 0; - qos_rec->preempt_bitstr = bit_alloc(g_qos_count); + qos_rec->preempt_bitstr = bit_alloc(qos_cnt); + if (row[MQOS_PREEMPT] && row[MQOS_PREEMPT][0]) bit_unfmt(qos_rec->preempt_bitstr, row[MQOS_PREEMPT]+1); @@ -797,7 +814,7 @@ extern List as_mysql_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid, bit_nclear( qos_rec->preempt_bitstr, 0, - g_qos_count-1); + qos_cnt-1); } bit_set(qos_rec->preempt_bitstr, @@ -1041,6 +1058,9 @@ extern List as_mysql_get_qos(mysql_conn_t *mysql_conn, uid_t uid, int i=0; MYSQL_RES *result = NULL; MYSQL_ROW row; + uint32_t qos_cnt; + assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK, + NO_LOCK, NO_LOCK, NO_LOCK }; /* if this changes you will need to edit the corresponding enum */ char *qos_req_inx[] = { @@ -1181,6 +1201,10 @@ empty: qos_list = list_create(slurmdb_destroy_qos_rec); + assoc_mgr_lock(&locks); + qos_cnt = g_qos_count; + assoc_mgr_unlock(&locks); + while ((row = mysql_fetch_row(result))) { slurmdb_qos_rec_t *qos = xmalloc(sizeof(slurmdb_qos_rec_t)); list_append(qos_list, qos); @@ -1250,7 +1274,7 @@ empty: if (row[QOS_REQ_PREE] && row[QOS_REQ_PREE][0]) { if (!qos->preempt_bitstr) - qos->preempt_bitstr = bit_alloc(g_qos_count); + qos->preempt_bitstr = bit_alloc(qos_cnt); bit_unfmt(qos->preempt_bitstr, row[QOS_REQ_PREE]+1); } if (row[QOS_REQ_PREEM]) diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 3bd7213640d3627498a514ff0344472ff5218456..967246ae3573253fa7963f1441504a4ab579eabb 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -1043,7 +1043,7 @@ static int _schedule(uint32_t job_limit) /* Locks: Read config, write job, write node, read partition */ slurmctld_lock_t job_write_lock = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; - char job_id_str[32]; + char jbuf[JBUFSIZ]; bool is_job_array_head; #ifdef HAVE_BG char *ionodes = NULL; @@ -1760,7 +1760,7 @@ next_task: (error_code != ESLURM_NODE_NOT_AVAIL) && (error_code != ESLURM_INVALID_BURST_BUFFER_REQUEST)){ info("sched: schedule: %s non-runnable:%s", - jobid2str(job_ptr, job_id_str, sizeof(job_id_str)), + jobid2str(job_ptr, jbuf, sizeof(jbuf)), slurm_strerror(error_code)); if (!wiki_sched) { last_job_update = now; diff --git a/testsuite/expect/inc21.21_tests b/testsuite/expect/inc21.21_tests index 7f3cebe35b1b61cda1d68601c31433947e2b4192..e22088bbe4e550b5faeaa1cb47268a17b1ca2a2d 100644 --- a/testsuite/expect/inc21.21_tests +++ b/testsuite/expect/inc21.21_tests @@ -211,7 +211,7 @@ sleep 10" set pending 0 set running 0 - spawn $squeue -h -o "\%t \%r" + spawn $squeue -A $ta -h -o "\%t \%r" expect { -re "PD." { incr pending @@ -384,7 +384,7 @@ proc inc21_21_submit_test { limit } { sleep 4 set matches 0 - set mypid [spawn $squeue -h -o "\%i \%t \%r"] + set mypid [spawn $squeue -A$ta -h -o "\%i \%t \%r"] expect { -re "($job_id(2)|$job_id(3)).PD.AssocMaxJobsLimit" { incr matches