diff --git a/NEWS b/NEWS index 3561d03f092a02230ded86496ea745c991dfc2a4..bfd7a186bc6e284eaad78908ade1cb1317e6a0ef 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,8 @@ documents those changes that are of interest to users and admins. * Changes in SLURM 0.3.0.0-pre7 (not tagged yet) =============================== + -- Fixes for reported problems: + - slurm/381: Hold jobs requesting more resources than partition limit -- Add support for getting node's real memory size on AIX -- Sinfo sort partitions in slurm.conf order, new sort option ("#P") -- Document how to gracefully change plugin values diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index b28e96716fc2f3cf15d3a1a83ecdaf54d377d725..0a2ffcf52fbb7461571161bc56a5306e526b7c4b 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1447,7 +1447,7 @@ job_complete(uint32_t job_id, uid_t uid, bool requeue, * IN allocate - resource allocation request if set rather than job submit * IN will_run - job is not to be created, test of validity only * OUT new_job_id - the job's ID - * OUT job_rec_ptr - pointer to the job (NULL on error) + * OUT job_pptr - pointer to the job (NULL on error) * RET 0 on success, otherwise ESLURM error code * globals: job_list - pointer to global job list * list_part - global list of partition info @@ -1457,13 +1457,13 @@ job_complete(uint32_t job_id, uid_t uid, bool requeue, static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, int allocate, int will_run, - struct job_record **job_rec_ptr, uid_t submit_uid) + struct job_record **job_pptr, uid_t submit_uid) { int error_code = SLURM_SUCCESS, i; struct part_record *part_ptr; bitstr_t *req_bitmap = NULL, *exc_bitmap = NULL; - *job_rec_ptr = (struct job_record *) NULL; + *job_pptr = (struct job_record *) NULL; if ((error_code = _validate_job_desc(job_desc, allocate, submit_uid))) return error_code; @@ -1584,7 +1584,7 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, } if ((error_code = 
_copy_job_desc_to_job_record(job_desc, - job_rec_ptr, + job_pptr, part_ptr, &req_bitmap, &exc_bitmap))) { @@ -1594,16 +1594,16 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, if (job_desc->script) { if ((error_code = _copy_job_desc_to_file(job_desc, - (*job_rec_ptr)-> + (*job_pptr)-> job_id))) { - (*job_rec_ptr)->job_state = JOB_FAILED; + (*job_pptr)->job_state = JOB_FAILED; error_code = ESLURM_WRITING_TO_FILE; goto cleanup; } - (*job_rec_ptr)->batch_flag = 1; + (*job_pptr)->batch_flag = 1; } else - (*job_rec_ptr)->batch_flag = 0; - *new_job_id = (*job_rec_ptr)->job_id; + (*job_pptr)->batch_flag = 0; + *new_job_id = (*job_pptr)->job_id; /* Insure that requested partition is valid right now, * otherwise leave job queued and provide warning code */ @@ -1623,6 +1623,8 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, *new_job_id, part_ptr->name); error_code = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE; } + if (error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) + (*job_pptr)->priority = 1; /* Move to end of queue */ cleanup: FREE_NULL_BITMAP(req_bitmap); diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 54620d3c44c363f31ab2ff4599b0be3b51125483..df3faf0d852ed3a3bb1d54118d7d1053c2b5dc49 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -866,8 +866,11 @@ int select_nodes(struct job_record *job_ptr, bool test_only) (job_ptr->time_limit > part_ptr->max_time)) || ((job_ptr->details->max_nodes != 0) && /* no node limit */ (job_ptr->details->max_nodes < part_ptr->min_nodes)) || - (job_ptr->details->min_nodes > part_ptr->max_nodes)) + (job_ptr->details->min_nodes > part_ptr->max_nodes)) { + job_ptr->priority = 1; /* move to end of queue */ + last_job_update = time(NULL); return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE; + } /* build sets of usable nodes based upon their configuration */ error_code = _build_node_list(job_ptr, &node_set_ptr, &node_set_size); 
diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c index 3b1277250096e2315f7a3d7c02ee0a2a1a81bd63..e469828bb30d5ddfdf4d435dd2f16b7b2602304f 100644 --- a/src/slurmctld/partition_mgr.c +++ b/src/slurmctld/partition_mgr.c @@ -739,6 +739,9 @@ int update_part(update_part_msg_t * part_desc) } } + if (error_code == SLURM_SUCCESS) + reset_job_priority(); /* partition limits may have changed; presumably un-holds jobs parked at priority 1 -- confirm */ + return error_code; }