From 77d5598ab16d419a82585fa82b8664af2eeb9fb7 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Fri, 4 Jan 2008 20:37:49 +0000 Subject: [PATCH] mods to the bluegene plugin to include the job_ptr from the controller --- slurm/slurm.h.in | 3 - src/common/node_select.c | 32 ---- src/common/node_select.h | 7 - .../select/bluegene/plugin/bg_job_place.c | 156 +++++++++--------- .../select/bluegene/plugin/bg_job_run.c | 49 +++--- src/plugins/select/bluegene/plugin/bluegene.c | 2 +- src/plugins/select/bluegene/plugin/bluegene.h | 9 +- .../select/bluegene/plugin/select_bluegene.c | 43 +---- src/plugins/select/cons_res/select_cons_res.c | 5 - src/plugins/select/linear/select_linear.c | 5 - 10 files changed, 116 insertions(+), 195 deletions(-) diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 518c9d21320..e85be2a79c4 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -271,8 +271,6 @@ enum select_data_type { SELECT_DATA_MLOADER_IMAGE,/* data-> char *mloaderimage */ SELECT_DATA_RAMDISK_IMAGE,/* data-> char *ramdiskimage */ SELECT_DATA_REBOOT, /* data-> uint16_t reboot */ - SELECT_DATA_EST_START /* data-> time_t etimated time job - * will start*/ }; enum select_print_mode { @@ -291,7 +289,6 @@ enum select_print_mode { SELECT_PRINT_MLOADER_IMAGE,/* Print just the MLOADER IMAGE */ SELECT_PRINT_RAMDISK_IMAGE,/* Print just the RAMDISK IMAGE */ SELECT_PRINT_REBOOT, /* Print just the REBOOT */ - SELECT_PRINT_EST_START /* Print just the EST_START */ }; enum select_node_cnt { diff --git a/src/common/node_select.c b/src/common/node_select.c index c912edac254..69874881b4f 100644 --- a/src/common/node_select.c +++ b/src/common/node_select.c @@ -85,7 +85,6 @@ typedef struct slurm_select_ops { int (*job_begin) (struct job_record *job_ptr); int (*job_ready) (struct job_record *job_ptr); int (*job_fini) (struct job_record *job_ptr); - int (*job_update_end_time) (struct job_record *job_ptr); int (*job_suspend) (struct job_record *job_ptr); int (*job_resume) (struct 
job_record *job_ptr); int (*pack_node_info) (time_t last_query_time, @@ -145,7 +144,6 @@ struct select_jobinfo { char *linuximage; /* LinuxImage for this block */ char *mloaderimage; /* mloaderImage for this block */ char *ramdiskimage; /* RamDiskImage for this block */ - time_t est_job_start; /* Estimated start time of job */ }; #endif @@ -174,7 +172,6 @@ static slurm_select_ops_t * _select_get_ops(slurm_select_context_t *c) "select_p_job_begin", "select_p_job_ready", "select_p_job_fini", - "select_p_job_update_end_time", "select_p_job_suspend", "select_p_job_resume", "select_p_pack_node_info", @@ -580,19 +577,6 @@ extern int select_g_job_fini(struct job_record *job_ptr) return (*(g_select_context->ops.job_fini))(job_ptr); } -/* - * Suspend a job. Executed from slurmctld. - * IN job_ptr - pointer to job being suspended - * RET SLURM_SUCCESS or error code - */ -extern int select_g_job_update_end_time(struct job_record *job_ptr) -{ - if (slurm_select_init() < 0) - return SLURM_ERROR; - - return (*(g_select_context->ops.job_update_end_time))(job_ptr); -} - /* * Suspend a job. Executed from slurmctld. 
* IN job_ptr - pointer to job being suspended @@ -767,7 +751,6 @@ extern int select_g_set_jobinfo (select_jobinfo_t jobinfo, int i, rc = SLURM_SUCCESS; uint16_t *uint16 = (uint16_t *) data; uint32_t *uint32 = (uint32_t *) data; - time_t *time = (time_t *) data; char *tmp_char = (char *) data; if (jobinfo == NULL) { @@ -839,9 +822,6 @@ extern int select_g_set_jobinfo (select_jobinfo_t jobinfo, xfree(jobinfo->ramdiskimage); jobinfo->ramdiskimage = xstrdup(tmp_char); break; - case SELECT_DATA_EST_START: - jobinfo->est_job_start = *time; - break; default: debug("select_g_set_jobinfo data_type %d invalid", data_type); @@ -862,7 +842,6 @@ extern int select_g_get_jobinfo (select_jobinfo_t jobinfo, int i, rc = SLURM_SUCCESS; uint16_t *uint16 = (uint16_t *) data; uint32_t *uint32 = (uint32_t *) data; - time_t *time = (time_t *) data; char **tmp_char = (char **) data; if (jobinfo == NULL) { @@ -952,9 +931,6 @@ extern int select_g_get_jobinfo (select_jobinfo_t jobinfo, else *tmp_char = xstrdup(jobinfo->ramdiskimage); break; - case SELECT_DATA_EST_START: - *time = jobinfo->est_job_start; - break; default: debug("select_g_get_jobinfo data_type %d invalid", data_type); @@ -999,8 +975,6 @@ extern select_jobinfo_t select_g_copy_jobinfo(select_jobinfo_t jobinfo) rc->linuximage = xstrdup(jobinfo->linuximage); rc->mloaderimage = xstrdup(jobinfo->mloaderimage); rc->ramdiskimage = xstrdup(jobinfo->ramdiskimage); - rc->est_job_start = jobinfo->est_job_start; - } return rc; @@ -1063,7 +1037,6 @@ extern int select_g_pack_jobinfo (select_jobinfo_t jobinfo, Buf buffer) packstr(jobinfo->linuximage, buffer); packstr(jobinfo->mloaderimage, buffer); packstr(jobinfo->ramdiskimage, buffer); - pack_time(jobinfo->est_job_start, buffer); } else { /* pack space for 3 positions for start and for geo * then 1 for conn_type, reboot, and rotate @@ -1081,7 +1054,6 @@ extern int select_g_pack_jobinfo (select_jobinfo_t jobinfo, Buf buffer) packstr("", buffer); //linux packstr("", buffer); //mloader 
packstr("", buffer); //ramdisk - pack_time(0, buffer); //est_job_start } return SLURM_SUCCESS; @@ -1116,7 +1088,6 @@ extern int select_g_unpack_jobinfo(select_jobinfo_t jobinfo, Buf buffer) safe_unpackstr_xmalloc(&(jobinfo->linuximage), &uint32_tmp, buffer); safe_unpackstr_xmalloc(&(jobinfo->mloaderimage), &uint32_tmp, buffer); safe_unpackstr_xmalloc(&(jobinfo->ramdiskimage), &uint32_tmp, buffer); - safe_unpack_time(&(jobinfo->est_job_start), buffer); return SLURM_SUCCESS; @@ -1280,9 +1251,6 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo, tmp_image = jobinfo->ramdiskimage; snprintf(buf, size, "%s", tmp_image); break; - case SELECT_PRINT_EST_START: - snprintf(buf, size, "%u", (int)jobinfo->est_job_start); - break; default: error("select_g_sprint_jobinfo: bad mode %d", mode); if (size > 0) diff --git a/src/common/node_select.h b/src/common/node_select.h index d57e578c0c4..59e70e52b23 100644 --- a/src/common/node_select.h +++ b/src/common/node_select.h @@ -194,13 +194,6 @@ extern int select_g_job_ready(struct job_record *job_ptr); */ extern int select_g_job_fini(struct job_record *job_ptr); -/* - * To be called when the end time of a job is changed. - * IN job_ptr - pointer to job being suspended - * RET SLURM_SUCCESS or error code - */ -extern int select_p_job_update_end_time(struct job_record *job_ptr); - /* * Suspend a job. Executed from slurmctld. 
* IN job_ptr - pointer to job being suspended diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index 00277f0bb98..dd815433c7d 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -114,7 +114,7 @@ static void _rotate_geo(uint16_t *req_geometry, int rot_cnt) /* * Comparator used for sorting blocks smallest to largest * - * returns: -1: rec_a >rec_b 0: rec_a == rec_b 1: rec_a < rec_b + * returns: -1: rec_a < rec_b 0: rec_a == rec_b 1: rec_a > rec_b * */ static int _bg_record_sort_aval_inc(bg_record_t* rec_a, bg_record_t* rec_b) @@ -122,10 +122,16 @@ static int _bg_record_sort_aval_inc(bg_record_t* rec_a, bg_record_t* rec_b) int size_a = rec_a->node_cnt; int size_b = rec_b->node_cnt; - if(rec_a->est_job_end < rec_b->est_job_end) + if(rec_a->job_ptr && !rec_b->job_ptr) return -1; - else if(rec_a->est_job_end > rec_b->est_job_end) + else if(!rec_a->job_ptr && rec_b->job_ptr) return 1; + else if(rec_a->job_ptr && rec_b->job_ptr) { + if(rec_a->job_ptr->start_time > rec_b->job_ptr->start_time) + return 1; + else if(rec_a->job_ptr->start_time < rec_b->job_ptr->start_time) + return -1; + } if (size_a < size_b) return -1; @@ -162,10 +168,16 @@ static int _bg_record_sort_aval_dec(bg_record_t* rec_a, bg_record_t* rec_b) int size_a = rec_a->node_cnt; int size_b = rec_b->node_cnt; - if(rec_a->est_job_end > rec_b->est_job_end) - return -1; - else if(rec_a->est_job_end < rec_b->est_job_end) + if(rec_a->job_ptr && !rec_b->job_ptr) return 1; + else if(!rec_a->job_ptr && rec_b->job_ptr) + return -1; + else if(rec_a->job_ptr && rec_b->job_ptr) { + if(rec_a->job_ptr->start_time > rec_b->job_ptr->start_time) + return -1; + else if(rec_a->job_ptr->start_time < rec_b->job_ptr->start_time) + return 1; + } if (size_a < size_b) return -1; @@ -1041,7 +1053,7 @@ static int _find_best_block_match(List block_list, debug2("taking off %d(%s) ends at %d", 
bg_record->job_running, bg_record->bg_block_id, - bg_record->est_job_end); + bg_record->job_ptr->start_time); if(!(new_blocks = create_dynamic_block( block_list, &request, job_list))) { destroy_bg_record(bg_record); @@ -1056,8 +1068,8 @@ static int _find_best_block_match(List block_list, rc = SLURM_SUCCESS; (*found_bg_record) = list_pop(new_blocks); if(bg_record) { - (*found_bg_record)->est_job_end - = bg_record->est_job_end; + (*found_bg_record)->job_ptr + = bg_record->job_ptr; destroy_bg_record(bg_record); } list_destroy(new_blocks); @@ -1138,12 +1150,12 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, bg_record_t* bg_record = NULL; char buf[100]; int i, rc = SLURM_SUCCESS; - uint16_t geo[BA_SYSTEM_DIMENSIONS]; uint16_t tmp16 = (uint16_t)NO_VAL; List block_list = NULL; int block_list_count = 0; int blocks_added = 0; - + int starttime = time(NULL); + if(bluegene_layout_mode == LAYOUT_DYNAMIC) slurm_mutex_lock(&create_dynamic_mutex); @@ -1177,85 +1189,73 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, &bg_record, test_only); if(rc == SLURM_SUCCESS) { - if(bg_record && !bg_record->bg_block_id) { - //ListIterator itr = NULL; - - debug2("%d can start job at %u on %s on unmade block", - test_only, bg_record->est_job_end, - bg_record->nodes); - select_g_set_jobinfo(job_ptr->select_jobinfo, + if(bg_record) { + if(bg_record->job_ptr && bg_record->job_ptr->end_time) + starttime = bg_record->job_ptr->end_time; + + job_ptr->start_time = starttime; + + if(!bg_record->bg_block_id) { + uint16_t geo[BA_SYSTEM_DIMENSIONS]; + + debug2("%d can start job at " + "%u on %s on unmade block", + test_only, starttime, + bg_record->nodes); + select_g_set_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_BLOCK_ID, "unassigned"); - if(job_ptr->num_procs < bluegene_bp_node_cnt - && job_ptr->num_procs > 0) { - i = procs_per_node/job_ptr->num_procs; - debug2("divide by %d", i); - } else - i = 1; - min_nodes *= 
bluegene_bp_node_cnt/i; - - /* itr = list_iterator_create(block_list); */ -/* while(bg_record = list_next(itr)) { */ - -/* } */ - select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_EST_START, - &bg_record->est_job_end); - select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_NODES, - bg_record->nodes); - select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_IONODES, - bg_record->ionodes); - - select_g_set_jobinfo(job_ptr->select_jobinfo, + if(job_ptr->num_procs < bluegene_bp_node_cnt + && job_ptr->num_procs > 0) { + i = procs_per_node/job_ptr->num_procs; + debug2("divide by %d", i); + } else + i = 1; + min_nodes *= bluegene_bp_node_cnt/i; + select_g_set_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_NODE_CNT, &min_nodes); + memset(geo, 0, + sizeof(uint16_t) * BA_SYSTEM_DIMENSIONS); + select_g_set_jobinfo(job_ptr->select_jobinfo, + SELECT_DATA_GEOMETRY, + &geo); + } else { + if((bg_record->ionodes) + && (job_ptr->part_ptr->max_share <= 1)) + error("Small block used in " + "non-shared partition"); + + debug2("%d can start job at %u on %s", + test_only, starttime, + bg_record->nodes); + + select_g_set_jobinfo(job_ptr->select_jobinfo, + SELECT_DATA_BLOCK_ID, + bg_record->bg_block_id); + select_g_set_jobinfo(job_ptr->select_jobinfo, + SELECT_DATA_NODE_CNT, + &bg_record->node_cnt); + select_g_set_jobinfo(job_ptr->select_jobinfo, + SELECT_DATA_GEOMETRY, + &bg_record->geo); + + tmp16 = bg_record->conn_type; + select_g_set_jobinfo(job_ptr->select_jobinfo, + SELECT_DATA_CONN_TYPE, + &tmp16); + } - memset(geo, 0, sizeof(uint16_t) * BA_SYSTEM_DIMENSIONS); - select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_GEOMETRY, - &geo); - } else if(bg_record) { - if((bg_record->ionodes) - && (job_ptr->part_ptr->max_share <= 1)) - error("Small block used in " - "non-shared partition"); - debug2("%d can start job at %u on %s", - test_only, bg_record->est_job_end, - bg_record->nodes); - /* set the block id and info about block */ - 
select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_NODE_CNT, - &bg_record->node_cnt); - select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_GEOMETRY, - &bg_record->geo); - select_g_set_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_NODES, bg_record->nodes); select_g_set_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_IONODES, bg_record->ionodes); - select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_EST_START, - &bg_record->est_job_end); - tmp16 = bg_record->conn_type; - select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_CONN_TYPE, - &tmp16); - if(test_only) { - select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_BLOCK_ID, - "unassigned"); - } else { - select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_BLOCK_ID, - bg_record->bg_block_id); - bg_record->job_running = job_ptr->job_id; - } + + } else { + error("we got a success, but no block back"); } } diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c index bac6f84dd76..0393c765988 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_run.c +++ b/src/plugins/select/bluegene/plugin/bg_job_run.c @@ -70,9 +70,8 @@ enum update_op {START_OP, TERM_OP, SYNC_OP}; typedef struct bg_update { enum update_op op; /* start | terminate | sync */ - uid_t uid; /* new user */ - uint32_t job_id; /* SLURM job id */ - time_t end_time; /* estimated time job will end */ + struct job_record *job_ptr; /* pointer to job running on + * block or NULL if no job */ uint16_t reboot; /* reboot block before starting job */ pm_partition_id_t bg_block_id; char *blrtsimage; /* BlrtsImage for this block */ @@ -212,8 +211,10 @@ static void _sync_agent(bg_update_t *bg_update_ptr) return; } slurm_mutex_lock(&block_state_mutex); - bg_record->job_running = bg_update_ptr->job_id; - bg_record->est_job_end = bg_update_ptr->end_time; + + bg_record->job_running = bg_update_ptr->job_ptr->job_id; + bg_record->job_ptr = bg_update_ptr->job_ptr; + 
if(!block_exist_in_list(bg_job_block_list, bg_record)) { list_push(bg_job_block_list, bg_record); num_unused_cpus -= bg_record->bp_count*bg_record->cpus_per_bp; @@ -223,17 +224,17 @@ static void _sync_agent(bg_update_t *bg_update_ptr) slurm_mutex_unlock(&block_state_mutex); if(bg_record->state == RM_PARTITION_READY) { - if(bg_record->user_uid != bg_update_ptr->uid) { + if(bg_record->user_uid != bg_update_ptr->job_ptr->user_id) { int set_user_rc = SLURM_SUCCESS; slurm_mutex_lock(&block_state_mutex); debug("User isn't correct for job %d on %s, " "fixing...", - bg_update_ptr->job_id, + bg_update_ptr->job_ptr->job_id, bg_update_ptr->bg_block_id); xfree(bg_record->target_name); - bg_record->target_name = - xstrdup(uid_to_string(bg_update_ptr->uid)); + bg_record->target_name = xstrdup( + uid_to_string(bg_update_ptr->job_ptr->user_id)); set_user_rc = set_block_user(bg_record); slurm_mutex_unlock(&block_state_mutex); @@ -270,7 +271,7 @@ static void _start_agent(bg_update_t *bg_update_ptr) if(!bg_record) { error("block %s not found in bg_list", bg_update_ptr->bg_block_id); - (void) slurm_fail_job(bg_update_ptr->job_id); + (void) slurm_fail_job(bg_update_ptr->job_ptr->job_id); slurm_mutex_unlock(&job_start_mutex); return; } @@ -280,7 +281,7 @@ static void _start_agent(bg_update_t *bg_update_ptr) slurm_mutex_unlock(&job_start_mutex); debug("job %d finished during the queueing job " "(everything is ok)", - bg_update_ptr->job_id); + bg_update_ptr->job_ptr->job_id); return; } if(bg_record->state == RM_PARTITION_DEALLOCATING) { @@ -337,7 +338,7 @@ static void _start_agent(bg_update_t *bg_update_ptr) slurm_mutex_unlock(&block_state_mutex); slurm_mutex_unlock(&job_start_mutex); debug("job %d already finished before boot", - bg_update_ptr->job_id); + bg_update_ptr->job_ptr->job_id); return; } @@ -427,7 +428,7 @@ static void _start_agent(bg_update_t *bg_update_ptr) is a no-op if issued prior to the script initiation do clean up just incase the fail job isn't ran */ - (void) 
slurm_fail_job(bg_update_ptr->job_id); + (void) slurm_fail_job(bg_update_ptr->job_ptr->job_id); slurm_mutex_lock(&block_state_mutex); if (remove_from_bg_list(bg_job_block_list, bg_record) == SLURM_SUCCESS) { @@ -446,14 +447,15 @@ static void _start_agent(bg_update_t *bg_update_ptr) slurm_mutex_unlock(&job_start_mutex); debug("job %d finished during the start of the boot " "(everything is ok)", - bg_update_ptr->job_id); + bg_update_ptr->job_ptr->job_id); return; } slurm_mutex_lock(&block_state_mutex); bg_record->boot_count = 0; xfree(bg_record->target_name); - bg_record->target_name = xstrdup(uid_to_string(bg_update_ptr->uid)); + bg_record->target_name = xstrdup( + uid_to_string(bg_update_ptr->job_ptr->user_id)); debug("setting the target_name for Block %s to %s", bg_record->bg_block_id, bg_record->target_name); @@ -612,7 +614,7 @@ static void _term_agent(bg_update_t *bg_update_ptr) slurm_mutex_lock(&block_state_mutex); if(bg_record->job_running > NO_JOB_RUNNING) { bg_record->job_running = NO_JOB_RUNNING; - bg_record->est_job_end = 0; + bg_record->job_ptr = NULL; } /* remove user from list */ @@ -847,9 +849,7 @@ extern int start_job(struct job_record *job_ptr) bg_update_ptr = xmalloc(sizeof(bg_update_t)); bg_update_ptr->op = START_OP; - bg_update_ptr->uid = job_ptr->user_id; - bg_update_ptr->job_id = job_ptr->job_id; - bg_update_ptr->end_time = job_ptr->end_time; + bg_update_ptr->job_ptr = job_ptr; select_g_get_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_BLOCK_ID, @@ -899,8 +899,8 @@ extern int start_job(struct job_record *job_ptr) slurm_mutex_lock(&block_state_mutex); job_ptr->num_procs = (bg_record->cpus_per_bp * bg_record->bp_count); - bg_record->job_running = bg_update_ptr->job_id; - bg_record->est_job_end = bg_update_ptr->end_time; + bg_record->job_running = bg_update_ptr->job_ptr->job_id; + bg_record->job_ptr = bg_update_ptr->job_ptr; if(!block_exist_in_list(bg_job_block_list, bg_record)) { list_push(bg_job_block_list, bg_record); num_unused_cpus -= @@ -939,8 
+939,7 @@ int term_job(struct job_record *job_ptr) bg_update_ptr = xmalloc(sizeof(bg_update_t)); bg_update_ptr->op = TERM_OP; - bg_update_ptr->uid = job_ptr->user_id; - bg_update_ptr->job_id = job_ptr->job_id; + bg_update_ptr->job_ptr = job_ptr; select_g_get_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_BLOCK_ID, &(bg_update_ptr->bg_block_id)); @@ -1030,9 +1029,7 @@ extern int sync_jobs(List job_list) bg_update_ptr->bg_block_id, job_ptr->end_time); bg_update_ptr->op = SYNC_OP; - bg_update_ptr->uid = job_ptr->user_id; - bg_update_ptr->job_id = job_ptr->job_id; - bg_update_ptr->end_time = job_ptr->end_time; + bg_update_ptr->job_ptr = job_ptr; _block_op(bg_update_ptr); } list_iterator_destroy(job_iterator); diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index c44a405c103..1a61b7bfb49 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -567,7 +567,7 @@ extern void copy_bg_record(bg_record_t *fir_record, bg_record_t *sec_record) list_iterator_destroy(itr); } sec_record->job_running = fir_record->job_running; - sec_record->est_job_end = fir_record->est_job_end; + sec_record->job_ptr = fir_record->job_ptr; sec_record->cpus_per_bp = fir_record->cpus_per_bp; sec_record->node_cnt = fir_record->node_cnt; sec_record->quarter = fir_record->quarter; diff --git a/src/plugins/select/bluegene/plugin/bluegene.h b/src/plugins/select/bluegene/plugin/bluegene.h index 45f123fc051..1a1d11493ba 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.h +++ b/src/plugins/select/bluegene/plugin/bluegene.h @@ -98,10 +98,11 @@ typedef struct bg_record { bitstr_t *ionode_bitmap; /* for small blocks bitmap to keep track which ionodes we are on. 
NULL if not a small block*/ - int job_running; /* job id if there is a job running - on the block */ - time_t est_job_end; /* if job_running time job is - estimated to end */ + struct job_record *job_ptr; /* pointer to job running on + * block or NULL if no job */ + int job_running; /* job id of job running or if + * block is in an error state + * BLOCK_ERROR_STATE */ int cpus_per_bp; /* count of cpus per base part */ uint32_t node_cnt; /* count of nodes per block */ uint16_t quarter; /* used for small blocks diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c index 0aa2fdbdb06..a18bcd8d026 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -618,7 +618,8 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) * identify the nodes which "best" satify the request. The specified * nodes may be DOWN or BUSY at the time of this test as may be used * to deterime if a job could ever run. - * IN job_ptr - pointer to job being scheduled + * IN/OUT job_ptr - pointer to job being scheduled; start_time is set + * to when the job can possibly start. 
* IN/OUT bitmap - usable nodes are set on input, nodes not required to * satisfy the request are cleared, other left set * IN min_nodes - minimum count of nodes @@ -632,19 +633,19 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) * select_p_job_test is called */ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t *bitmap, - uint32_t min_nodes, uint32_t max_nodes, - uint32_t req_nodes, int mode) + uint32_t min_nodes, uint32_t max_nodes, + uint32_t req_nodes, int mode) { bool test_only; - if (mode == SELECT_MODE_TEST_ONLY) + if (mode == SELECT_MODE_TEST_ONLY || mode == SELECT_MODE_WILL_RUN) test_only = true; - else if (mode == SELECT_MODE_TEST_ONLY) + else if (mode == SELECT_MODE_RUN_NOW) test_only = false; - else /* SELECT_MODE_WILL_RUN */ - return EINVAL; /* not yet supported */ + else + return EINVAL; /* something not yet supported */ - /* bg block test - is there a block where we have: + /* submit_job - is there a block where we have: * 1) geometry requested * 2) min/max nodes (BPs) requested * 3) type: TORUS or MESH or NAV (torus else mesh) @@ -667,32 +668,6 @@ extern int select_p_job_fini(struct job_record *job_ptr) return term_job(job_ptr); } -extern int select_p_job_update_end_time(struct job_record *job_ptr) -{ - int rc = SLURM_ERROR; - char *block_id = NULL; - bg_record_t *bg_record = NULL; - - rc = select_g_get_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_BLOCK_ID, &block_id); - if (rc == SLURM_SUCCESS) { - bg_record = find_bg_record_in_list(bg_list, block_id); - slurm_mutex_lock(&block_state_mutex); - - if(bg_record) { - if(bg_record->job_running != job_ptr->job_id) - rc = SLURM_ERROR; - else - bg_record->est_job_end = job_ptr->end_time; - } else - rc = SLURM_ERROR; - slurm_mutex_unlock(&block_state_mutex); - xfree(block_id); - } - - return rc; -} - extern int select_p_job_suspend(struct job_record *job_ptr) { return ESLURM_NOT_SUPPORTED; diff --git a/src/plugins/select/cons_res/select_cons_res.c 
b/src/plugins/select/cons_res/select_cons_res.c index d65a4442090..e5b01afacf1 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -2680,11 +2680,6 @@ extern int select_p_job_fini(struct job_record *job_ptr) return SLURM_SUCCESS; } -extern int select_p_job_update_end_time(struct job_record *job_ptr) -{ - return SLURM_SUCCESS; -} - extern int select_p_job_suspend(struct job_record *job_ptr) { ListIterator job_iterator; diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index be95f078446..038b0064066 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -867,11 +867,6 @@ extern int select_p_job_fini(struct job_record *job_ptr) return rc; } -extern int select_p_job_update_end_time(struct job_record *job_ptr) -{ - return SLURM_SUCCESS; -} - extern int select_p_job_suspend(struct job_record *job_ptr) { slurm_mutex_lock(&cr_mutex); -- GitLab