diff --git a/src/common/job_resources.c b/src/common/job_resources.c index 7a6962432bd7013a06866485bf90469170a52e81..1daedb524bbf5ddb12b85f998e42f25dfdcc82cb 100644 --- a/src/common/job_resources.c +++ b/src/common/job_resources.c @@ -1430,7 +1430,7 @@ extern int adapt_layouts(job_resources_t *job_resrcs_ptr, uint32_t cpu_freq_max, "CoresCount,LastCore", data, (sizeof(uint32_t)*2),L_T_UINT32); if(cpu_freq_max != 0){ - for (i=1; i<num_freq; i++) { + for (i=1; i<num_freq + 1; i++) { sprintf(temp, "Cpufreq%d", i); layouts_entity_pullget_kv("power_cpufreq", node_name, temp, &val, L_T_UINT32); diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 0123390d7432f14a2b3687be79a8b5c873ee27fd..577ef450cb5053b7412f447d12fd07596d1f0b58 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -1055,6 +1055,7 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size, } else { uint32_t min_watts, max_watts, job_cap, tmp_pcap_cpu_freq; uint32_t cur_max_watts, tmp_max_watts, *tmp_max_watts_dvfs; + uint32_t cpus_per_node; bitstr_t *tmp_bitmap; int k=1,*allowed_freqs; float ratio=0; @@ -1092,10 +1093,12 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size, xmalloc(sizeof(uint32_t)*(allowed_freqs[0]+1)); else tmp_max_watts_dvfs = NULL; + cpus_per_node = job_ptr->details->min_cpus / + job_ptr->details->min_nodes; tmp_max_watts = powercap_get_node_bitmap_maxwatts_dvfs(tmp_bitmap, *select_bitmap, tmp_max_watts_dvfs, allowed_freqs, - job_ptr->details->min_cpus); + cpus_per_node); } bit_free(tmp_bitmap); @@ -1106,7 +1109,8 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size, * reservations on the failure */ job_cap = powercap_get_job_cap(job_ptr, time(NULL)); - if(layout_power == 1){ + if((layout_power == 1) || + ((layout_power == 2) && (allowed_freqs[0] == 0))){ if (tmp_max_watts > job_cap) { FREE_NULL_BITMAP(*select_bitmap); if ((job_cap < powercap) && @@ -1116,61 +1120,64 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size, error_code = ESLURM_POWER_NOT_AVAIL; } } else if (layout_power == 2){ + + if (((tmp_max_watts > job_cap) || + (job_cap < powercap) || + (powercap < max_watts)) && (allowed_freqs[0] > 0)){ - if ((tmp_max_watts > job_cap) || - (job_cap < powercap) || - (powercap < max_watts)) { - - /* Calculation of the CPU Frequency to set for the job: - * The optimal CPU Frequency is the maximum allowed - * CPU Frequency that all idle nodes could run so that - * the total power consumption of the cluster is below - * the powercap value.since the number of Idle nodes - * may change in every schedule the optimal CPU - * Frequency may also change from one job to another.*/ + /* Calculation of the CPU Frequency to set for the job: + * The optimal CPU Frequency is the maximum allowed + * CPU Frequency that all idle nodes could run so that + * the total power consumption of the cluster is below + * the powercap value.since the number of Idle nodes + * may change in every schedule the optimal CPU + * Frequency may also change from one job to another.*/ k=powercap_get_job_optimal_cpufreq(job_cap, - allowed_freqs); + allowed_freqs); while ((tmp_max_watts_dvfs[k] > job_cap) && (k < allowed_freqs[0] +1)) { k++; } if (k == allowed_freqs[0] +1) { if ((job_cap < powercap) && - (tmp_max_watts_dvfs[k] <= powercap)){ - error_code = ESLURM_POWER_RESERVED; + (tmp_max_watts_dvfs[k] <= powercap)){ + error_code = + ESLURM_POWER_RESERVED; } else { - error_code = ESLURM_POWER_NOT_AVAIL; + error_code = + ESLURM_POWER_NOT_AVAIL; } } else { tmp_max_watts = tmp_max_watts_dvfs[k]; tmp_pcap_cpu_freq = - powercap_get_cpufreq( *select_bitmap, - allowed_freqs[k]); + powercap_get_cpufreq(*select_bitmap, + allowed_freqs[k]); } job_ptr->details->cpu_freq_min = - job_ptr->details->cpu_freq_max = tmp_pcap_cpu_freq; + job_ptr->details->cpu_freq_max = + tmp_pcap_cpu_freq; job_ptr->details->cpu_freq_gov = 0x10; - /* Since we alter the DVFS of jobs we need to deal with - * their time_limit to calculate the extra time needed - * for them to complete the execution without getting - * killed there should be a parameter to declare the - * effect of cpu frequency on execution time for the - * moment we use time_limit and time_min - * This has to be done to allow backfilling */ + /* Since we alter the DVFS of jobs we need to deal with + * their time_limit to calculate the extra time needed + * for them to complete the execution without getting + * killed there should be a parameter to declare the + * effect of cpu frequency on execution time for the + * moment we use time_limit and time_min + * This has to be done to allow backfilling */ - ratio = (1 + - (float)allowed_freqs[k]/(float)allowed_freqs[-1]); + ratio = (1 + (float)allowed_freqs[k] / + (float)allowed_freqs[-1]); if ((job_ptr->time_limit != INFINITE) && (job_ptr->time_limit != NO_VAL)) - job_ptr->time_limit = - (ratio * job_ptr->time_limit); + job_ptr->time_limit = (ratio * + job_ptr->time_limit); if ((job_ptr->time_min != INFINITE) && (job_ptr->time_min != NO_VAL)) - job_ptr->time_min = - (ratio * job_ptr->time_min); + job_ptr->time_min = (ratio * + job_ptr->time_min); } xfree(tmp_max_watts_dvfs); } diff --git a/src/slurmctld/powercapping.c b/src/slurmctld/powercapping.c index a44dce49949913f802403dff55a00a6511103cfc..a7dc4c744349f555dffd3eb83d9393284d759440 100644 --- a/src/slurmctld/powercapping.c +++ b/src/slurmctld/powercapping.c @@ -285,8 +285,12 @@ uint32_t powercap_get_cluster_current_max_watts(void) return 0; if (!power_layout_ready()) return 0; - - cur_max_watts = powercap_get_node_bitmap_maxwatts(NULL); + + if (which_power_layout() == 1) + cur_max_watts = powercap_get_node_bitmap_maxwatts(NULL); + else + cur_max_watts = powercap_get_node_bitmap_maxwatts_dvfs(NULL,NULL,NULL,0,0); + return cur_max_watts; } @@ -432,7 +436,7 @@ int* powercap_get_job_nodes_numfreq(bitstr_t *select_bitmap, if (bit_test(select_bitmap, i)) { layouts_entity_pullget_kv(l_name, node_ptr->name, L_NUM_FREQ, &num_freq, L_T_UINT16); - allowed_freqs = xmalloc ( sizeof (int) * ((int)num_freq+2)); + allowed_freqs = xmalloc(sizeof(int)*((int)num_freq+2)); allowed_freqs[-1] = num_freq; for(p=num_freq; p>0; p--){ sprintf(ename, "Cpufreq%d", p); @@ -463,12 +467,12 @@ uint32_t powercap_get_node_bitmap_maxwatts_dvfs(bitstr_t *idle_bitmap, int i,p; char ename[128], keyname[128]; bitstr_t *tmp_bitmap = NULL; - uint32_t data[4],core_data[3]; + uint32_t data[5],core_data[4]; if (!_powercap_enabled()) return 0; - if (tmp_max_watts_dvfs != NULL) + if (max_watts_dvfs != NULL) tmp_max_watts_dvfs = xmalloc(sizeof(uint32_t)*(allowed_freqs[0]+1)); @@ -480,7 +484,7 @@ uint32_t powercap_get_node_bitmap_maxwatts_dvfs(bitstr_t *idle_bitmap, idle_bitmap = tmp_bitmap; select_bitmap = tmp_bitmap; } - + for(i=0, node_ptr=node_record_table_ptr; i<node_record_count; i++, node_ptr++){ if (bit_test(idle_bitmap, i)) { @@ -499,26 +503,53 @@ uint32_t powercap_get_node_bitmap_maxwatts_dvfs(bitstr_t *idle_bitmap, } else if (bit_test(select_bitmap, i)) { layouts_entity_get_mkv(l_name, node_ptr->name, - "IdleWatts,MaxWatts,CoresCount,LastCore", - data, (sizeof(uint32_t) * 4), L_T_UINT32); + "IdleWatts,MaxWatts,CoresCount,LastCore,CurrentPower", + data, (sizeof(uint32_t) * 5), L_T_UINT32); /* tmp_max_watts = IdleWatts - cpus*IdleCoreWatts + cpus*MaxCoreWatts */ sprintf(ename, "virtualcore%u", data[3]); - for(p=1; p<allowed_freqs[0] + 1; p++){ + if (num_cpus == 0) + num_cpus = data[2]; + layouts_entity_get_mkv(l_name, ename, + "IdleCoreWatts,MaxCoreWatts", + core_data, + (sizeof(uint32_t) * 2), + L_T_UINT32); + if (data[4] == 0) + tmp_max_watts += data[0] - + num_cpus*core_data[0] + + num_cpus*core_data[1]; + else if (data[4] > 0) + tmp_max_watts += data[4] - + num_cpus*core_data[0] + + num_cpus*core_data[1]; + else if (num_cpus == data[2]) + tmp_max_watts += data[1]; + + for (p=1; p<allowed_freqs[0] + 1; p++){ sprintf(keyname, - "IdleCoreWatts,MaxCoreWatts,Cpufreq%dWatts", - allowed_freqs[p]); + "IdleCoreWatts,MaxCoreWatts,Cpufreq%dWatts," + "CurrentCorePower", allowed_freqs[p]); layouts_entity_get_mkv(l_name, ename, keyname, - core_data, (sizeof(uint32_t) * 3), + core_data, (sizeof(uint32_t) * 4), L_T_UINT32); - tmp_max_watts_dvfs[p] += num_cpus*core_data[2]; - if(num_cpus == data[2]) - tmp_max_watts += data[1]; - else - tmp_max_watts = data[0] - - num_cpus*core_data[0] + - num_cpus*core_data[1]; + if (num_cpus == data[2]) + tmp_max_watts_dvfs[p] += + num_cpus*core_data[2]; + else { + if(data[4] == 0){ + tmp_max_watts_dvfs[p] += + data[0] - + num_cpus*core_data[0] + + num_cpus*core_data[2]; + } else { + tmp_max_watts_dvfs[p] += + data[4] - + num_cpus*core_data[0] + + num_cpus*core_data[2]; + } + } } } else { /* non idle nodes, 2 cases : down or not */