X-Git-Url: http://git.efficios.com/?a=blobdiff_plain;f=kernel%2Fsched.c;h=55112261027b6d8e6a839b3f32cc700e09fbef06;hb=a52bfd73589eaf88d9c95ad2c1de0b38a6b27972;hp=da1edc8277d0bdfb74d1e76c3d420dbb0a433efb;hpb=9aa55fbd01779a0b476d87cd9b5170fd5bebab1d;p=deliverable%2Flinux.git diff --git a/kernel/sched.c b/kernel/sched.c index da1edc8277d0..55112261027b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3699,6 +3699,28 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, } #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ +static void update_sched_power(struct sched_domain *sd) +{ + struct sched_domain *child = sd->child; + struct sched_group *group, *sdg = sd->groups; + unsigned long power = sdg->__cpu_power; + + if (!child) { + /* compute cpu power for this cpu */ + return; + } + + sdg->__cpu_power = 0; + + group = child->groups; + do { + sdg->__cpu_power += group->__cpu_power; + group = group->next; + } while (group != child->groups); + + if (power != sdg->__cpu_power) + sdg->reciprocal_cpu_power = reciprocal_value(sdg->__cpu_power); +} /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. @@ -3712,7 +3734,8 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, * @balance: Should we balance. * @sgs: variable to hold the statistics for this group. */ -static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu, +static inline void update_sg_lb_stats(struct sched_domain *sd, + struct sched_group *group, int this_cpu, enum cpu_idle_type idle, int load_idx, int *sd_idle, int local_group, const struct cpumask *cpus, int *balance, struct sg_lb_stats *sgs) @@ -3723,8 +3746,11 @@ static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu, unsigned long sum_avg_load_per_task; unsigned long avg_load_per_task; - if (local_group) + if (local_group) { balance_cpu = group_first_cpu(group); + if (balance_cpu == this_cpu) + update_sched_power(sd); + } /* Tally up the load of all CPUs in the group */ sum_avg_load_per_task = avg_load_per_task = 0; @@ -3811,9 +3837,13 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, const struct cpumask *cpus, int *balance, struct sd_lb_stats *sds) { + struct sched_domain *child = sd->child; struct sched_group *group = sd->groups; struct sg_lb_stats sgs; - int load_idx; + int load_idx, prefer_sibling = 0; + + if (child && child->flags & SD_PREFER_SIBLING) + prefer_sibling = 1; init_sd_power_savings_stats(sd, sds, idle); load_idx = get_sd_load_idx(sd, idle); @@ -3824,7 +3854,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(group)); memset(&sgs, 0, sizeof(sgs)); - update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle, + update_sg_lb_stats(sd, group, this_cpu, idle, load_idx, sd_idle, local_group, cpus, balance, &sgs); if (local_group && balance && !(*balance)) @@ -3833,6 +3863,14 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, sds->total_load += sgs.group_load; sds->total_pwr += group->__cpu_power; + /* + * In case the child domain prefers tasks go to siblings + * first, lower the group capacity to one so that we'll try + * and move all the excess tasks away. + */ + if (prefer_sibling) + sgs.group_capacity = 1; + if (local_group) { sds->this_load = sgs.avg_load; sds->this = group; @@ -3851,7 +3889,6 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, update_sd_power_savings_stats(group, sds, local_group, &sgs); group = group->next; } while (group != sd->groups); - } /** @@ -8464,15 +8501,13 @@ static void free_sched_groups(const struct cpumask *cpu_map, * there are asymmetries in the topology. If there are asymmetries, group * having more cpu_power will pickup more load compared to the group having * less cpu_power. - * - * cpu_power will be a multiple of SCHED_LOAD_SCALE. This multiple represents - * the maximum number of tasks a group can handle in the presence of other idle - * or lightly loaded groups in the same sched domain. */ static void init_sched_groups_power(int cpu, struct sched_domain *sd) { struct sched_domain *child; struct sched_group *group; + long power; + int weight; WARN_ON(!sd || !sd->groups); @@ -8483,22 +8518,26 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) sd->groups->__cpu_power = 0; - /* - * For perf policy, if the groups in child domain share resources - * (for example cores sharing some portions of the cache hierarchy - * or SMT), then set this domain groups cpu_power such that each group - * can handle only one task, when there are other idle groups in the - * same sched domain. - */ - if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) && - (child->flags & - (SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) { - sg_inc_cpu_power(sd->groups, SCHED_LOAD_SCALE); + if (!child) { + power = SCHED_LOAD_SCALE; + weight = cpumask_weight(sched_domain_span(sd)); + /* + * SMT siblings share the power of a single core. + * Usually multiple threads get a better yield out of + * that one core than a single thread would have, + * reflect that in sd->smt_gain. + */ + if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { + power *= sd->smt_gain; + power /= weight; + power >>= SCHED_LOAD_SHIFT; + } + sg_inc_cpu_power(sd->groups, power); return; } /* - * add cpu_power of each child group to this groups cpu_power + * Add cpu_power of each child group to this groups cpu_power. */ group = child->groups; do {