sched: Add smt_gain

[deliverable/linux.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index da1edc8277d0bdfb74d1e76c3d420dbb0a433efb..55112261027b6d8e6a839b3f32cc700e09fbef06 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3699,6 +3699,28 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
  }
  #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
  
+/*
+ * update_sched_power - refresh the cpu power of the first group of @sd.
+ * @sd: sched domain whose sd->groups power is recomputed.
+ *
+ * For a domain that has a child, the power of sd->groups is the sum of
+ * the power of every group in the child domain. A domain without a
+ * child is left untouched.
+ */
+static void update_sched_power(struct sched_domain *sd)
+{
+       struct sched_domain *child = sd->child;
+       struct sched_group *group, *sdg = sd->groups;
+       unsigned long power = 0;
+
+       if (!child) {
+               /* Lowest level: nothing to sum, power is left as is. */
+               return;
+       }
+
+       /*
+        * Accumulate into a local and publish the result with a single
+        * store. Zeroing sdg->__cpu_power first would expose a transient
+        * zero (or a partial sum) to anything reading the group power
+        * while the sum is being rebuilt.
+        */
+       group = child->groups;
+       do {
+               power += group->__cpu_power;
+               group = group->next;
+       } while (group != child->groups);
+
+       /* Only redo the reciprocal when the power actually changed. */
+       if (power != sdg->__cpu_power) {
+               sdg->__cpu_power = power;
+               sdg->reciprocal_cpu_power = reciprocal_value(power);
+       }
+}
  
  /**
   * update_sg_lb_stats - Update sched_group's statistics for load balancing.
@@ -3712,7 +3734,8 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
   * @balance: Should we balance.
   * @sgs: variable to hold the statistics for this group.
   */
-static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
+static inline void update_sg_lb_stats(struct sched_domain *sd,
+                       struct sched_group *group, int this_cpu,
                         enum cpu_idle_type idle, int load_idx, int *sd_idle,
                         int local_group, const struct cpumask *cpus,
                         int *balance, struct sg_lb_stats *sgs)
@@ -3723,8 +3746,11 @@ static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
         unsigned long sum_avg_load_per_task;
         unsigned long avg_load_per_task;
  
-       if (local_group)
+       if (local_group) {
                 balance_cpu = group_first_cpu(group);
+               if (balance_cpu == this_cpu)
+                       update_sched_power(sd);
+       }
  
         /* Tally up the load of all CPUs in the group */
         sum_avg_load_per_task = avg_load_per_task = 0;
@@ -3811,9 +3837,13 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
                         const struct cpumask *cpus, int *balance,
                         struct sd_lb_stats *sds)
  {
+       struct sched_domain *child = sd->child;
         struct sched_group *group = sd->groups;
         struct sg_lb_stats sgs;
-       int load_idx;
+       int load_idx, prefer_sibling = 0;
+
+       if (child && child->flags & SD_PREFER_SIBLING)
+               prefer_sibling = 1;
  
         init_sd_power_savings_stats(sd, sds, idle);
         load_idx = get_sd_load_idx(sd, idle);
@@ -3824,7 +3854,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
                 local_group = cpumask_test_cpu(this_cpu,
                                                sched_group_cpus(group));
                 memset(&sgs, 0, sizeof(sgs));
-               update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle,
+               update_sg_lb_stats(sd, group, this_cpu, idle, load_idx, sd_idle,
                                 local_group, cpus, balance, &sgs);
  
                 if (local_group && balance && !(*balance))
@@ -3833,6 +3863,14 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
                 sds->total_load += sgs.group_load;
                 sds->total_pwr += group->__cpu_power;
  
+               /*
+                * In case the child domain prefers tasks go to siblings
+                * first, lower the group capacity to one so that we'll try
+                * and move all the excess tasks away.
+                */
+               if (prefer_sibling)
+                       sgs.group_capacity = 1;
+
                 if (local_group) {
                         sds->this_load = sgs.avg_load;
                         sds->this = group;
@@ -3851,7 +3889,6 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
                 update_sd_power_savings_stats(group, sds, local_group, &sgs);
                 group = group->next;
         } while (group != sd->groups);
-
  }
  
  /**
@@ -8464,15 +8501,13 @@ static void free_sched_groups(const struct cpumask *cpu_map,
   * there are asymmetries in the topology. If there are asymmetries, group
   * having more cpu_power will pick up more load compared to the group having
   * less cpu_power.
- *
- * cpu_power will be a multiple of SCHED_LOAD_SCALE. This multiple represents
- * the maximum number of tasks a group can handle in the presence of other idle
- * or lightly loaded groups in the same sched domain.
   */
  static void init_sched_groups_power(int cpu, struct sched_domain *sd)
  {
         struct sched_domain *child;
         struct sched_group *group;
+       long power;
+       int weight;
  
         WARN_ON(!sd || !sd->groups);
  
@@ -8483,22 +8518,26 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
  
         sd->groups->__cpu_power = 0;
  
-       /*
-        * For perf policy, if the groups in child domain share resources
-        * (for example cores sharing some portions of the cache hierarchy
-        * or SMT), then set this domain groups cpu_power such that each group
-        * can handle only one task, when there are other idle groups in the
-        * same sched domain.
-        */
-       if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) &&
-                      (child->flags &
-                       (SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) {
-               sg_inc_cpu_power(sd->groups, SCHED_LOAD_SCALE);
+       if (!child) {
+               power = SCHED_LOAD_SCALE;
+               weight = cpumask_weight(sched_domain_span(sd));
+               /*
+                * SMT siblings share the power of a single core.
+                * Usually multiple threads get a better yield out of
+                * that one core than a single thread would have,
+                * reflect that in sd->smt_gain.
+                */
+               if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
+                       power *= sd->smt_gain;
+                       power /= weight;
+                       power >>= SCHED_LOAD_SHIFT;
+               }
+               sg_inc_cpu_power(sd->groups, power);
                 return;
         }
  
         /*
-        * add cpu_power of each child group to this groups cpu_power
+        * Add cpu_power of each child group to this group's cpu_power.
          */
         group = child->groups;
         do {