Merge remote-tracking branch 'rcu/rcu/next'
author Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2016 02:12:37 +0000 (12:12 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2016 02:12:37 +0000 (12:12 +1000)
include/linux/bug.h
kernel/cpu.c
kernel/rcu/tree.c
kernel/sched/core.c
lib/Kconfig.debug

diff --combined include/linux/bug.h
index 292d6a10b0c2683ecbd57858ca069e3dc0bb952e,51a486f4eb4c5c5eda6d062a306501eb4ee78c39..baff2e8fc8a82792045c3bb0112fb52cead7e6cb
@@@ -13,7 -13,6 +13,7 @@@ enum bug_trap_type 
  struct pt_regs;
  
  #ifdef __CHECKER__
 +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
  #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
  #define BUILD_BUG_ON_ZERO(e) (0)
  #define BUILD_BUG_ON_NULL(e) ((void*)0)
@@@ -25,8 -24,6 +25,8 @@@
  #else /* __CHECKER__ */
  
  /* Force a compilation error if a constant expression is not a power of 2 */
 +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n)      \
 +      BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
  #define BUILD_BUG_ON_NOT_POWER_OF_2(n)                        \
        BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
  
@@@ -121,4 -118,21 +121,21 @@@ static inline enum bug_trap_type report
  }
  
  #endif        /* CONFIG_GENERIC_BUG */
+ /*
+  * Since detected data corruption should stop operation on the affected
+  * structures, this returns false if the corruption condition is found.
+  */
+ #define CHECK_DATA_CORRUPTION(condition, fmt, ...)                     \
+       do {                                                             \
+               if (unlikely(condition)) {                               \
+                       if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
+                               pr_err(fmt, ##__VA_ARGS__);              \
+                               BUG();                                   \
+                       } else                                           \
+                               WARN(1, fmt, ##__VA_ARGS__);             \
+                       return false;                                    \
+               }                                                        \
+       } while (0)
  #endif        /* _LINUX_BUG_H */
diff --combined kernel/cpu.c
index e7eca02c757f7ede7743cabb00aa64c787e90e7a,ff8bc3817dde87c3953863d0d61a785e5147f8df..5df20d6d152071b40244fb5d85279b8040a641ba
@@@ -23,8 -23,6 +23,8 @@@
  #include <linux/tick.h>
  #include <linux/irq.h>
  #include <linux/smpboot.h>
 +#include <linux/relay.h>
 +#include <linux/slab.h>
  
  #include <trace/events/power.h>
  #define CREATE_TRACE_POINTS
@@@ -39,9 -37,8 +39,9 @@@
   * @thread:   Pointer to the hotplug thread
   * @should_run:       Thread should execute
   * @rollback: Perform a rollback
 - * @cb_stat:  The state for a single callback (install/uninstall)
 - * @cb:               Single callback function (install/uninstall)
 + * @single:   Single callback invocation
 + * @bringup:  Single callback bringup or teardown selector
 + * @cb_state: The state for a single callback (install/uninstall)
   * @result:   Result of the operation
   * @done:     Signal completion to the issuer of the task
   */
@@@ -52,10 -49,8 +52,10 @@@ struct cpuhp_cpu_state 
        struct task_struct      *thread;
        bool                    should_run;
        bool                    rollback;
 +      bool                    single;
 +      bool                    bringup;
 +      struct hlist_node       *node;
        enum cpuhp_state        cb_state;
 -      int                     (*cb)(unsigned int cpu);
        int                     result;
        struct completion       done;
  #endif
@@@ -73,103 -68,35 +73,103 @@@ static DEFINE_PER_CPU(struct cpuhp_cpu_
   * @cant_stop:        Bringup/teardown can't be stopped at this step
   */
  struct cpuhp_step {
 -      const char      *name;
 -      int             (*startup)(unsigned int cpu);
 -      int             (*teardown)(unsigned int cpu);
 -      bool            skip_onerr;
 -      bool            cant_stop;
 +      const char              *name;
 +      union {
 +              int             (*single)(unsigned int cpu);
 +              int             (*multi)(unsigned int cpu,
 +                                       struct hlist_node *node);
 +      } startup;
 +      union {
 +              int             (*single)(unsigned int cpu);
 +              int             (*multi)(unsigned int cpu,
 +                                       struct hlist_node *node);
 +      } teardown;
 +      struct hlist_head       list;
 +      bool                    skip_onerr;
 +      bool                    cant_stop;
 +      bool                    multi_instance;
  };
  
  static DEFINE_MUTEX(cpuhp_state_mutex);
  static struct cpuhp_step cpuhp_bp_states[];
  static struct cpuhp_step cpuhp_ap_states[];
  
 +static bool cpuhp_is_ap_state(enum cpuhp_state state)
 +{
 +      /*
 +       * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
 +       * purposes as that state is handled explicitly in cpu_down.
 +       */
 +      return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
 +}
 +
 +static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
 +{
 +      struct cpuhp_step *sp;
 +
 +      sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
 +      return sp + state;
 +}
 +
  /**
   * cpuhp_invoke_callback _ Invoke the callbacks for a given state
   * @cpu:      The cpu for which the callback should be invoked
   * @step:     The step in the state machine
 - * @cb:               The callback function to invoke
 + * @bringup:  True if the bringup callback should be invoked
   *
 - * Called from cpu hotplug and from the state register machinery
 + * Called from cpu hotplug and from the state register machinery.
   */
 -static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state step,
 -                               int (*cb)(unsigned int))
 +static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
 +                               bool bringup, struct hlist_node *node)
  {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 -      int ret = 0;
 -
 -      if (cb) {
 -              trace_cpuhp_enter(cpu, st->target, step, cb);
 +      struct cpuhp_step *step = cpuhp_get_step(state);
 +      int (*cbm)(unsigned int cpu, struct hlist_node *node);
 +      int (*cb)(unsigned int cpu);
 +      int ret, cnt;
 +
 +      if (!step->multi_instance) {
 +              cb = bringup ? step->startup.single : step->teardown.single;
 +              if (!cb)
 +                      return 0;
 +              trace_cpuhp_enter(cpu, st->target, state, cb);
                ret = cb(cpu);
 -              trace_cpuhp_exit(cpu, st->state, step, ret);
 +              trace_cpuhp_exit(cpu, st->state, state, ret);
 +              return ret;
 +      }
 +      cbm = bringup ? step->startup.multi : step->teardown.multi;
 +      if (!cbm)
 +              return 0;
 +
 +      /* Single invocation for instance add/remove */
 +      if (node) {
 +              trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
 +              ret = cbm(cpu, node);
 +              trace_cpuhp_exit(cpu, st->state, state, ret);
 +              return ret;
 +      }
 +
 +      /* State transition. Invoke on all instances */
 +      cnt = 0;
 +      hlist_for_each(node, &step->list) {
 +              trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
 +              ret = cbm(cpu, node);
 +              trace_cpuhp_exit(cpu, st->state, state, ret);
 +              if (ret)
 +                      goto err;
 +              cnt++;
 +      }
 +      return 0;
 +err:
 +      /* Rollback the instances if one failed */
 +      cbm = !bringup ? step->startup.multi : step->teardown.multi;
 +      if (!cbm)
 +              return ret;
 +
 +      hlist_for_each(node, &step->list) {
 +              if (!cnt--)
 +                      break;
 +              cbm(cpu, node);
        }
        return ret;
  }
@@@ -333,17 -260,10 +333,17 @@@ void cpu_hotplug_disable(void
  }
  EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
  
 +static void __cpu_hotplug_enable(void)
 +{
 +      if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
 +              return;
 +      cpu_hotplug_disabled--;
 +}
 +
  void cpu_hotplug_enable(void)
  {
        cpu_maps_update_begin();
 -      WARN_ON(--cpu_hotplug_disabled < 0);
 +      __cpu_hotplug_enable();
        cpu_maps_update_done();
  }
  EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
@@@ -410,6 -330,12 +410,6 @@@ static int notify_online(unsigned int c
        return 0;
  }
  
 -static int notify_starting(unsigned int cpu)
 -{
 -      cpu_notify(CPU_STARTING, cpu);
 -      return 0;
 -}
 -
  static int bringup_wait_for_ap(unsigned int cpu)
  {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@@ -423,16 -349,8 +423,16 @@@ static int bringup_cpu(unsigned int cpu
        struct task_struct *idle = idle_thread_get(cpu);
        int ret;
  
 +      /*
 +       * Some architectures have to walk the irq descriptors to
 +       * setup the vector space for the cpu which comes online.
 +       * Prevent irq alloc/free across the bringup.
 +       */
 +      irq_lock_sparse();
 +
        /* Arch-specific enabling code. */
        ret = __cpu_up(cpu, idle);
 +      irq_unlock_sparse();
        if (ret) {
                cpu_notify(CPU_UP_CANCELED, cpu);
                return ret;
  /*
   * Hotplug state machine related functions
   */
 -static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st,
 -                        struct cpuhp_step *steps)
 +static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
  {
        for (st->state++; st->state < st->target; st->state++) {
 -              struct cpuhp_step *step = steps + st->state;
 +              struct cpuhp_step *step = cpuhp_get_step(st->state);
  
                if (!step->skip_onerr)
 -                      cpuhp_invoke_callback(cpu, st->state, step->startup);
 +                      cpuhp_invoke_callback(cpu, st->state, true, NULL);
        }
  }
  
  static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 -                              struct cpuhp_step *steps, enum cpuhp_state target)
 +                              enum cpuhp_state target)
  {
        enum cpuhp_state prev_state = st->state;
        int ret = 0;
  
        for (; st->state > target; st->state--) {
 -              struct cpuhp_step *step = steps + st->state;
 -
 -              ret = cpuhp_invoke_callback(cpu, st->state, step->teardown);
 +              ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
                if (ret) {
                        st->target = prev_state;
 -                      undo_cpu_down(cpu, st, steps);
 +                      undo_cpu_down(cpu, st);
                        break;
                }
        }
        return ret;
  }
  
 -static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st,
 -                      struct cpuhp_step *steps)
 +static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
  {
        for (st->state--; st->state > st->target; st->state--) {
 -              struct cpuhp_step *step = steps + st->state;
 +              struct cpuhp_step *step = cpuhp_get_step(st->state);
  
                if (!step->skip_onerr)
 -                      cpuhp_invoke_callback(cpu, st->state, step->teardown);
 +                      cpuhp_invoke_callback(cpu, st->state, false, NULL);
        }
  }
  
  static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 -                            struct cpuhp_step *steps, enum cpuhp_state target)
 +                            enum cpuhp_state target)
  {
        enum cpuhp_state prev_state = st->state;
        int ret = 0;
  
        while (st->state < target) {
 -              struct cpuhp_step *step;
 -
                st->state++;
 -              step = steps + st->state;
 -              ret = cpuhp_invoke_callback(cpu, st->state, step->startup);
 +              ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
                if (ret) {
                        st->target = prev_state;
 -                      undo_cpu_up(cpu, st, steps);
 +                      undo_cpu_up(cpu, st);
                        break;
                }
        }
@@@ -522,13 -447,13 +522,13 @@@ static int cpuhp_ap_offline(unsigned in
  {
        enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
  
 -      return cpuhp_down_callbacks(cpu, st, cpuhp_ap_states, target);
 +      return cpuhp_down_callbacks(cpu, st, target);
  }
  
  /* Execute the online startup callbacks. Used to be CPU_ONLINE */
  static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
  {
 -      return cpuhp_up_callbacks(cpu, st, cpuhp_ap_states, st->target);
 +      return cpuhp_up_callbacks(cpu, st, st->target);
  }
  
  /*
@@@ -551,20 -476,18 +551,20 @@@ static void cpuhp_thread_fun(unsigned i
        st->should_run = false;
  
        /* Single callback invocation for [un]install ? */
 -      if (st->cb) {
 +      if (st->single) {
                if (st->cb_state < CPUHP_AP_ONLINE) {
                        local_irq_disable();
 -                      ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
 +                      ret = cpuhp_invoke_callback(cpu, st->cb_state,
 +                                                  st->bringup, st->node);
                        local_irq_enable();
                } else {
 -                      ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
 +                      ret = cpuhp_invoke_callback(cpu, st->cb_state,
 +                                                  st->bringup, st->node);
                }
        } else if (st->rollback) {
                BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
  
 -              undo_cpu_down(cpu, st, cpuhp_ap_states);
 +              undo_cpu_down(cpu, st);
                /*
                 * This is a momentary workaround to keep the notifier users
                 * happy. Will go away once we got rid of the notifiers.
  }
  
  /* Invoke a single callback on a remote cpu */
 -static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state,
 -                                  int (*cb)(unsigned int))
 +static int
 +cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
 +                       struct hlist_node *node)
  {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
  
         * we invoke the thread function directly.
         */
        if (!st->thread)
 -              return cpuhp_invoke_callback(cpu, state, cb);
 +              return cpuhp_invoke_callback(cpu, state, bringup, node);
  
        st->cb_state = state;
 -      st->cb = cb;
 +      st->single = true;
 +      st->bringup = bringup;
 +      st->node = node;
 +
        /*
         * Make sure the above stores are visible before should_run becomes
         * true. Paired with the mb() above in cpuhp_thread_fun()
  static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
  {
        st->result = 0;
 -      st->cb = NULL;
 +      st->single = false;
        /*
         * Make sure the above stores are visible before should_run becomes
         * true. Paired with the mb() above in cpuhp_thread_fun()
@@@ -755,6 -674,12 +755,6 @@@ static int notify_down_prepare(unsigne
        return err;
  }
  
 -static int notify_dying(unsigned int cpu)
 -{
 -      cpu_notify(CPU_DYING, cpu);
 -      return 0;
 -}
 -
  /* Take this CPU down. */
  static int take_cpu_down(void *_param)
  {
        if (err < 0)
                return err;
  
 +      /*
 +       * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
 +       * do this step again.
 +       */
 +      WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
 +      st->state--;
        /* Invoke the former CPU_DYING callbacks */
 -      for (; st->state > target; st->state--) {
 -              struct cpuhp_step *step = cpuhp_ap_states + st->state;
 +      for (; st->state > target; st->state--)
 +              cpuhp_invoke_callback(cpu, st->state, false, NULL);
  
 -              cpuhp_invoke_callback(cpu, st->state, step->teardown);
 -      }
        /* Give up timekeeping duties */
        tick_handover_do_timer();
        /* Park the stopper thread */
@@@ -813,7 -734,7 +813,7 @@@ static int takedown_cpu(unsigned int cp
        BUG_ON(cpu_online(cpu));
  
        /*
 -       * The migration_call() CPU_DYING callback will have removed all
 +       * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
         * runnable tasks from the cpu, there's only the idle task left now
         * that the migration thread is done doing the stop_machine thing.
         *
@@@ -866,6 -787,7 +866,6 @@@ void cpuhp_report_idle_dead(void
  #define notify_down_prepare   NULL
  #define takedown_cpu          NULL
  #define notify_dead           NULL
 -#define notify_dying          NULL
  #endif
  
  #ifdef CONFIG_HOTPLUG_CPU
@@@ -914,7 -836,7 +914,7 @@@ static int __ref _cpu_down(unsigned in
         * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
         * to do the further cleanups.
         */
 -      ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
 +      ret = cpuhp_down_callbacks(cpu, st, target);
        if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
                st->target = prev_state;
                st->rollback = true;
@@@ -955,9 -877,10 +955,9 @@@ EXPORT_SYMBOL(cpu_down)
  #endif /*CONFIG_HOTPLUG_CPU*/
  
  /**
 - * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
 + * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
   * @cpu: cpu that just started
   *
 - * This function calls the cpu_chain notifiers with CPU_STARTING.
   * It must be called by the arch code on the new cpu, before the new cpu
   * enables interrupts and before the "boot" cpu returns from __cpu_up().
   */
@@@ -966,9 -889,13 +966,10 @@@ void notify_cpu_starting(unsigned int c
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
        enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
  
+       rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
        while (st->state < target) {
 -              struct cpuhp_step *step;
 -
                st->state++;
 -              step = cpuhp_ap_states + st->state;
 -              cpuhp_invoke_callback(cpu, st->state, step->startup);
 +              cpuhp_invoke_callback(cpu, st->state, true, NULL);
        }
  }
  
@@@ -1053,7 -980,7 +1054,7 @@@ static int _cpu_up(unsigned int cpu, in
         * responsible for bringing it up to the target state.
         */
        target = min((int)target, CPUHP_BRINGUP_CPU);
 -      ret = cpuhp_up_callbacks(cpu, st, cpuhp_bp_states, target);
 +      ret = cpuhp_up_callbacks(cpu, st, target);
  out:
        cpu_hotplug_done();
        return ret;
@@@ -1098,13 -1025,12 +1099,13 @@@ EXPORT_SYMBOL_GPL(cpu_up)
  #ifdef CONFIG_PM_SLEEP_SMP
  static cpumask_var_t frozen_cpus;
  
 -int disable_nonboot_cpus(void)
 +int freeze_secondary_cpus(int primary)
  {
 -      int cpu, first_cpu, error = 0;
 +      int cpu, error = 0;
  
        cpu_maps_update_begin();
 -      first_cpu = cpumask_first(cpu_online_mask);
 +      if (!cpu_online(primary))
 +              primary = cpumask_first(cpu_online_mask);
        /*
         * We take down all of the non-boot CPUs in one shot to avoid races
         * with the userspace trying to use the CPU hotplug at the same time
  
        pr_info("Disabling non-boot CPUs ...\n");
        for_each_online_cpu(cpu) {
 -              if (cpu == first_cpu)
 +              if (cpu == primary)
                        continue;
                trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
                error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
@@@ -1156,7 -1082,7 +1157,7 @@@ void enable_nonboot_cpus(void
  
        /* Allow everyone to use the CPU hotplug again */
        cpu_maps_update_begin();
 -      WARN_ON(--cpu_hotplug_disabled < 0);
 +      __cpu_hotplug_enable();
        if (cpumask_empty(frozen_cpus))
                goto out;
  
@@@ -1245,50 -1171,40 +1246,50 @@@ core_initcall(cpu_hotplug_pm_sync_init)
  static struct cpuhp_step cpuhp_bp_states[] = {
        [CPUHP_OFFLINE] = {
                .name                   = "offline",
 -              .startup                = NULL,
 -              .teardown               = NULL,
 +              .startup.single         = NULL,
 +              .teardown.single        = NULL,
        },
  #ifdef CONFIG_SMP
        [CPUHP_CREATE_THREADS]= {
 -              .name                   = "threads:create",
 -              .startup                = smpboot_create_threads,
 -              .teardown               = NULL,
 +              .name                   = "threads:prepare",
 +              .startup.single         = smpboot_create_threads,
 +              .teardown.single        = NULL,
                .cant_stop              = true,
        },
        [CPUHP_PERF_PREPARE] = {
 -              .name = "perf prepare",
 -              .startup = perf_event_init_cpu,
 -              .teardown = perf_event_exit_cpu,
 +              .name                   = "perf:prepare",
 +              .startup.single         = perf_event_init_cpu,
 +              .teardown.single        = perf_event_exit_cpu,
        },
        [CPUHP_WORKQUEUE_PREP] = {
 -              .name = "workqueue prepare",
 -              .startup = workqueue_prepare_cpu,
 -              .teardown = NULL,
 +              .name                   = "workqueue:prepare",
 +              .startup.single         = workqueue_prepare_cpu,
 +              .teardown.single        = NULL,
        },
        [CPUHP_HRTIMERS_PREPARE] = {
 -              .name = "hrtimers prepare",
 -              .startup = hrtimers_prepare_cpu,
 -              .teardown = hrtimers_dead_cpu,
 +              .name                   = "hrtimers:prepare",
 +              .startup.single         = hrtimers_prepare_cpu,
 +              .teardown.single        = hrtimers_dead_cpu,
        },
        [CPUHP_SMPCFD_PREPARE] = {
 -              .name = "SMPCFD prepare",
 -              .startup = smpcfd_prepare_cpu,
 -              .teardown = smpcfd_dead_cpu,
 +              .name                   = "smpcfd:prepare",
 +              .startup.single         = smpcfd_prepare_cpu,
 +              .teardown.single        = smpcfd_dead_cpu,
 +      },
 +      [CPUHP_RELAY_PREPARE] = {
 +              .name                   = "relay:prepare",
 +              .startup.single         = relay_prepare_cpu,
 +              .teardown.single        = NULL,
 +      },
 +      [CPUHP_SLAB_PREPARE] = {
 +              .name                   = "slab:prepare",
 +              .startup.single         = slab_prepare_cpu,
 +              .teardown.single        = slab_dead_cpu,
        },
        [CPUHP_RCUTREE_PREP] = {
 -              .name = "RCU-tree prepare",
 -              .startup = rcutree_prepare_cpu,
 -              .teardown = rcutree_dead_cpu,
 +              .name                   = "RCU/tree:prepare",
 +              .startup.single         = rcutree_prepare_cpu,
 +              .teardown.single        = rcutree_dead_cpu,
        },
        /*
         * Preparatory and dead notifiers. Will be replaced once the notifiers
         */
        [CPUHP_NOTIFY_PREPARE] = {
                .name                   = "notify:prepare",
 -              .startup                = notify_prepare,
 -              .teardown               = notify_dead,
 +              .startup.single         = notify_prepare,
 +              .teardown.single        = notify_dead,
                .skip_onerr             = true,
                .cant_stop              = true,
        },
         * otherwise a RCU stall occurs.
         */
        [CPUHP_TIMERS_DEAD] = {
 -              .name = "timers dead",
 -              .startup = NULL,
 -              .teardown = timers_dead_cpu,
 +              .name                   = "timers:dead",
 +              .startup.single         = NULL,
 +              .teardown.single        = timers_dead_cpu,
        },
        /* Kicks the plugged cpu into life */
        [CPUHP_BRINGUP_CPU] = {
                .name                   = "cpu:bringup",
 -              .startup                = bringup_cpu,
 -              .teardown               = NULL,
 +              .startup.single         = bringup_cpu,
 +              .teardown.single        = NULL,
                .cant_stop              = true,
        },
        [CPUHP_AP_SMPCFD_DYING] = {
 -              .startup = NULL,
 -              .teardown = smpcfd_dying_cpu,
 +              .name                   = "smpcfd:dying",
 +              .startup.single         = NULL,
 +              .teardown.single        = smpcfd_dying_cpu,
        },
        /*
         * Handled on controll processor until the plugged processor manages
         */
        [CPUHP_TEARDOWN_CPU] = {
                .name                   = "cpu:teardown",
 -              .startup                = NULL,
 -              .teardown               = takedown_cpu,
 +              .startup.single         = NULL,
 +              .teardown.single        = takedown_cpu,
                .cant_stop              = true,
        },
  #else
@@@ -1356,13 -1271,24 +1357,13 @@@ static struct cpuhp_step cpuhp_ap_state
        /* First state is scheduler control. Interrupts are disabled */
        [CPUHP_AP_SCHED_STARTING] = {
                .name                   = "sched:starting",
 -              .startup                = sched_cpu_starting,
 -              .teardown               = sched_cpu_dying,
 +              .startup.single         = sched_cpu_starting,
 +              .teardown.single        = sched_cpu_dying,
        },
        [CPUHP_AP_RCUTREE_DYING] = {
 -              .startup = NULL,
 -              .teardown = rcutree_dying_cpu,
 -      },
 -      /*
 -       * Low level startup/teardown notifiers. Run with interrupts
 -       * disabled. Will be removed once the notifiers are converted to
 -       * states.
 -       */
 -      [CPUHP_AP_NOTIFY_STARTING] = {
 -              .name                   = "notify:starting",
 -              .startup                = notify_starting,
 -              .teardown               = notify_dying,
 -              .skip_onerr             = true,
 -              .cant_stop              = true,
 +              .name                   = "RCU/tree:dying",
 +              .startup.single         = NULL,
 +              .teardown.single        = rcutree_dying_cpu,
        },
        /* Entry state on starting. Interrupts enabled from here on. Transient
         * state for synchronsization */
        },
        /* Handle smpboot threads park/unpark */
        [CPUHP_AP_SMPBOOT_THREADS] = {
 -              .name                   = "smpboot:threads",
 -              .startup                = smpboot_unpark_threads,
 -              .teardown               = NULL,
 +              .name                   = "smpboot/threads:online",
 +              .startup.single         = smpboot_unpark_threads,
 +              .teardown.single        = NULL,
        },
        [CPUHP_AP_PERF_ONLINE] = {
 -              .name = "perf online",
 -              .startup = perf_event_init_cpu,
 -              .teardown = perf_event_exit_cpu,
 +              .name                   = "perf:online",
 +              .startup.single         = perf_event_init_cpu,
 +              .teardown.single        = perf_event_exit_cpu,
        },
        [CPUHP_AP_WORKQUEUE_ONLINE] = {
 -              .name = "workqueue online",
 -              .startup = workqueue_online_cpu,
 -              .teardown = workqueue_offline_cpu,
 +              .name                   = "workqueue:online",
 +              .startup.single         = workqueue_online_cpu,
 +              .teardown.single        = workqueue_offline_cpu,
        },
        [CPUHP_AP_RCUTREE_ONLINE] = {
 -              .name = "RCU-tree online",
 -              .startup = rcutree_online_cpu,
 -              .teardown = rcutree_offline_cpu,
 +              .name                   = "RCU/tree:online",
 +              .startup.single         = rcutree_online_cpu,
 +              .teardown.single        = rcutree_offline_cpu,
        },
  
        /*
         */
        [CPUHP_AP_NOTIFY_ONLINE] = {
                .name                   = "notify:online",
 -              .startup                = notify_online,
 -              .teardown               = notify_down_prepare,
 +              .startup.single         = notify_online,
 +              .teardown.single        = notify_down_prepare,
                .skip_onerr             = true,
        },
  #endif
        /* Last state is scheduler control setting the cpu active */
        [CPUHP_AP_ACTIVE] = {
                .name                   = "sched:active",
 -              .startup                = sched_cpu_activate,
 -              .teardown               = sched_cpu_deactivate,
 +              .startup.single         = sched_cpu_activate,
 +              .teardown.single        = sched_cpu_deactivate,
        },
  #endif
  
        /* CPU is fully up and running. */
        [CPUHP_ONLINE] = {
                .name                   = "online",
 -              .startup                = NULL,
 -              .teardown               = NULL,
 +              .startup.single         = NULL,
 +              .teardown.single        = NULL,
        },
  };
  
@@@ -1431,42 -1357,54 +1432,42 @@@ static int cpuhp_cb_check(enum cpuhp_st
        return 0;
  }
  
 -static bool cpuhp_is_ap_state(enum cpuhp_state state)
 -{
 -      /*
 -       * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
 -       * purposes as that state is handled explicitely in cpu_down.
 -       */
 -      return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
 -}
 -
 -static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
 -{
 -      struct cpuhp_step *sp;
 -
 -      sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
 -      return sp + state;
 -}
 -
  static void cpuhp_store_callbacks(enum cpuhp_state state,
                                  const char *name,
                                  int (*startup)(unsigned int cpu),
 -                                int (*teardown)(unsigned int cpu))
 +                                int (*teardown)(unsigned int cpu),
 +                                bool multi_instance)
  {
        /* (Un)Install the callbacks for further cpu hotplug operations */
        struct cpuhp_step *sp;
  
        mutex_lock(&cpuhp_state_mutex);
        sp = cpuhp_get_step(state);
 -      sp->startup = startup;
 -      sp->teardown = teardown;
 +      sp->startup.single = startup;
 +      sp->teardown.single = teardown;
        sp->name = name;
 +      sp->multi_instance = multi_instance;
 +      INIT_HLIST_HEAD(&sp->list);
        mutex_unlock(&cpuhp_state_mutex);
  }
  
  static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
  {
 -      return cpuhp_get_step(state)->teardown;
 +      return cpuhp_get_step(state)->teardown.single;
  }
  
  /*
   * Call the startup/teardown function for a step either on the AP or
   * on the current CPU.
   */
 -static int cpuhp_issue_call(int cpu, enum cpuhp_state state,
 -                          int (*cb)(unsigned int), bool bringup)
 +static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
 +                          struct hlist_node *node)
  {
 +      struct cpuhp_step *sp = cpuhp_get_step(state);
        int ret;
  
 -      if (!cb)
 +      if ((bringup && !sp->startup.single) ||
 +          (!bringup && !sp->teardown.single))
                return 0;
        /*
         * The non AP bound callbacks can fail on bringup. On teardown
         */
  #ifdef CONFIG_SMP
        if (cpuhp_is_ap_state(state))
 -              ret = cpuhp_invoke_ap_callback(cpu, state, cb);
 +              ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
        else
 -              ret = cpuhp_invoke_callback(cpu, state, cb);
 +              ret = cpuhp_invoke_callback(cpu, state, bringup, node);
  #else
 -      ret = cpuhp_invoke_callback(cpu, state, cb);
 +      ret = cpuhp_invoke_callback(cpu, state, bringup, node);
  #endif
        BUG_ON(ret && !bringup);
        return ret;
   * Note: The teardown callbacks for rollback are not allowed to fail!
   */
  static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
 -                                 int (*teardown)(unsigned int cpu))
 +                                 struct hlist_node *node)
  {
        int cpu;
  
 -      if (!teardown)
 -              return;
 -
        /* Roll back the already executed steps on the other cpus */
        for_each_present_cpu(cpu) {
                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
  
                /* Did we invoke the startup call on that cpu ? */
                if (cpustate >= state)
 -                      cpuhp_issue_call(cpu, state, teardown, false);
 +                      cpuhp_issue_call(cpu, state, false, node);
        }
  }
  
@@@ -1531,52 -1472,6 +1532,52 @@@ static int cpuhp_reserve_state(enum cpu
        return -ENOSPC;
  }
  
 +int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
 +                             bool invoke)
 +{
 +      struct cpuhp_step *sp;
 +      int cpu;
 +      int ret;
 +
 +      sp = cpuhp_get_step(state);
 +      if (sp->multi_instance == false)
 +              return -EINVAL;
 +
 +      get_online_cpus();
 +
 +      if (!invoke || !sp->startup.multi)
 +              goto add_node;
 +
 +      /*
 +       * Try to call the startup callback for each present cpu
 +       * depending on the hotplug state of the cpu.
 +       */
 +      for_each_present_cpu(cpu) {
 +              struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 +              int cpustate = st->state;
 +
 +              if (cpustate < state)
 +                      continue;
 +
 +              ret = cpuhp_issue_call(cpu, state, true, node);
 +              if (ret) {
 +                      if (sp->teardown.multi)
 +                              cpuhp_rollback_install(cpu, state, node);
 +                      goto err;
 +              }
 +      }
 +add_node:
 +      ret = 0;
 +      mutex_lock(&cpuhp_state_mutex);
 +      hlist_add_head(node, &sp->list);
 +      mutex_unlock(&cpuhp_state_mutex);
 +
 +err:
 +      put_online_cpus();
 +      return ret;
 +}
 +EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
 +
  /**
   * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
   * @state:    The state to setup
  int __cpuhp_setup_state(enum cpuhp_state state,
                        const char *name, bool invoke,
                        int (*startup)(unsigned int cpu),
 -                      int (*teardown)(unsigned int cpu))
 +                      int (*teardown)(unsigned int cpu),
 +                      bool multi_instance)
  {
        int cpu, ret = 0;
        int dyn_state = 0;
                state = ret;
        }
  
 -      cpuhp_store_callbacks(state, name, startup, teardown);
 +      cpuhp_store_callbacks(state, name, startup, teardown, multi_instance);
  
        if (!invoke || !startup)
                goto out;
                if (cpustate < state)
                        continue;
  
 -              ret = cpuhp_issue_call(cpu, state, startup, true);
 +              ret = cpuhp_issue_call(cpu, state, true, NULL);
                if (ret) {
 -                      cpuhp_rollback_install(cpu, state, teardown);
 -                      cpuhp_store_callbacks(state, NULL, NULL, NULL);
 +                      if (teardown)
 +                              cpuhp_rollback_install(cpu, state, NULL);
 +                      cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
                        goto out;
                }
        }
  }
  EXPORT_SYMBOL(__cpuhp_setup_state);
  
 +int __cpuhp_state_remove_instance(enum cpuhp_state state,
 +                                struct hlist_node *node, bool invoke)
 +{
 +      struct cpuhp_step *sp = cpuhp_get_step(state);
 +      int cpu;
 +
 +      BUG_ON(cpuhp_cb_check(state));
 +
 +      if (!sp->multi_instance)
 +              return -EINVAL;
 +
 +      get_online_cpus();
 +      if (!invoke || !cpuhp_get_teardown_cb(state))
 +              goto remove;
 +      /*
 +       * Call the teardown callback for each present cpu depending
 +       * on the hotplug state of the cpu. This function is not
 +       * allowed to fail currently!
 +       */
 +      for_each_present_cpu(cpu) {
 +              struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 +              int cpustate = st->state;
 +
 +              if (cpustate >= state)
 +                      cpuhp_issue_call(cpu, state, false, node);
 +      }
 +
 +remove:
 +      mutex_lock(&cpuhp_state_mutex);
 +      hlist_del(node);
 +      mutex_unlock(&cpuhp_state_mutex);
 +      put_online_cpus();
 +
 +      return 0;
 +}
 +EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
  /**
   * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
   * @state:    The state to remove
   */
  void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
  {
 -      int (*teardown)(unsigned int cpu) = cpuhp_get_teardown_cb(state);
 +      struct cpuhp_step *sp = cpuhp_get_step(state);
        int cpu;
  
        BUG_ON(cpuhp_cb_check(state));
  
        get_online_cpus();
  
 -      if (!invoke || !teardown)
 +      if (sp->multi_instance) {
 +              WARN(!hlist_empty(&sp->list),
 +                   "Error: Removing state %d which has instances left.\n",
 +                   state);
 +              goto remove;
 +      }
 +
 +      if (!invoke || !cpuhp_get_teardown_cb(state))
                goto remove;
  
        /*
                int cpustate = st->state;
  
                if (cpustate >= state)
 -                      cpuhp_issue_call(cpu, state, teardown, false);
 +                      cpuhp_issue_call(cpu, state, false, NULL);
        }
  remove:
 -      cpuhp_store_callbacks(state, NULL, NULL, NULL);
 +      cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
        put_online_cpus();
  }
  EXPORT_SYMBOL(__cpuhp_remove_state);
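
Editor's note: a hedged sketch of how a driver could consume the new multi-instance hotplug states exported above. The driver structure, callbacks and state name are hypothetical; the cpuhp_setup_state_multi()/cpuhp_state_add_instance() wrappers are assumed to be the <linux/cpuhotplug.h> front ends for the __cpuhp_* functions in this diff.

struct my_driver_instance {
        struct hlist_node node;         /* linked into cpuhp_step::list */
        /* per-instance data ... */
};

/* Multi-instance callbacks take the per-instance hlist node as well. */
static int my_drv_cpu_online(unsigned int cpu, struct hlist_node *node)
{
        struct my_driver_instance *inst =
                hlist_entry(node, struct my_driver_instance, node);

        pr_info("mydrv: instance %p online on CPU%u\n", inst, cpu);
        return 0;
}

static int my_drv_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
        /* Tear the instance down on @cpu. Must not fail on rollback. */
        return 0;
}

static enum cpuhp_state my_drv_state;

static int my_drv_register(struct my_driver_instance *inst)
{
        int ret;

        /* Reserve a dynamic state once; callbacks are not invoked yet. */
        ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "mydrv:online",
                                      my_drv_cpu_online, my_drv_cpu_offline);
        if (ret < 0)
                return ret;
        my_drv_state = ret;

        /* Runs my_drv_cpu_online() on every present CPU at or above the state. */
        return cpuhp_state_add_instance(my_drv_state, &inst->node);
}
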
diff --combined kernel/rcu/tree.c
index e5164deb51e1e2e7f8079e41fed5cc2678ea4b4b,fead485b21f3c3e345bea82e43353343072df816..96c52e43f7cac0e5d6b41004c0c72d269c351f4a
@@@ -41,7 -41,6 +41,6 @@@
  #include <linux/export.h>
  #include <linux/completion.h>
  #include <linux/moduleparam.h>
- #include <linux/module.h>
  #include <linux/percpu.h>
  #include <linux/notifier.h>
  #include <linux/cpu.h>
@@@ -60,7 -59,6 +59,6 @@@
  #include "tree.h"
  #include "rcu.h"
  
- MODULE_ALIAS("rcutree");
  #ifdef MODULE_PARAM_PREFIX
  #undef MODULE_PARAM_PREFIX
  #endif
@@@ -1306,7 -1304,8 +1304,8 @@@ static void rcu_stall_kick_kthreads(str
        if (!rcu_kick_kthreads)
                return;
        j = READ_ONCE(rsp->jiffies_kick_kthreads);
-       if (time_after(jiffies, j) && rsp->gp_kthread) {
+       if (time_after(jiffies, j) && rsp->gp_kthread &&
+           (rcu_gp_in_progress(rsp) || READ_ONCE(rsp->gp_flags))) {
                WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name);
                rcu_ftrace_dump(DUMP_ALL);
                wake_up_process(rsp->gp_kthread);
@@@ -1848,6 -1847,7 +1847,7 @@@ static bool __note_gp_changes(struct rc
                              struct rcu_data *rdp)
  {
        bool ret;
+       bool need_gp;
  
        /* Handle the ends of any preceding grace periods first. */
        if (rdp->completed == rnp->completed &&
                 */
                rdp->gpnum = rnp->gpnum;
                trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
-               rdp->cpu_no_qs.b.norm = true;
+               need_gp = !!(rnp->qsmask & rdp->grpmask);
+               rdp->cpu_no_qs.b.norm = need_gp;
                rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
-               rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask);
+               rdp->core_needs_qs = need_gp;
                zero_cpu_stall_ticks(rdp);
                WRITE_ONCE(rdp->gpwrap, false);
        }
@@@ -2344,7 -2345,7 +2345,7 @@@ static void rcu_report_qs_rsp(struct rc
        WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
        WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
        raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
-       swake_up(&rsp->gp_wq);  /* Memory barrier implied by swake_up() path. */
+       rcu_gp_kthread_wake(rsp);
  }
  
  /*
@@@ -2828,8 -2829,7 +2829,7 @@@ static void rcu_do_batch(struct rcu_sta
   * Also schedule RCU core processing.
   *
   * This function must be called from hardirq context.  It is normally
-  * invoked from the scheduling-clock interrupt.  If rcu_pending returns
-  * false, there is no point in invoking rcu_check_callbacks().
+  * invoked from the scheduling-clock interrupt.
   */
  void rcu_check_callbacks(int user)
  {
@@@ -2970,7 -2970,7 +2970,7 @@@ static void force_quiescent_state(struc
        }
        WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
        raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
-       swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
+       rcu_gp_kthread_wake(rsp);
  }
  
  /*
@@@ -3013,7 -3013,7 +3013,7 @@@ __rcu_process_callbacks(struct rcu_stat
  /*
   * Do RCU core processing for the current CPU.
   */
 -static void rcu_process_callbacks(struct softirq_action *unused)
 +static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
  {
        struct rcu_state *rsp;
  
@@@ -3121,7 -3121,9 +3121,9 @@@ __call_rcu(struct rcu_head *head, rcu_c
        unsigned long flags;
        struct rcu_data *rdp;
  
-       WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */
+       /* Misaligned rcu_head! */
+       WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
        if (debug_rcu_head_queue(head)) {
                /* Probable double call_rcu(), so leak the callback. */
                WRITE_ONCE(head->func, rcu_leak_callback);
        }
        head->func = func;
        head->next = NULL;
-       /*
-        * Opportunistically note grace-period endings and beginnings.
-        * Note that we might see a beginning right after we see an
-        * end, but never vice versa, since this CPU has to pass through
-        * a quiescent state betweentimes.
-        */
        local_irq_save(flags);
        rdp = this_cpu_ptr(rsp->rda);
  
@@@ -3792,8 -3787,6 +3787,6 @@@ rcu_init_percpu_data(int cpu, struct rc
        rnp = rdp->mynode;
        mask = rdp->grpmask;
        raw_spin_lock_rcu_node(rnp);            /* irqs already disabled. */
-       rnp->qsmaskinitnext |= mask;
-       rnp->expmaskinitnext |= mask;
        if (!rdp->beenonline)
                WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
        rdp->beenonline = true;  /* We have now been online. */
@@@ -3860,6 -3853,32 +3853,32 @@@ int rcutree_dead_cpu(unsigned int cpu
        return 0;
  }
  
+ /*
+  * Mark the specified CPU as being online so that subsequent grace periods
+  * (both expedited and normal) will wait on it.  Note that this means that
+  * incoming CPUs are not allowed to use RCU read-side critical sections
+  * until this function is called.  Failing to observe this restriction
+  * will result in lockdep splats.
+  */
+ void rcu_cpu_starting(unsigned int cpu)
+ {
+       unsigned long flags;
+       unsigned long mask;
+       struct rcu_data *rdp;
+       struct rcu_node *rnp;
+       struct rcu_state *rsp;
+       for_each_rcu_flavor(rsp) {
+               rdp = this_cpu_ptr(rsp->rda);
+               rnp = rdp->mynode;
+               mask = rdp->grpmask;
+               raw_spin_lock_irqsave_rcu_node(rnp, flags);
+               rnp->qsmaskinitnext |= mask;
+               rnp->expmaskinitnext |= mask;
+               raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+       }
+ }
  #ifdef CONFIG_HOTPLUG_CPU
  /*
   * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
@@@ -4209,8 -4228,10 +4228,10 @@@ void __init rcu_init(void
         * or the scheduler are operational.
         */
        pm_notifier(rcu_pm_notify, 0);
-       for_each_online_cpu(cpu)
+       for_each_online_cpu(cpu) {
                rcutree_prepare_cpu(cpu);
+               rcu_cpu_starting(cpu);
+       }
  }
  
  #include "tree_exp.h"
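
Editor's note: a sketch (not part of this merge) of the ordering an architecture's secondary-CPU entry path is expected to follow now that notify_cpu_starting() invokes rcu_cpu_starting() first; the function name is hypothetical, the called kernel helpers are real.

static void hypothetical_secondary_start_kernel(unsigned int cpu)
{
        /* Low-level arch setup runs here with interrupts still disabled. */

        notify_cpu_starting(cpu);       /* rcu_cpu_starting() marks this CPU online for RCU */
        set_cpu_online(cpu, true);

        /* RCU read-side critical sections are legal from here on. */
        rcu_read_lock();
        rcu_read_unlock();

        local_irq_enable();
        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
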
diff --combined kernel/sched/core.c
index a0086a5fc00893d63f1b0c42ac85f63508d07a2d,2a18856f00ab906b6b553ed58c37c8e4f04397bb..f5f7b3cdf0be0577d69fc44e4390e7d9a1e75c88
@@@ -74,7 -74,6 +74,7 @@@
  #include <linux/context_tracking.h>
  #include <linux/compiler.h>
  #include <linux/frame.h>
 +#include <linux/prefetch.h>
  
  #include <asm/switch_to.h>
  #include <asm/tlb.h>
@@@ -581,6 -580,8 +581,8 @@@ static bool wake_up_full_nohz_cpu(int c
         * If needed we can still optimize that later with an
         * empty IRQ.
         */
+       if (cpu_is_offline(cpu))
+               return true;  /* Don't try to wake offline CPUs. */
        if (tick_nohz_full_cpu(cpu)) {
                if (cpu != smp_processor_id() ||
                    tick_nohz_tick_stopped())
        return false;
  }
  
+ /*
+  * Wake up the specified CPU.  If the CPU is going offline, it is the
+  * caller's responsibility to deal with the lost wakeup, for example,
+  * by hooking into the CPU_DEAD notifier like timers and hrtimers do.
+  */
  void wake_up_nohz_cpu(int cpu)
  {
        if (!wake_up_full_nohz_cpu(cpu))
@@@ -1265,7 -1271,7 +1272,7 @@@ static void __migrate_swap_task(struct 
                /*
                 * Task isn't running anymore; make it appear like we migrated
                 * it before it went to sleep. This means on wakeup we make the
 -               * previous cpu our targer instead of where it really is.
 +               * previous cpu our target instead of where it really is.
                 */
                p->wake_cpu = cpu;
        }
@@@ -1629,25 -1635,23 +1636,25 @@@ static inline int __set_cpus_allowed_pt
  static void
  ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
  {
 -#ifdef CONFIG_SCHEDSTATS
 -      struct rq *rq = this_rq();
 +      struct rq *rq;
  
 -#ifdef CONFIG_SMP
 -      int this_cpu = smp_processor_id();
 +      if (!schedstat_enabled())
 +              return;
 +
 +      rq = this_rq();
  
 -      if (cpu == this_cpu) {
 -              schedstat_inc(rq, ttwu_local);
 -              schedstat_inc(p, se.statistics.nr_wakeups_local);
 +#ifdef CONFIG_SMP
 +      if (cpu == rq->cpu) {
 +              schedstat_inc(rq->ttwu_local);
 +              schedstat_inc(p->se.statistics.nr_wakeups_local);
        } else {
                struct sched_domain *sd;
  
 -              schedstat_inc(p, se.statistics.nr_wakeups_remote);
 +              schedstat_inc(p->se.statistics.nr_wakeups_remote);
                rcu_read_lock();
 -              for_each_domain(this_cpu, sd) {
 +              for_each_domain(rq->cpu, sd) {
                        if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 -                              schedstat_inc(sd, ttwu_wake_remote);
 +                              schedstat_inc(sd->ttwu_wake_remote);
                                break;
                        }
                }
        }
  
        if (wake_flags & WF_MIGRATED)
 -              schedstat_inc(p, se.statistics.nr_wakeups_migrate);
 -
 +              schedstat_inc(p->se.statistics.nr_wakeups_migrate);
  #endif /* CONFIG_SMP */
  
 -      schedstat_inc(rq, ttwu_count);
 -      schedstat_inc(p, se.statistics.nr_wakeups);
 +      schedstat_inc(rq->ttwu_count);
 +      schedstat_inc(p->se.statistics.nr_wakeups);
  
        if (wake_flags & WF_SYNC)
 -              schedstat_inc(p, se.statistics.nr_wakeups_sync);
 -
 -#endif /* CONFIG_SCHEDSTATS */
 +              schedstat_inc(p->se.statistics.nr_wakeups_sync);
  }
  
  static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
@@@ -2015,28 -2022,6 +2022,28 @@@ try_to_wake_up(struct task_struct *p, u
        success = 1; /* we're going to change ->state */
        cpu = task_cpu(p);
  
 +      /*
 +       * Ensure we load p->on_rq _after_ p->state, otherwise it would
 +       * be possible to, falsely, observe p->on_rq == 0 and get stuck
 +       * in smp_cond_load_acquire() below.
 +       *
 +       * sched_ttwu_pending()                 try_to_wake_up()
 +       *   [S] p->on_rq = 1;                  [L] P->state
 +       *       UNLOCK rq->lock  -----.
 +       *                              \
 +       *                               +---   RMB
 +       * schedule()                   /
 +       *       LOCK rq->lock    -----'
 +       *       UNLOCK rq->lock
 +       *
 +       * [task p]
 +       *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
 +       *
 +       * Pairs with the UNLOCK+LOCK on rq->lock from the
 +       * last wakeup of our task and the schedule that got our task
 +       * current.
 +       */
 +      smp_rmb();
        if (p->on_rq && ttwu_remote(p, wake_flags))
                goto stat;
  
  
        ttwu_queue(p, cpu, wake_flags);
  stat:
 -      if (schedstat_enabled())
 -              ttwu_stat(p, cpu, wake_flags);
 +      ttwu_stat(p, cpu, wake_flags);
  out:
        raw_spin_unlock_irqrestore(&p->pi_lock, flags);
  
  /**
   * try_to_wake_up_local - try to wake up a local task with rq lock held
   * @p: the thread to be awakened
 + * @cookie: context's cookie for pinning
   *
   * Put @p on the run-queue if it's not already there. The caller must
   * ensure that this_rq() is locked, @p is bound to this_rq() and not
@@@ -2132,7 -2117,8 +2139,7 @@@ static void try_to_wake_up_local(struc
                ttwu_activate(rq, p, ENQUEUE_WAKEUP);
  
        ttwu_do_wakeup(rq, p, 0, cookie);
 -      if (schedstat_enabled())
 -              ttwu_stat(p, smp_processor_id(), 0);
 +      ttwu_stat(p, smp_processor_id(), 0);
  out:
        raw_spin_unlock(&p->pi_lock);
  }
@@@ -2992,23 -2978,6 +2999,23 @@@ DEFINE_PER_CPU(struct kernel_cpustat, k
  EXPORT_PER_CPU_SYMBOL(kstat);
  EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
  
 +/*
 + * The function fair_sched_class.update_curr accesses the struct curr
 + * and its field curr->exec_start; when called from task_sched_runtime(),
 + * we observe a high rate of cache misses in practice.
 + * Prefetching this data results in improved performance.
 + */
 +static inline void prefetch_curr_exec_start(struct task_struct *p)
 +{
 +#ifdef CONFIG_FAIR_GROUP_SCHED
 +      struct sched_entity *curr = (&p->se)->cfs_rq->curr;
 +#else
 +      struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
 +#endif
 +      prefetch(curr);
 +      prefetch(&curr->exec_start);
 +}
 +
  /*
   * Return accounted runtime for the task.
   * In case the task is currently running, return the runtime plus current's
@@@ -3043,7 -3012,6 +3050,7 @@@ unsigned long long task_sched_runtime(s
         * thread, breaking clock_gettime().
         */
        if (task_current(rq, p) && task_on_rq_queued(p)) {
 +              prefetch_curr_exec_start(p);
                update_rq_clock(rq);
                p->sched_class->update_curr(rq);
        }
@@@ -3190,9 -3158,6 +3197,9 @@@ static inline void preempt_latency_stop
   */
  static noinline void __schedule_bug(struct task_struct *prev)
  {
 +      /* Save this before calling printk(), since that will clobber it */
 +      unsigned long preempt_disable_ip = get_preempt_disable_ip(current);
 +
        if (oops_in_progress)
                return;
  
        print_modules();
        if (irqs_disabled())
                print_irqtrace_events(prev);
 -#ifdef CONFIG_DEBUG_PREEMPT
 -      if (in_atomic_preempt_off()) {
 +      if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
 +          && in_atomic_preempt_off()) {
                pr_err("Preemption disabled at:");
 -              print_ip_sym(current->preempt_disable_ip);
 +              print_ip_sym(preempt_disable_ip);
                pr_cont("\n");
        }
 -#endif
        if (panic_on_warn)
                panic("scheduling while atomic\n");
  
@@@ -3234,7 -3200,7 +3241,7 @@@ static inline void schedule_debug(struc
  
        profile_hit(SCHED_PROFILING, __builtin_return_address(0));
  
 -      schedstat_inc(this_rq(), sched_count);
 +      schedstat_inc(this_rq()->sched_count);
  }
  
  /*
@@@ -3403,6 -3369,7 +3410,6 @@@ static void __sched notrace __schedule(
  
        balance_callback(rq);
  }
 -STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
  
  static inline void sched_submit_work(struct task_struct *tsk)
  {
@@@ -4845,7 -4812,7 +4852,7 @@@ SYSCALL_DEFINE0(sched_yield
  {
        struct rq *rq = this_rq_lock();
  
 -      schedstat_inc(rq, yld_count);
 +      schedstat_inc(rq->yld_count);
        current->sched_class->yield_task(rq);
  
        /*
@@@ -4996,7 -4963,7 +5003,7 @@@ again
  
        yielded = curr->sched_class->yield_to_task(rq, p, preempt);
        if (yielded) {
 -              schedstat_inc(rq, yld_count);
 +              schedstat_inc(rq->yld_count);
                /*
                 * Make p's CPU reschedule; pick_next_entity takes care of
                 * fairness.
@@@ -5734,7 -5701,6 +5741,7 @@@ static int sd_degenerate(struct sched_d
                         SD_BALANCE_FORK |
                         SD_BALANCE_EXEC |
                         SD_SHARE_CPUCAPACITY |
 +                       SD_ASYM_CPUCAPACITY |
                         SD_SHARE_PKG_RESOURCES |
                         SD_SHARE_POWERDOMAIN)) {
                if (sd->groups != sd->groups->next)
@@@ -5765,7 -5731,6 +5772,7 @@@ sd_parent_degenerate(struct sched_domai
                                SD_BALANCE_NEWIDLE |
                                SD_BALANCE_FORK |
                                SD_BALANCE_EXEC |
 +                              SD_ASYM_CPUCAPACITY |
                                SD_SHARE_CPUCAPACITY |
                                SD_SHARE_PKG_RESOURCES |
                                SD_PREFER_SIBLING |
@@@ -6375,32 -6340,23 +6382,32 @@@ static int sched_domains_curr_level
  /*
   * SD_flags allowed in topology descriptions.
   *
 - * SD_SHARE_CPUCAPACITY      - describes SMT topologies
 - * SD_SHARE_PKG_RESOURCES - describes shared caches
 - * SD_NUMA                - describes NUMA topologies
 - * SD_SHARE_POWERDOMAIN   - describes shared power domain
 + * These flags are purely descriptive of the topology and do not prescribe
 + * behaviour. Behaviour is artificial and mapped in the below sd_init()
 + * function:
 + *
 + *   SD_SHARE_CPUCAPACITY   - describes SMT topologies
 + *   SD_SHARE_PKG_RESOURCES - describes shared caches
 + *   SD_NUMA                - describes NUMA topologies
 + *   SD_SHARE_POWERDOMAIN   - describes shared power domain
 + *   SD_ASYM_CPUCAPACITY    - describes mixed capacity topologies
 + *
 + * Odd one out, which beside describing the topology has a quirk also
 + * prescribes the desired behaviour that goes along with it:
   *
 - * Odd one out:
 - * SD_ASYM_PACKING        - describes SMT quirks
 + *   SD_ASYM_PACKING        - describes SMT quirks
   */
  #define TOPOLOGY_SD_FLAGS             \
        (SD_SHARE_CPUCAPACITY |         \
         SD_SHARE_PKG_RESOURCES |       \
         SD_NUMA |                      \
         SD_ASYM_PACKING |              \
 +       SD_ASYM_CPUCAPACITY |          \
         SD_SHARE_POWERDOMAIN)
  
  static struct sched_domain *
 -sd_init(struct sched_domain_topology_level *tl, int cpu)
 +sd_init(struct sched_domain_topology_level *tl,
 +      struct sched_domain *child, int cpu)
  {
        struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
        int sd_weight, sd_flags = 0;
                .smt_gain               = 0,
                .max_newidle_lb_cost    = 0,
                .next_decay_max_lb_cost = jiffies,
 +              .child                  = child,
  #ifdef CONFIG_SCHED_DEBUG
                .name                   = tl->name,
  #endif
         * Convert topological properties into behaviour.
         */
  
 +      if (sd->flags & SD_ASYM_CPUCAPACITY) {
 +              struct sched_domain *t = sd;
 +
 +              for_each_lower_domain(t)
 +                      t->flags |= SD_BALANCE_WAKE;
 +      }
 +
        if (sd->flags & SD_SHARE_CPUCAPACITY) {
                sd->flags |= SD_PREFER_SIBLING;
                sd->imbalance_pct = 110;
@@@ -6884,13 -6832,16 +6891,13 @@@ struct sched_domain *build_sched_domain
                const struct cpumask *cpu_map, struct sched_domain_attr *attr,
                struct sched_domain *child, int cpu)
  {
 -      struct sched_domain *sd = sd_init(tl, cpu);
 -      if (!sd)
 -              return child;
 +      struct sched_domain *sd = sd_init(tl, child, cpu);
  
        cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
        if (child) {
                sd->level = child->level + 1;
                sched_domain_level_max = max(sched_domain_level_max, sd->level);
                child->parent = sd;
 -              sd->child = child;
  
                if (!cpumask_subset(sched_domain_span(child),
                                    sched_domain_span(sd))) {
@@@ -6921,7 -6872,6 +6928,7 @@@ static int build_sched_domains(const st
        enum s_alloc alloc_state;
        struct sched_domain *sd;
        struct s_data d;
 +      struct rq *rq = NULL;
        int i, ret = -ENOMEM;
  
        alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
        /* Attach the domains */
        rcu_read_lock();
        for_each_cpu(i, cpu_map) {
 +              rq = cpu_rq(i);
                sd = *per_cpu_ptr(d.sd, i);
 +
 +              /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
 +              if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
 +                      WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
 +
                cpu_attach_domain(sd, d.rd, i);
        }
        rcu_read_unlock();
  
 +      if (rq) {
 +              pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
 +                      cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
 +      }
 +
        ret = 0;
  error:
        __free_domain_allocs(&d, alloc_state, cpu_map);
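
The max_cpu_capacity tracking added above relies on READ_ONCE()/WRITE_ONCE() purely to rule out load/store tearing while the root-domain maximum is updated. Below is a userspace sketch of the same read-compare-write shape, assuming C11 relaxed atomics as a rough stand-in for the kernel accessors; it is not the kernel code, and it assumes a single updater context just as the attach loop above does.

/*
 * Userspace sketch of the marked-access pattern used above to track a
 * maximum capacity.  C11 relaxed atomics play the role of
 * READ_ONCE()/WRITE_ONCE(): they rule out load/store tearing, while a
 * single updater context keeps the read-compare-write sequence race free.
 */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long max_capacity;

static void note_capacity(unsigned long cap)
{
	/* Analogue of: if (cap > READ_ONCE(max)) WRITE_ONCE(max, cap); */
	if (cap > atomic_load_explicit(&max_capacity, memory_order_relaxed))
		atomic_store_explicit(&max_capacity, cap, memory_order_relaxed);
}

int main(void)
{
	unsigned long caps[] = { 446, 1024, 871 };   /* illustrative values */

	for (unsigned int i = 0; i < sizeof(caps) / sizeof(caps[0]); i++)
		note_capacity(caps[i]);

	printf("max cpu_capacity = %lu\n",
	       atomic_load_explicit(&max_capacity, memory_order_relaxed));
	return 0;
}
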
@@@ -7550,6 -7489,10 +7557,6 @@@ void __init sched_init(void
  
        set_load_weight(&init_task);
  
 -#ifdef CONFIG_PREEMPT_NOTIFIERS
 -      INIT_HLIST_HEAD(&init_task.preempt_notifiers);
 -#endif
 -
        /*
         * The boot idle thread does lazy MMU switching as well:
         */
@@@ -7615,7 -7558,6 +7622,7 @@@ EXPORT_SYMBOL(__might_sleep)
  void ___might_sleep(const char *file, int line, int preempt_offset)
  {
        static unsigned long prev_jiffy;        /* ratelimiting */
 +      unsigned long preempt_disable_ip;
  
        rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
        if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
                return;
        prev_jiffy = jiffies;
  
 +      /* Save this before calling printk(), since that will clobber it */
 +      preempt_disable_ip = get_preempt_disable_ip(current);
 +
        printk(KERN_ERR
                "BUG: sleeping function called from invalid context at %s:%d\n",
                        file, line);
        debug_show_held_locks(current);
        if (irqs_disabled())
                print_irqtrace_events(current);
 -#ifdef CONFIG_DEBUG_PREEMPT
 -      if (!preempt_count_equals(preempt_offset)) {
 +      if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
 +          && !preempt_count_equals(preempt_offset)) {
                pr_err("Preemption disabled at:");
 -              print_ip_sym(current->preempt_disable_ip);
 +              print_ip_sym(preempt_disable_ip);
                pr_cont("\n");
        }
 -#endif
        dump_stack();
 +      add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
  }
  EXPORT_SYMBOL(___might_sleep);
  #endif
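
The ___might_sleep() hunk replaces an #ifdef CONFIG_DEBUG_PREEMPT block with an IS_ENABLED() test, so the branch stays visible to the compiler (and keeps being type checked) while still folding away when the option is off. The sketch below shows that shape with a plain compile-time constant standing in for IS_ENABLED(CONFIG_DEBUG_PREEMPT); it is a simplified model, not the kernel macro, and the address printed is an illustrative placeholder.

/*
 * Minimal sketch of why the #ifdef became a constant-condition check.
 * DEBUG_PREEMPT_ENABLED below is a simplified stand-in for
 * IS_ENABLED(CONFIG_DEBUG_PREEMPT); the disabled branch is still parsed
 * and type checked, then eliminated as dead code.
 */
#include <stdio.h>

#ifndef DEBUG_PREEMPT_ENABLED
#define DEBUG_PREEMPT_ENABLED 0    /* flip to 1 to "enable the option" */
#endif

static void report_bad_sleep(int preempt_ok, unsigned long disable_ip)
{
	fprintf(stderr, "BUG: sleeping function called from invalid context\n");

	/* Compiled either way; dropped when DEBUG_PREEMPT_ENABLED is 0. */
	if (DEBUG_PREEMPT_ENABLED && !preempt_ok)
		fprintf(stderr, "Preemption disabled at: %#lx\n", disable_ip);
}

int main(void)
{
	report_bad_sleep(0, 0x1234UL);   /* illustrative address */
	return 0;
}
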
@@@ -7671,10 -7610,12 +7678,10 @@@ void normalize_rt_tasks(void
                if (p->flags & PF_KTHREAD)
                        continue;
  
 -              p->se.exec_start                = 0;
 -#ifdef CONFIG_SCHEDSTATS
 -              p->se.statistics.wait_start     = 0;
 -              p->se.statistics.sleep_start    = 0;
 -              p->se.statistics.block_start    = 0;
 -#endif
 +              p->se.exec_start = 0;
 +              schedstat_set(p->se.statistics.wait_start,  0);
 +              schedstat_set(p->se.statistics.sleep_start, 0);
 +              schedstat_set(p->se.statistics.block_start, 0);
  
                if (!dl_task(p) && !rt_task(p)) {
                        /*
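
The normalize_rt_tasks() and sched_yield() hunks above convert open-coded #ifdef CONFIG_SCHEDSTATS blocks into the schedstat_set()/schedstat_inc() accessors, so the call sites collapse to one-liners regardless of the config. The sketch below models only that collapse with a compile-time switch; the real kernel helpers additionally consult a runtime schedstat_enabled() check, and the macro and field names here are illustrative.

/*
 * Sketch of the accessor style used above (schedstat_inc()/schedstat_set())
 * with a simplified compile-time switch.  Not the kernel definitions: it
 * only shows how the #ifdef blocks fold into single statements at the
 * call site.
 */
#include <stdio.h>

#ifndef SCHEDSTATS_ENABLED
#define SCHEDSTATS_ENABLED 1
#endif

#if SCHEDSTATS_ENABLED
#define stat_set(var, val)	do { (var) = (val); } while (0)
#define stat_inc(var)		do { (var)++; } while (0)
#else
#define stat_set(var, val)	do { } while (0)
#define stat_inc(var)		do { } while (0)
#endif

struct stats {
	unsigned long wait_start;
	unsigned long yld_count;
};

int main(void)
{
	struct stats s = { 5, 0 };

	stat_set(s.wait_start, 0);	/* like schedstat_set(...) above       */
	stat_inc(s.yld_count);		/* like schedstat_inc(rq->yld_count)   */

	printf("wait_start=%lu yld_count=%lu\n", s.wait_start, s.yld_count);
	return 0;
}
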
diff --combined lib/Kconfig.debug
index 2e2cca5092318faf62d10ed96a3b7d0a8b9cc129,ffc2826a092ce498aae1fc9a772acfaf7fc27e82..785f04c80fcfabfb4b3c14d2b5039a98abb21103
@@@ -1214,7 -1214,7 +1214,7 @@@ config DEBUG_BUGVERBOS
  
  config DEBUG_LIST
        bool "Debug linked list manipulation"
-       depends on DEBUG_KERNEL
+       depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION
        help
          Enable this to turn on extended checks in the linked-list
          walking routines.
@@@ -1307,6 -1307,7 +1307,7 @@@ config TORTURE_TES
  config RCU_PERF_TEST
        tristate "performance tests for RCU"
        depends on DEBUG_KERNEL
+       depends on !UML
        select TORTURE_TEST
        select SRCU
        select TASKS_RCU
  config RCU_TORTURE_TEST
        tristate "torture tests for RCU"
        depends on DEBUG_KERNEL
+       depends on !UML
        select TORTURE_TEST
        select SRCU
        select TASKS_RCU
@@@ -1413,6 -1415,24 +1415,24 @@@ config RCU_TORTURE_TEST_SLOW_CLEANUP_DE
          This option specifies the number of jiffies to wait between
          each rcu_node structure cleanup operation.
  
+ config WAKE_TORTURE_TEST
+       tristate "Torture test for wakeups and CPU hotplug"
+       depends on DEBUG_KERNEL
+       depends on 64BIT
+       depends on TRACE_CLOCK
+       select TORTURE_TEST
+       default n
+       help
+         This option provides a kernel module that runs torture tests
+         on wakeups from timed waits in the presence of CPU hotplug.
+         The kernel module may be built after the fact on the running
+         kernel to be tested, if desired.
+         Say Y here if you want wakeup torture tests to be built into
+         the kernel.
+         Say M if you want the wakeup torture tests to build as a module.
+         Say N if you are unsure.
  config RCU_CPU_STALL_TIMEOUT
        int "RCU CPU stall timeout in seconds"
        depends on RCU_STALL_COMMON
@@@ -1686,6 -1706,24 +1706,6 @@@ config LATENCYTO
          Enable this option if you want to use the LatencyTOP tool
          to find out which userspace is blocking on what kernel operations.
  
 -config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 -      bool
 -
 -config DEBUG_STRICT_USER_COPY_CHECKS
 -      bool "Strict user copy size checks"
 -      depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 -      depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
 -      help
 -        Enabling this option turns a certain set of sanity checks for user
 -        copy operations into compile time failures.
 -
 -        The copy_from_user() etc checks are there to help test if there
 -        are sufficient security checks on the length argument of
 -        the copy operation, by having gcc prove that the argument is
 -        within bounds.
 -
 -        If unsure, say N.
 -
  source kernel/trace/Kconfig
  
  menu "Runtime Testing"
@@@ -1969,6 -2007,16 +1989,16 @@@ config TEST_STATIC_KEY
  
          If unsure, say N.
  
+ config BUG_ON_DATA_CORRUPTION
+       bool "Trigger a BUG when data corruption is detected"
+       select DEBUG_LIST
+       help
+         Select this option if the kernel should BUG when it encounters
+         data corruption in kernel memory structures when they get checked
+         for validity.
+         If unsure, say N.
  source "samples/Kconfig"
  
  source "lib/Kconfig.kgdb"