Merge remote-tracking branch 'rcu/rcu/next'

author Stephen Rothwell <sfr@canb.auug.org.au>

Tue, 13 Sep 2016 02:12:37 +0000 (12:12 +1000)

committer Stephen Rothwell <sfr@canb.auug.org.au>

Tue, 13 Sep 2016 02:12:37 +0000 (12:12 +1000)
author Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2016 02:12:37 +0000 (12:12 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2016 02:12:37 +0000 (12:12 +1000)
diff --combined include/linux/bug.h

index 292d6a10b0c2683ecbd57858ca069e3dc0bb952e,51a486f4eb4c5c5eda6d062a306501eb4ee78c39..baff2e8fc8a82792045c3bb0112fb52cead7e6cb
--- 1/include/linux/bug.h
--- 2/include/linux/bug.h
+++ b/include/linux/bug.h
@@@ -13,7 -13,6 +13,7 @@@ enum bug_trap_type 
   struct pt_regs;
   
   #ifdef __CHECKER__
+ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
   #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
   #define BUILD_BUG_ON_ZERO(e) (0)
   #define BUILD_BUG_ON_NULL(e) ((void*)0)
@@@ -25,8 -24,6 +25,8 @@@
   #else /* __CHECKER__ */
   
   /* Force a compilation error if a constant expression is not a power of 2 */
+ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n)      \
+ +      BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
   #define BUILD_BUG_ON_NOT_POWER_OF_2(n)                        \
         BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
   
@@@ -121,4 -118,21 +121,21 @@@ static inline enum bug_trap_type report
   }
   
   #endif        /* CONFIG_GENERIC_BUG */
+ 
+ /*
+  * Since detected data corruption should stop operation on the affected
+  * structures, this returns false if the corruption condition is found.
+  */
+ #define CHECK_DATA_CORRUPTION(condition, fmt, ...)                     \
+       do {                                                             \
+               if (unlikely(condition)) {                               \
+                       if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
+                               pr_err(fmt, ##__VA_ARGS__);              \
+                               BUG();                                   \
+                       } else                                           \
+                               WARN(1, fmt, ##__VA_ARGS__);             \
+                       return false;                                    \
+               }                                                        \
+       } while (0)
+ 
   #endif        /* _LINUX_BUG_H */
diff --combined kernel/cpu.c

index e7eca02c757f7ede7743cabb00aa64c787e90e7a,ff8bc3817dde87c3953863d0d61a785e5147f8df..5df20d6d152071b40244fb5d85279b8040a641ba
--- 1/kernel/cpu.c
--- 2/kernel/cpu.c
+++ b/kernel/cpu.c
@@@ -23,8 -23,6 +23,8 @@@
   #include <linux/tick.h>
   #include <linux/irq.h>
   #include <linux/smpboot.h>
+ +#include <linux/relay.h>
+ +#include <linux/slab.h>
   
   #include <trace/events/power.h>
   #define CREATE_TRACE_POINTS
@@@ -39,9 -37,8 +39,9 @@@
    * @thread:   Pointer to the hotplug thread
    * @should_run:       Thread should execute
    * @rollback: Perform a rollback
- - * @cb_stat:  The state for a single callback (install/uninstall)
- - * @cb:               Single callback function (install/uninstall)
+ + * @single:   Single callback invocation
+ + * @bringup:  Single callback bringup or teardown selector
+ + * @cb_state: The state for a single callback (install/uninstall)
    * @result:   Result of the operation
    * @done:     Signal completion to the issuer of the task
    */
@@@ -52,10 -49,8 +52,10 @@@ struct cpuhp_cpu_state 
         struct task_struct      *thread;
         bool                    should_run;
         bool                    rollback;
+ +      bool                    single;
+ +      bool                    bringup;
+ +      struct hlist_node       *node;
         enum cpuhp_state        cb_state;
- -      int                     (*cb)(unsigned int cpu);
         int                     result;
         struct completion       done;
   #endif
@@@ -73,103 -68,35 +73,103 @@@ static DEFINE_PER_CPU(struct cpuhp_cpu_
    * @cant_stop:        Bringup/teardown can't be stopped at this step
    */
   struct cpuhp_step {
- -      const char      *name;
- -      int             (*startup)(unsigned int cpu);
- -      int             (*teardown)(unsigned int cpu);
- -      bool            skip_onerr;
- -      bool            cant_stop;
+ +      const char              *name;
+ +      union {
+ +              int             (*single)(unsigned int cpu);
+ +              int             (*multi)(unsigned int cpu,
+ +                                       struct hlist_node *node);
+ +      } startup;
+ +      union {
+ +              int             (*single)(unsigned int cpu);
+ +              int             (*multi)(unsigned int cpu,
+ +                                       struct hlist_node *node);
+ +      } teardown;
+ +      struct hlist_head       list;
+ +      bool                    skip_onerr;
+ +      bool                    cant_stop;
+ +      bool                    multi_instance;
   };
   
   static DEFINE_MUTEX(cpuhp_state_mutex);
   static struct cpuhp_step cpuhp_bp_states[];
   static struct cpuhp_step cpuhp_ap_states[];
   
+ +static bool cpuhp_is_ap_state(enum cpuhp_state state)
+ +{
+ +      /*
+ +       * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
+ +       * purposes as that state is handled explicitly in cpu_down.
+ +       */
+ +      return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
+ +}
+ +
+ +static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
+ +{
+ +      struct cpuhp_step *sp;
+ +
+ +      sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
+ +      return sp + state;
+ +}
+ +
   /**
    * cpuhp_invoke_callback _ Invoke the callbacks for a given state
    * @cpu:      The cpu for which the callback should be invoked
    * @step:     The step in the state machine
- - * @cb:               The callback function to invoke
+ + * @bringup:  True if the bringup callback should be invoked
    *
- - * Called from cpu hotplug and from the state register machinery
+ + * Called from cpu hotplug and from the state register machinery.
    */
- -static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state step,
- -                               int (*cb)(unsigned int))
+ +static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
+ +                               bool bringup, struct hlist_node *node)
   {
         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
- -      int ret = 0;
- -
- -      if (cb) {
- -              trace_cpuhp_enter(cpu, st->target, step, cb);
+ +      struct cpuhp_step *step = cpuhp_get_step(state);
+ +      int (*cbm)(unsigned int cpu, struct hlist_node *node);
+ +      int (*cb)(unsigned int cpu);
+ +      int ret, cnt;
+ +
+ +      if (!step->multi_instance) {
+ +              cb = bringup ? step->startup.single : step->teardown.single;
+ +              if (!cb)
+ +                      return 0;
+ +              trace_cpuhp_enter(cpu, st->target, state, cb);
                 ret = cb(cpu);
- -              trace_cpuhp_exit(cpu, st->state, step, ret);
+ +              trace_cpuhp_exit(cpu, st->state, state, ret);
+ +              return ret;
+ +      }
+ +      cbm = bringup ? step->startup.multi : step->teardown.multi;
+ +      if (!cbm)
+ +              return 0;
+ +
+ +      /* Single invocation for instance add/remove */
+ +      if (node) {
+ +              trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+ +              ret = cbm(cpu, node);
+ +              trace_cpuhp_exit(cpu, st->state, state, ret);
+ +              return ret;
+ +      }
+ +
+ +      /* State transition. Invoke on all instances */
+ +      cnt = 0;
+ +      hlist_for_each(node, &step->list) {
+ +              trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+ +              ret = cbm(cpu, node);
+ +              trace_cpuhp_exit(cpu, st->state, state, ret);
+ +              if (ret)
+ +                      goto err;
+ +              cnt++;
+ +      }
+ +      return 0;
+ +err:
+ +      /* Rollback the instances if one failed */
+ +      cbm = !bringup ? step->startup.multi : step->teardown.multi;
+ +      if (!cbm)
+ +              return ret;
+ +
+ +      hlist_for_each(node, &step->list) {
+ +              if (!cnt--)
+ +                      break;
+ +              cbm(cpu, node);
         }
         return ret;
   }
@@@ -333,17 -260,10 +333,17 @@@ void cpu_hotplug_disable(void
   }
   EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
   
+ +static void __cpu_hotplug_enable(void)
+ +{
+ +      if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
+ +              return;
+ +      cpu_hotplug_disabled--;
+ +}
+ +
   void cpu_hotplug_enable(void)
   {
         cpu_maps_update_begin();
- -      WARN_ON(--cpu_hotplug_disabled < 0);
+ +      __cpu_hotplug_enable();
         cpu_maps_update_done();
   }
   EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
@@@ -410,6 -330,12 +410,6 @@@ static int notify_online(unsigned int c
         return 0;
   }
   
- -static int notify_starting(unsigned int cpu)
- -{
- -      cpu_notify(CPU_STARTING, cpu);
- -      return 0;
- -}
- -
   static int bringup_wait_for_ap(unsigned int cpu)
   {
         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@@ -423,16 -349,8 +423,16 @@@ static int bringup_cpu(unsigned int cpu
         struct task_struct *idle = idle_thread_get(cpu);
         int ret;
   
+ +      /*
+ +       * Some architectures have to walk the irq descriptors to
+ +       * setup the vector space for the cpu which comes online.
+ +       * Prevent irq alloc/free across the bringup.
+ +       */
+ +      irq_lock_sparse();
+ +
         /* Arch-specific enabling code. */
         ret = __cpu_up(cpu, idle);
+ +      irq_unlock_sparse();
         if (ret) {
                 cpu_notify(CPU_UP_CANCELED, cpu);
                 return ret;
@@@ -445,55 -363,62 +445,55 @@@
   /*
    * Hotplug state machine related functions
    */
- -static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st,
- -                        struct cpuhp_step *steps)
+ +static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
   {
         for (st->state++; st->state < st->target; st->state++) {
- -              struct cpuhp_step *step = steps + st->state;
+ +              struct cpuhp_step *step = cpuhp_get_step(st->state);
   
                 if (!step->skip_onerr)
- -                      cpuhp_invoke_callback(cpu, st->state, step->startup);
+ +                      cpuhp_invoke_callback(cpu, st->state, true, NULL);
         }
   }
   
   static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
- -                              struct cpuhp_step *steps, enum cpuhp_state target)
+ +                              enum cpuhp_state target)
   {
         enum cpuhp_state prev_state = st->state;
         int ret = 0;
   
         for (; st->state > target; st->state--) {
- -              struct cpuhp_step *step = steps + st->state;
- -
- -              ret = cpuhp_invoke_callback(cpu, st->state, step->teardown);
+ +              ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
                 if (ret) {
                         st->target = prev_state;
- -                      undo_cpu_down(cpu, st, steps);
+ +                      undo_cpu_down(cpu, st);
                         break;
                 }
         }
         return ret;
   }
   
- -static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st,
- -                      struct cpuhp_step *steps)
+ +static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
   {
         for (st->state--; st->state > st->target; st->state--) {
- -              struct cpuhp_step *step = steps + st->state;
+ +              struct cpuhp_step *step = cpuhp_get_step(st->state);
   
                 if (!step->skip_onerr)
- -                      cpuhp_invoke_callback(cpu, st->state, step->teardown);
+ +                      cpuhp_invoke_callback(cpu, st->state, false, NULL);
         }
   }
   
   static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
- -                            struct cpuhp_step *steps, enum cpuhp_state target)
+ +                            enum cpuhp_state target)
   {
         enum cpuhp_state prev_state = st->state;
         int ret = 0;
   
         while (st->state < target) {
- -              struct cpuhp_step *step;
- -
                 st->state++;
- -              step = steps + st->state;
- -              ret = cpuhp_invoke_callback(cpu, st->state, step->startup);
+ +              ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
                 if (ret) {
                         st->target = prev_state;
- -                      undo_cpu_up(cpu, st, steps);
+ +                      undo_cpu_up(cpu, st);
                         break;
                 }
         }
@@@ -522,13 -447,13 +522,13 @@@ static int cpuhp_ap_offline(unsigned in
   {
         enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
   
- -      return cpuhp_down_callbacks(cpu, st, cpuhp_ap_states, target);
+ +      return cpuhp_down_callbacks(cpu, st, target);
   }
   
   /* Execute the online startup callbacks. Used to be CPU_ONLINE */
   static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
   {
- -      return cpuhp_up_callbacks(cpu, st, cpuhp_ap_states, st->target);
+ +      return cpuhp_up_callbacks(cpu, st, st->target);
   }
   
   /*
@@@ -551,20 -476,18 +551,20 @@@ static void cpuhp_thread_fun(unsigned i
         st->should_run = false;
   
         /* Single callback invocation for [un]install ? */
- -      if (st->cb) {
+ +      if (st->single) {
                 if (st->cb_state < CPUHP_AP_ONLINE) {
                         local_irq_disable();
- -                      ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+ +                      ret = cpuhp_invoke_callback(cpu, st->cb_state,
+ +                                                  st->bringup, st->node);
                         local_irq_enable();
                 } else {
- -                      ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+ +                      ret = cpuhp_invoke_callback(cpu, st->cb_state,
+ +                                                  st->bringup, st->node);
                 }
         } else if (st->rollback) {
                 BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
   
- -              undo_cpu_down(cpu, st, cpuhp_ap_states);
+ +              undo_cpu_down(cpu, st);
                 /*
                  * This is a momentary workaround to keep the notifier users
                  * happy. Will go away once we got rid of the notifiers.
@@@ -586,9 -509,8 +586,9 @@@
   }
   
   /* Invoke a single callback on a remote cpu */
- -static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state,
- -                                  int (*cb)(unsigned int))
+ +static int
+ +cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
+ +                       struct hlist_node *node)
   {
         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
   
@@@ -600,13 -522,10 +600,13 @@@
          * we invoke the thread function directly.
          */
         if (!st->thread)
- -              return cpuhp_invoke_callback(cpu, state, cb);
+ +              return cpuhp_invoke_callback(cpu, state, bringup, node);
   
         st->cb_state = state;
- -      st->cb = cb;
+ +      st->single = true;
+ +      st->bringup = bringup;
+ +      st->node = node;
+ +
         /*
          * Make sure the above stores are visible before should_run becomes
          * true. Paired with the mb() above in cpuhp_thread_fun()
@@@ -622,7 -541,7 +622,7 @@@
   static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
   {
         st->result = 0;
- -      st->cb = NULL;
+ +      st->single = false;
         /*
          * Make sure the above stores are visible before should_run becomes
          * true. Paired with the mb() above in cpuhp_thread_fun()
@@@ -755,6 -674,12 +755,6 @@@ static int notify_down_prepare(unsigne
         return err;
   }
   
- -static int notify_dying(unsigned int cpu)
- -{
- -      cpu_notify(CPU_DYING, cpu);
- -      return 0;
- -}
- -
   /* Take this CPU down. */
   static int take_cpu_down(void *_param)
   {
@@@ -767,16 -692,12 +767,16 @@@
         if (err < 0)
                 return err;
   
+ +      /*
+ +       * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
+ +       * do this step again.
+ +       */
+ +      WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
+ +      st->state--;
         /* Invoke the former CPU_DYING callbacks */
- -      for (; st->state > target; st->state--) {
- -              struct cpuhp_step *step = cpuhp_ap_states + st->state;
+ +      for (; st->state > target; st->state--)
+ +              cpuhp_invoke_callback(cpu, st->state, false, NULL);
   
- -              cpuhp_invoke_callback(cpu, st->state, step->teardown);
- -      }
         /* Give up timekeeping duties */
         tick_handover_do_timer();
         /* Park the stopper thread */
@@@ -813,7 -734,7 +813,7 @@@ static int takedown_cpu(unsigned int cp
         BUG_ON(cpu_online(cpu));
   
         /*
- -       * The migration_call() CPU_DYING callback will have removed all
+ +       * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
          * runnable tasks from the cpu, there's only the idle task left now
          * that the migration thread is done doing the stop_machine thing.
          *
@@@ -866,6 -787,7 +866,6 @@@ void cpuhp_report_idle_dead(void
   #define notify_down_prepare   NULL
   #define takedown_cpu          NULL
   #define notify_dead           NULL
- -#define notify_dying          NULL
   #endif
   
   #ifdef CONFIG_HOTPLUG_CPU
@@@ -914,7 -836,7 +914,7 @@@ static int __ref _cpu_down(unsigned in
          * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
          * to do the further cleanups.
          */
- -      ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+ +      ret = cpuhp_down_callbacks(cpu, st, target);
         if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
                 st->target = prev_state;
                 st->rollback = true;
@@@ -955,9 -877,10 +955,9 @@@ EXPORT_SYMBOL(cpu_down)
   #endif /*CONFIG_HOTPLUG_CPU*/
   
   /**
- - * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+ + * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
    * @cpu: cpu that just started
    *
- - * This function calls the cpu_chain notifiers with CPU_STARTING.
    * It must be called by the arch code on the new cpu, before the new cpu
    * enables interrupts and before the "boot" cpu returns from __cpu_up().
    */
@@@ -966,9 -889,13 +966,10 @@@ void notify_cpu_starting(unsigned int c
         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
         enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
   
+       rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
         while (st->state < target) {
- -              struct cpuhp_step *step;
- -
                 st->state++;
- -              step = cpuhp_ap_states + st->state;
- -              cpuhp_invoke_callback(cpu, st->state, step->startup);
+ +              cpuhp_invoke_callback(cpu, st->state, true, NULL);
         }
   }
   
@@@ -1053,7 -980,7 +1054,7 @@@ static int _cpu_up(unsigned int cpu, in
          * responsible for bringing it up to the target state.
          */
         target = min((int)target, CPUHP_BRINGUP_CPU);
- -      ret = cpuhp_up_callbacks(cpu, st, cpuhp_bp_states, target);
+ +      ret = cpuhp_up_callbacks(cpu, st, target);
   out:
         cpu_hotplug_done();
         return ret;
@@@ -1098,13 -1025,12 +1099,13 @@@ EXPORT_SYMBOL_GPL(cpu_up)
   #ifdef CONFIG_PM_SLEEP_SMP
   static cpumask_var_t frozen_cpus;
   
- -int disable_nonboot_cpus(void)
+ +int freeze_secondary_cpus(int primary)
   {
- -      int cpu, first_cpu, error = 0;
+ +      int cpu, error = 0;
   
         cpu_maps_update_begin();
- -      first_cpu = cpumask_first(cpu_online_mask);
+ +      if (!cpu_online(primary))
+ +              primary = cpumask_first(cpu_online_mask);
         /*
          * We take down all of the non-boot CPUs in one shot to avoid races
          * with the userspace trying to use the CPU hotplug at the same time
@@@ -1113,7 -1039,7 +1114,7 @@@
   
         pr_info("Disabling non-boot CPUs ...\n");
         for_each_online_cpu(cpu) {
- -              if (cpu == first_cpu)
+ +              if (cpu == primary)
                         continue;
                 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
                 error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
@@@ -1156,7 -1082,7 +1157,7 @@@ void enable_nonboot_cpus(void
   
         /* Allow everyone to use the CPU hotplug again */
         cpu_maps_update_begin();
- -      WARN_ON(--cpu_hotplug_disabled < 0);
+ +      __cpu_hotplug_enable();
         if (cpumask_empty(frozen_cpus))
                 goto out;
   
@@@ -1245,50 -1171,40 +1246,50 @@@ core_initcall(cpu_hotplug_pm_sync_init)
   static struct cpuhp_step cpuhp_bp_states[] = {
         [CPUHP_OFFLINE] = {
                 .name                   = "offline",
- -              .startup                = NULL,
- -              .teardown               = NULL,
+ +              .startup.single         = NULL,
+ +              .teardown.single        = NULL,
         },
   #ifdef CONFIG_SMP
         [CPUHP_CREATE_THREADS]= {
- -              .name                   = "threads:create",
- -              .startup                = smpboot_create_threads,
- -              .teardown               = NULL,
+ +              .name                   = "threads:prepare",
+ +              .startup.single         = smpboot_create_threads,
+ +              .teardown.single        = NULL,
                 .cant_stop              = true,
         },
         [CPUHP_PERF_PREPARE] = {
- -              .name = "perf prepare",
- -              .startup = perf_event_init_cpu,
- -              .teardown = perf_event_exit_cpu,
+ +              .name                   = "perf:prepare",
+ +              .startup.single         = perf_event_init_cpu,
+ +              .teardown.single        = perf_event_exit_cpu,
         },
         [CPUHP_WORKQUEUE_PREP] = {
- -              .name = "workqueue prepare",
- -              .startup = workqueue_prepare_cpu,
- -              .teardown = NULL,
+ +              .name                   = "workqueue:prepare",
+ +              .startup.single         = workqueue_prepare_cpu,
+ +              .teardown.single        = NULL,
         },
         [CPUHP_HRTIMERS_PREPARE] = {
- -              .name = "hrtimers prepare",
- -              .startup = hrtimers_prepare_cpu,
- -              .teardown = hrtimers_dead_cpu,
+ +              .name                   = "hrtimers:prepare",
+ +              .startup.single         = hrtimers_prepare_cpu,
+ +              .teardown.single        = hrtimers_dead_cpu,
         },
         [CPUHP_SMPCFD_PREPARE] = {
- -              .name = "SMPCFD prepare",
- -              .startup = smpcfd_prepare_cpu,
- -              .teardown = smpcfd_dead_cpu,
+ +              .name                   = "smpcfd:prepare",
+ +              .startup.single         = smpcfd_prepare_cpu,
+ +              .teardown.single        = smpcfd_dead_cpu,
+ +      },
+ +      [CPUHP_RELAY_PREPARE] = {
+ +              .name                   = "relay:prepare",
+ +              .startup.single         = relay_prepare_cpu,
+ +              .teardown.single        = NULL,
+ +      },
+ +      [CPUHP_SLAB_PREPARE] = {
+ +              .name                   = "slab:prepare",
+ +              .startup.single         = slab_prepare_cpu,
+ +              .teardown.single        = slab_dead_cpu,
         },
         [CPUHP_RCUTREE_PREP] = {
- -              .name = "RCU-tree prepare",
- -              .startup = rcutree_prepare_cpu,
- -              .teardown = rcutree_dead_cpu,
+ +              .name                   = "RCU/tree:prepare",
+ +              .startup.single         = rcutree_prepare_cpu,
+ +              .teardown.single        = rcutree_dead_cpu,
         },
         /*
          * Preparatory and dead notifiers. Will be replaced once the notifiers
@@@ -1296,8 -1212,8 +1297,8 @@@
          */
         [CPUHP_NOTIFY_PREPARE] = {
                 .name                   = "notify:prepare",
- -              .startup                = notify_prepare,
- -              .teardown               = notify_dead,
+ +              .startup.single         = notify_prepare,
+ +              .teardown.single        = notify_dead,
                 .skip_onerr             = true,
                 .cant_stop              = true,
         },
@@@ -1307,21 -1223,20 +1308,21 @@@
          * otherwise a RCU stall occurs.
          */
         [CPUHP_TIMERS_DEAD] = {
- -              .name = "timers dead",
- -              .startup = NULL,
- -              .teardown = timers_dead_cpu,
+ +              .name                   = "timers:dead",
+ +              .startup.single         = NULL,
+ +              .teardown.single        = timers_dead_cpu,
         },
         /* Kicks the plugged cpu into life */
         [CPUHP_BRINGUP_CPU] = {
                 .name                   = "cpu:bringup",
- -              .startup                = bringup_cpu,
- -              .teardown               = NULL,
+ +              .startup.single         = bringup_cpu,
+ +              .teardown.single        = NULL,
                 .cant_stop              = true,
         },
         [CPUHP_AP_SMPCFD_DYING] = {
- -              .startup = NULL,
- -              .teardown = smpcfd_dying_cpu,
+ +              .name                   = "smpcfd:dying",
+ +              .startup.single         = NULL,
+ +              .teardown.single        = smpcfd_dying_cpu,
         },
         /*
          * Handled on controll processor until the plugged processor manages
@@@ -1329,8 -1244,8 +1330,8 @@@
          */
         [CPUHP_TEARDOWN_CPU] = {
                 .name                   = "cpu:teardown",
- -              .startup                = NULL,
- -              .teardown               = takedown_cpu,
+ +              .startup.single         = NULL,
+ +              .teardown.single        = takedown_cpu,
                 .cant_stop              = true,
         },
   #else
@@@ -1356,13 -1271,24 +1357,13 @@@ static struct cpuhp_step cpuhp_ap_state
         /* First state is scheduler control. Interrupts are disabled */
         [CPUHP_AP_SCHED_STARTING] = {
                 .name                   = "sched:starting",
- -              .startup                = sched_cpu_starting,
- -              .teardown               = sched_cpu_dying,
+ +              .startup.single         = sched_cpu_starting,
+ +              .teardown.single        = sched_cpu_dying,
         },
         [CPUHP_AP_RCUTREE_DYING] = {
- -              .startup = NULL,
- -              .teardown = rcutree_dying_cpu,
- -      },
- -      /*
- -       * Low level startup/teardown notifiers. Run with interrupts
- -       * disabled. Will be removed once the notifiers are converted to
- -       * states.
- -       */
- -      [CPUHP_AP_NOTIFY_STARTING] = {
- -              .name                   = "notify:starting",
- -              .startup                = notify_starting,
- -              .teardown               = notify_dying,
- -              .skip_onerr             = true,
- -              .cant_stop              = true,
+ +              .name                   = "RCU/tree:dying",
+ +              .startup.single         = NULL,
+ +              .teardown.single        = rcutree_dying_cpu,
         },
         /* Entry state on starting. Interrupts enabled from here on. Transient
          * state for synchronsization */
@@@ -1371,24 -1297,24 +1372,24 @@@
         },
         /* Handle smpboot threads park/unpark */
         [CPUHP_AP_SMPBOOT_THREADS] = {
- -              .name                   = "smpboot:threads",
- -              .startup                = smpboot_unpark_threads,
- -              .teardown               = NULL,
+ +              .name                   = "smpboot/threads:online",
+ +              .startup.single         = smpboot_unpark_threads,
+ +              .teardown.single        = NULL,
         },
         [CPUHP_AP_PERF_ONLINE] = {
- -              .name = "perf online",
- -              .startup = perf_event_init_cpu,
- -              .teardown = perf_event_exit_cpu,
+ +              .name                   = "perf:online",
+ +              .startup.single         = perf_event_init_cpu,
+ +              .teardown.single        = perf_event_exit_cpu,
         },
         [CPUHP_AP_WORKQUEUE_ONLINE] = {
- -              .name = "workqueue online",
- -              .startup = workqueue_online_cpu,
- -              .teardown = workqueue_offline_cpu,
+ +              .name                   = "workqueue:online",
+ +              .startup.single         = workqueue_online_cpu,
+ +              .teardown.single        = workqueue_offline_cpu,
         },
         [CPUHP_AP_RCUTREE_ONLINE] = {
- -              .name = "RCU-tree online",
- -              .startup = rcutree_online_cpu,
- -              .teardown = rcutree_offline_cpu,
+ +              .name                   = "RCU/tree:online",
+ +              .startup.single         = rcutree_online_cpu,
+ +              .teardown.single        = rcutree_offline_cpu,
         },
   
         /*
@@@ -1397,8 -1323,8 +1398,8 @@@
          */
         [CPUHP_AP_NOTIFY_ONLINE] = {
                 .name                   = "notify:online",
- -              .startup                = notify_online,
- -              .teardown               = notify_down_prepare,
+ +              .startup.single         = notify_online,
+ +              .teardown.single        = notify_down_prepare,
                 .skip_onerr             = true,
         },
   #endif
@@@ -1410,16 -1336,16 +1411,16 @@@
         /* Last state is scheduler control setting the cpu active */
         [CPUHP_AP_ACTIVE] = {
                 .name                   = "sched:active",
- -              .startup                = sched_cpu_activate,
- -              .teardown               = sched_cpu_deactivate,
+ +              .startup.single         = sched_cpu_activate,
+ +              .teardown.single        = sched_cpu_deactivate,
         },
   #endif
   
         /* CPU is fully up and running. */
         [CPUHP_ONLINE] = {
                 .name                   = "online",
- -              .startup                = NULL,
- -              .teardown               = NULL,
+ +              .startup.single         = NULL,
+ +              .teardown.single        = NULL,
         },
   };
   
@@@ -1431,42 -1357,54 +1432,42 @@@ static int cpuhp_cb_check(enum cpuhp_st
         return 0;
   }
   
- -static bool cpuhp_is_ap_state(enum cpuhp_state state)
- -{
- -      /*
- -       * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
- -       * purposes as that state is handled explicitely in cpu_down.
- -       */
- -      return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
- -}
- -
- -static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
- -{
- -      struct cpuhp_step *sp;
- -
- -      sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
- -      return sp + state;
- -}
- -
   static void cpuhp_store_callbacks(enum cpuhp_state state,
                                   const char *name,
                                   int (*startup)(unsigned int cpu),
- -                                int (*teardown)(unsigned int cpu))
+ +                                int (*teardown)(unsigned int cpu),
+ +                                bool multi_instance)
   {
         /* (Un)Install the callbacks for further cpu hotplug operations */
         struct cpuhp_step *sp;
   
         mutex_lock(&cpuhp_state_mutex);
         sp = cpuhp_get_step(state);
- -      sp->startup = startup;
- -      sp->teardown = teardown;
+ +      sp->startup.single = startup;
+ +      sp->teardown.single = teardown;
         sp->name = name;
+ +      sp->multi_instance = multi_instance;
+ +      INIT_HLIST_HEAD(&sp->list);
         mutex_unlock(&cpuhp_state_mutex);
   }
   
   static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
   {
- -      return cpuhp_get_step(state)->teardown;
+ +      return cpuhp_get_step(state)->teardown.single;
   }
   
   /*
    * Call the startup/teardown function for a step either on the AP or
    * on the current CPU.
    */
- -static int cpuhp_issue_call(int cpu, enum cpuhp_state state,
- -                          int (*cb)(unsigned int), bool bringup)
+ +static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
+ +                          struct hlist_node *node)
   {
+ +      struct cpuhp_step *sp = cpuhp_get_step(state);
         int ret;
   
- -      if (!cb)
+ +      if ((bringup && !sp->startup.single) ||
+ +          (!bringup && !sp->teardown.single))
                 return 0;
         /*
          * The non AP bound callbacks can fail on bringup. On teardown
@@@ -1474,11 -1412,11 +1475,11 @@@
          */
   #ifdef CONFIG_SMP
         if (cpuhp_is_ap_state(state))
- -              ret = cpuhp_invoke_ap_callback(cpu, state, cb);
+ +              ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
         else
- -              ret = cpuhp_invoke_callback(cpu, state, cb);
+ +              ret = cpuhp_invoke_callback(cpu, state, bringup, node);
   #else
- -      ret = cpuhp_invoke_callback(cpu, state, cb);
+ +      ret = cpuhp_invoke_callback(cpu, state, bringup, node);
   #endif
         BUG_ON(ret && !bringup);
         return ret;
@@@ -1490,10 -1428,13 +1491,10 @@@
    * Note: The teardown callbacks for rollback are not allowed to fail!
    */
   static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
- -                                 int (*teardown)(unsigned int cpu))
+ +                                 struct hlist_node *node)
   {
         int cpu;
   
- -      if (!teardown)
- -              return;
- -
         /* Roll back the already executed steps on the other cpus */
         for_each_present_cpu(cpu) {
                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@@ -1504,7 -1445,7 +1505,7 @@@
   
                 /* Did we invoke the startup call on that cpu ? */
                 if (cpustate >= state)
- -                      cpuhp_issue_call(cpu, state, teardown, false);
+ +                      cpuhp_issue_call(cpu, state, false, node);
         }
   }
   
@@@ -1531,52 -1472,6 +1532,52 @@@ static int cpuhp_reserve_state(enum cpu
         return -ENOSPC;
   }
   
+ +int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+ +                             bool invoke)
+ +{
+ +      struct cpuhp_step *sp;
+ +      int cpu;
+ +      int ret;
+ +
+ +      sp = cpuhp_get_step(state);
+ +      if (sp->multi_instance == false)
+ +              return -EINVAL;
+ +
+ +      get_online_cpus();
+ +
+ +      if (!invoke || !sp->startup.multi)
+ +              goto add_node;
+ +
+ +      /*
+ +       * Try to call the startup callback for each present cpu
+ +       * depending on the hotplug state of the cpu.
+ +       */
+ +      for_each_present_cpu(cpu) {
+ +              struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ +              int cpustate = st->state;
+ +
+ +              if (cpustate < state)
+ +                      continue;
+ +
+ +              ret = cpuhp_issue_call(cpu, state, true, node);
+ +              if (ret) {
+ +                      if (sp->teardown.multi)
+ +                              cpuhp_rollback_install(cpu, state, node);
+ +                      goto err;
+ +              }
+ +      }
+ +add_node:
+ +      ret = 0;
+ +      mutex_lock(&cpuhp_state_mutex);
+ +      hlist_add_head(node, &sp->list);
+ +      mutex_unlock(&cpuhp_state_mutex);
+ +
+ +err:
+ +      put_online_cpus();
+ +      return ret;
+ +}
+ +EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
+ +
   /**
    * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
    * @state:    The state to setup
@@@ -1590,8 -1485,7 +1591,8 @@@
   int __cpuhp_setup_state(enum cpuhp_state state,
                         const char *name, bool invoke,
                         int (*startup)(unsigned int cpu),
- -                      int (*teardown)(unsigned int cpu))
+ +                      int (*teardown)(unsigned int cpu),
+ +                      bool multi_instance)
   {
         int cpu, ret = 0;
         int dyn_state = 0;
@@@ -1610,7 -1504,7 +1611,7 @@@
                 state = ret;
         }
   
- -      cpuhp_store_callbacks(state, name, startup, teardown);
+ +      cpuhp_store_callbacks(state, name, startup, teardown, multi_instance);
   
         if (!invoke || !startup)
                 goto out;
@@@ -1626,11 -1520,10 +1627,11 @@@
                 if (cpustate < state)
                         continue;
   
- -              ret = cpuhp_issue_call(cpu, state, startup, true);
+ +              ret = cpuhp_issue_call(cpu, state, true, NULL);
                 if (ret) {
- -                      cpuhp_rollback_install(cpu, state, teardown);
- -                      cpuhp_store_callbacks(state, NULL, NULL, NULL);
+ +                      if (teardown)
+ +                              cpuhp_rollback_install(cpu, state, NULL);
+ +                      cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
                         goto out;
                 }
         }
@@@ -1642,42 -1535,6 +1643,42 @@@ out
   }
   EXPORT_SYMBOL(__cpuhp_setup_state);
   
+ +int __cpuhp_state_remove_instance(enum cpuhp_state state,
+ +                                struct hlist_node *node, bool invoke)
+ +{
+ +      struct cpuhp_step *sp = cpuhp_get_step(state);
+ +      int cpu;
+ +
+ +      BUG_ON(cpuhp_cb_check(state));
+ +
+ +      if (!sp->multi_instance)
+ +              return -EINVAL;
+ +
+ +      get_online_cpus();
+ +      if (!invoke || !cpuhp_get_teardown_cb(state))
+ +              goto remove;
+ +      /*
+ +       * Call the teardown callback for each present cpu depending
+ +       * on the hotplug state of the cpu. This function is not
+ +       * allowed to fail currently!
+ +       */
+ +      for_each_present_cpu(cpu) {
+ +              struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ +              int cpustate = st->state;
+ +
+ +              if (cpustate >= state)
+ +                      cpuhp_issue_call(cpu, state, false, node);
+ +      }
+ +
+ +remove:
+ +      mutex_lock(&cpuhp_state_mutex);
+ +      hlist_del(node);
+ +      mutex_unlock(&cpuhp_state_mutex);
+ +      put_online_cpus();
+ +
+ +      return 0;
+ +}
+ +EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
   /**
    * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
    * @state:    The state to remove
@@@ -1689,21 -1546,14 +1690,21 @@@
    */
   void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
   {
- -      int (*teardown)(unsigned int cpu) = cpuhp_get_teardown_cb(state);
+ +      struct cpuhp_step *sp = cpuhp_get_step(state);
         int cpu;
   
         BUG_ON(cpuhp_cb_check(state));
   
         get_online_cpus();
   
- -      if (!invoke || !teardown)
+ +      if (sp->multi_instance) {
+ +              WARN(!hlist_empty(&sp->list),
+ +                   "Error: Removing state %d which has instances left.\n",
+ +                   state);
+ +              goto remove;
+ +      }
+ +
+ +      if (!invoke || !cpuhp_get_teardown_cb(state))
                 goto remove;
   
         /*
@@@ -1716,10 -1566,10 +1717,10 @@@
                 int cpustate = st->state;
   
                 if (cpustate >= state)
- -                      cpuhp_issue_call(cpu, state, teardown, false);
+ +                      cpuhp_issue_call(cpu, state, false, NULL);
         }
   remove:
- -      cpuhp_store_callbacks(state, NULL, NULL, NULL);
+ +      cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
         put_online_cpus();
   }
   EXPORT_SYMBOL(__cpuhp_remove_state);
diff --combined kernel/rcu/tree.c

index e5164deb51e1e2e7f8079e41fed5cc2678ea4b4b,fead485b21f3c3e345bea82e43353343072df816..96c52e43f7cac0e5d6b41004c0c72d269c351f4a
--- 1/kernel/rcu/tree.c
--- 2/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@@ -41,7 -41,6 +41,6 @@@
   #include <linux/export.h>
   #include <linux/completion.h>
   #include <linux/moduleparam.h>
- #include <linux/module.h>
   #include <linux/percpu.h>
   #include <linux/notifier.h>
   #include <linux/cpu.h>
@@@ -60,7 -59,6 +59,6 @@@
   #include "tree.h"
   #include "rcu.h"
   
- MODULE_ALIAS("rcutree");
   #ifdef MODULE_PARAM_PREFIX
   #undef MODULE_PARAM_PREFIX
   #endif
@@@ -1306,7 -1304,8 +1304,8 @@@ static void rcu_stall_kick_kthreads(str
         if (!rcu_kick_kthreads)
                 return;
         j = READ_ONCE(rsp->jiffies_kick_kthreads);
-       if (time_after(jiffies, j) && rsp->gp_kthread) {
+       if (time_after(jiffies, j) && rsp->gp_kthread &&
+           (rcu_gp_in_progress(rsp) || READ_ONCE(rsp->gp_flags))) {
                 WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name);
                 rcu_ftrace_dump(DUMP_ALL);
                 wake_up_process(rsp->gp_kthread);
@@@ -1848,6 -1847,7 +1847,7 @@@ static bool __note_gp_changes(struct rc
                               struct rcu_data *rdp)
   {
         bool ret;
+       bool need_gp;
   
         /* Handle the ends of any preceding grace periods first. */
         if (rdp->completed == rnp->completed &&
@@@ -1874,9 -1874,10 +1874,10 @@@
                  */
                 rdp->gpnum = rnp->gpnum;
                 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
-               rdp->cpu_no_qs.b.norm = true;
+               need_gp = !!(rnp->qsmask & rdp->grpmask);
+               rdp->cpu_no_qs.b.norm = need_gp;
                 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
-               rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask);
+               rdp->core_needs_qs = need_gp;
                 zero_cpu_stall_ticks(rdp);
                 WRITE_ONCE(rdp->gpwrap, false);
         }
@@@ -2344,7 -2345,7 +2345,7 @@@ static void rcu_report_qs_rsp(struct rc
         WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
         WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
         raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
-       swake_up(&rsp->gp_wq);  /* Memory barrier implied by swake_up() path. */
+       rcu_gp_kthread_wake(rsp);
   }
   
   /*
@@@ -2828,8 -2829,7 +2829,7 @@@ static void rcu_do_batch(struct rcu_sta
    * Also schedule RCU core processing.
    *
    * This function must be called from hardirq context.  It is normally
-  * invoked from the scheduling-clock interrupt.  If rcu_pending returns
-  * false, there is no point in invoking rcu_check_callbacks().
+  * invoked from the scheduling-clock interrupt.
    */
   void rcu_check_callbacks(int user)
   {
@@@ -2970,7 -2970,7 +2970,7 @@@ static void force_quiescent_state(struc
         }
         WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
         raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
-       swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
+       rcu_gp_kthread_wake(rsp);
   }
   
   /*
@@@ -3013,7 -3013,7 +3013,7 @@@ __rcu_process_callbacks(struct rcu_stat
   /*
    * Do RCU core processing for the current CPU.
    */
- -static void rcu_process_callbacks(struct softirq_action *unused)
+ +static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
   {
         struct rcu_state *rsp;
   
@@@ -3121,7 -3121,9 +3121,9 @@@ __call_rcu(struct rcu_head *head, rcu_c
         unsigned long flags;
         struct rcu_data *rdp;
   
-       WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */
+       /* Misaligned rcu_head! */
+       WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
+ 
         if (debug_rcu_head_queue(head)) {
                 /* Probable double call_rcu(), so leak the callback. */
                 WRITE_ONCE(head->func, rcu_leak_callback);
@@@ -3130,13 -3132,6 +3132,6 @@@
         }
         head->func = func;
         head->next = NULL;
- 
-       /*
-        * Opportunistically note grace-period endings and beginnings.
-        * Note that we might see a beginning right after we see an
-        * end, but never vice versa, since this CPU has to pass through
-        * a quiescent state betweentimes.
-        */
         local_irq_save(flags);
         rdp = this_cpu_ptr(rsp->rda);
   
@@@ -3792,8 -3787,6 +3787,6 @@@ rcu_init_percpu_data(int cpu, struct rc
         rnp = rdp->mynode;
         mask = rdp->grpmask;
         raw_spin_lock_rcu_node(rnp);            /* irqs already disabled. */
-       rnp->qsmaskinitnext |= mask;
-       rnp->expmaskinitnext |= mask;
         if (!rdp->beenonline)
                 WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
         rdp->beenonline = true;  /* We have now been online. */
@@@ -3860,6 -3853,32 +3853,32 @@@ int rcutree_dead_cpu(unsigned int cpu
         return 0;
   }
   
+ /*
+  * Mark the specified CPU as being online so that subsequent grace periods
+  * (both expedited and normal) will wait on it.  Note that this means that
+  * incoming CPUs are not allowed to use RCU read-side critical sections
+  * until this function is called.  Failing to observe this restriction
+  * will result in lockdep splats.
+  */
+ void rcu_cpu_starting(unsigned int cpu)
+ {
+       unsigned long flags;
+       unsigned long mask;
+       struct rcu_data *rdp;
+       struct rcu_node *rnp;
+       struct rcu_state *rsp;
+ 
+       for_each_rcu_flavor(rsp) {
+               rdp = this_cpu_ptr(rsp->rda);
+               rnp = rdp->mynode;
+               mask = rdp->grpmask;
+               raw_spin_lock_irqsave_rcu_node(rnp, flags);
+               rnp->qsmaskinitnext |= mask;
+               rnp->expmaskinitnext |= mask;
+               raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+       }
+ }
+ 
   #ifdef CONFIG_HOTPLUG_CPU
   /*
    * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
@@@ -4209,8 -4228,10 +4228,10 @@@ void __init rcu_init(void
          * or the scheduler are operational.
          */
         pm_notifier(rcu_pm_notify, 0);
-       for_each_online_cpu(cpu)
+       for_each_online_cpu(cpu) {
                 rcutree_prepare_cpu(cpu);
+               rcu_cpu_starting(cpu);
+       }
   }
   
   #include "tree_exp.h"
diff --combined kernel/sched/core.c

index a0086a5fc00893d63f1b0c42ac85f63508d07a2d,2a18856f00ab906b6b553ed58c37c8e4f04397bb..f5f7b3cdf0be0577d69fc44e4390e7d9a1e75c88
--- 1/kernel/sched/core.c
--- 2/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@@ -74,7 -74,6 +74,7 @@@
   #include <linux/context_tracking.h>
   #include <linux/compiler.h>
   #include <linux/frame.h>
+ +#include <linux/prefetch.h>
   
   #include <asm/switch_to.h>
   #include <asm/tlb.h>
@@@ -581,6 -580,8 +581,8 @@@ static bool wake_up_full_nohz_cpu(int c
          * If needed we can still optimize that later with an
          * empty IRQ.
          */
+       if (cpu_is_offline(cpu))
+               return true;  /* Don't try to wake offline CPUs. */
         if (tick_nohz_full_cpu(cpu)) {
                 if (cpu != smp_processor_id() ||
                     tick_nohz_tick_stopped())
@@@ -591,6 -592,11 +593,11 @@@
         return false;
   }
   
+ /*
+  * Wake up the specified CPU.  If the CPU is going offline, it is the
+  * caller's responsibility to deal with the lost wakeup, for example,
+  * by hooking into the CPU_DEAD notifier like timers and hrtimers do.
+  */
   void wake_up_nohz_cpu(int cpu)
   {
         if (!wake_up_full_nohz_cpu(cpu))
@@@ -1265,7 -1271,7 +1272,7 @@@ static void __migrate_swap_task(struct 
                 /*
                  * Task isn't running anymore; make it appear like we migrated
                  * it before it went to sleep. This means on wakeup we make the
- -               * previous cpu our targer instead of where it really is.
+ +               * previous cpu our target instead of where it really is.
                  */
                 p->wake_cpu = cpu;
         }
@@@ -1629,25 -1635,23 +1636,25 @@@ static inline int __set_cpus_allowed_pt
   static void
   ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
   {
- -#ifdef CONFIG_SCHEDSTATS
- -      struct rq *rq = this_rq();
+ +      struct rq *rq;
   
- -#ifdef CONFIG_SMP
- -      int this_cpu = smp_processor_id();
+ +      if (!schedstat_enabled())
+ +              return;
+ +
+ +      rq = this_rq();
   
- -      if (cpu == this_cpu) {
- -              schedstat_inc(rq, ttwu_local);
- -              schedstat_inc(p, se.statistics.nr_wakeups_local);
+ +#ifdef CONFIG_SMP
+ +      if (cpu == rq->cpu) {
+ +              schedstat_inc(rq->ttwu_local);
+ +              schedstat_inc(p->se.statistics.nr_wakeups_local);
         } else {
                 struct sched_domain *sd;
   
- -              schedstat_inc(p, se.statistics.nr_wakeups_remote);
+ +              schedstat_inc(p->se.statistics.nr_wakeups_remote);
                 rcu_read_lock();
- -              for_each_domain(this_cpu, sd) {
+ +              for_each_domain(rq->cpu, sd) {
                         if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
- -                              schedstat_inc(sd, ttwu_wake_remote);
+ +                              schedstat_inc(sd->ttwu_wake_remote);
                                 break;
                         }
                 }
@@@ -1655,14 -1659,17 +1662,14 @@@
         }
   
         if (wake_flags & WF_MIGRATED)
- -              schedstat_inc(p, se.statistics.nr_wakeups_migrate);
- -
+ +              schedstat_inc(p->se.statistics.nr_wakeups_migrate);
   #endif /* CONFIG_SMP */
   
- -      schedstat_inc(rq, ttwu_count);
- -      schedstat_inc(p, se.statistics.nr_wakeups);
+ +      schedstat_inc(rq->ttwu_count);
+ +      schedstat_inc(p->se.statistics.nr_wakeups);
   
         if (wake_flags & WF_SYNC)
- -              schedstat_inc(p, se.statistics.nr_wakeups_sync);
- -
- -#endif /* CONFIG_SCHEDSTATS */
+ +              schedstat_inc(p->se.statistics.nr_wakeups_sync);
   }
   
   static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
@@@ -2015,28 -2022,6 +2022,28 @@@ try_to_wake_up(struct task_struct *p, u
         success = 1; /* we're going to change ->state */
         cpu = task_cpu(p);
   
+ +      /*
+ +       * Ensure we load p->on_rq _after_ p->state, otherwise it would
+ +       * be possible to, falsely, observe p->on_rq == 0 and get stuck
+ +       * in smp_cond_load_acquire() below.
+ +       *
+ +       * sched_ttwu_pending()                 try_to_wake_up()
+ +       *   [S] p->on_rq = 1;                  [L] P->state
+ +       *       UNLOCK rq->lock  -----.
+ +       *                              \
+ +       *                               +---   RMB
+ +       * schedule()                   /
+ +       *       LOCK rq->lock    -----'
+ +       *       UNLOCK rq->lock
+ +       *
+ +       * [task p]
+ +       *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
+ +       *
+ +       * Pairs with the UNLOCK+LOCK on rq->lock from the
+ +       * last wakeup of our task and the schedule that got our task
+ +       * current.
+ +       */
+ +      smp_rmb();
         if (p->on_rq && ttwu_remote(p, wake_flags))
                 goto stat;
   
@@@ -2083,7 -2068,8 +2090,7 @@@
   
         ttwu_queue(p, cpu, wake_flags);
   stat:
- -      if (schedstat_enabled())
- -              ttwu_stat(p, cpu, wake_flags);
+ +      ttwu_stat(p, cpu, wake_flags);
   out:
         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
   
@@@ -2093,7 -2079,6 +2100,7 @@@
   /**
    * try_to_wake_up_local - try to wake up a local task with rq lock held
    * @p: the thread to be awakened
+ + * @cookie: context's cookie for pinning
    *
    * Put @p on the run-queue if it's not already there. The caller must
    * ensure that this_rq() is locked, @p is bound to this_rq() and not
@@@ -2132,7 -2117,8 +2139,7 @@@ static void try_to_wake_up_local(struc
                 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
   
         ttwu_do_wakeup(rq, p, 0, cookie);
- -      if (schedstat_enabled())
- -              ttwu_stat(p, smp_processor_id(), 0);
+ +      ttwu_stat(p, smp_processor_id(), 0);
   out:
         raw_spin_unlock(&p->pi_lock);
   }
@@@ -2992,23 -2978,6 +2999,23 @@@ DEFINE_PER_CPU(struct kernel_cpustat, k
   EXPORT_PER_CPU_SYMBOL(kstat);
   EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
   
+ +/*
+ + * The function fair_sched_class.update_curr accesses the struct curr
+ + * and its field curr->exec_start; when called from task_sched_runtime(),
+ + * we observe a high rate of cache misses in practice.
+ + * Prefetching this data results in improved performance.
+ + */
+ +static inline void prefetch_curr_exec_start(struct task_struct *p)
+ +{
+ +#ifdef CONFIG_FAIR_GROUP_SCHED
+ +      struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+ +#else
+ +      struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
+ +#endif
+ +      prefetch(curr);
+ +      prefetch(&curr->exec_start);
+ +}
+ +
   /*
    * Return accounted runtime for the task.
    * In case the task is currently running, return the runtime plus current's
@@@ -3043,7 -3012,6 +3050,7 @@@ unsigned long long task_sched_runtime(s
          * thread, breaking clock_gettime().
          */
         if (task_current(rq, p) && task_on_rq_queued(p)) {
+ +              prefetch_curr_exec_start(p);
                 update_rq_clock(rq);
                 p->sched_class->update_curr(rq);
         }
@@@ -3190,9 -3158,6 +3197,9 @@@ static inline void preempt_latency_stop
    */
   static noinline void __schedule_bug(struct task_struct *prev)
   {
+ +      /* Save this before calling printk(), since that will clobber it */
+ +      unsigned long preempt_disable_ip = get_preempt_disable_ip(current);
+ +
         if (oops_in_progress)
                 return;
   
@@@ -3203,12 -3168,13 +3210,12 @@@
         print_modules();
         if (irqs_disabled())
                 print_irqtrace_events(prev);
- -#ifdef CONFIG_DEBUG_PREEMPT
- -      if (in_atomic_preempt_off()) {
+ +      if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
+ +          && in_atomic_preempt_off()) {
                 pr_err("Preemption disabled at:");
- -              print_ip_sym(current->preempt_disable_ip);
+ +              print_ip_sym(preempt_disable_ip);
                 pr_cont("\n");
         }
- -#endif
         if (panic_on_warn)
                 panic("scheduling while atomic\n");
   
@@@ -3234,7 -3200,7 +3241,7 @@@ static inline void schedule_debug(struc
   
         profile_hit(SCHED_PROFILING, __builtin_return_address(0));
   
- -      schedstat_inc(this_rq(), sched_count);
+ +      schedstat_inc(this_rq()->sched_count);
   }
   
   /*
@@@ -3403,6 -3369,7 +3410,6 @@@ static void __sched notrace __schedule(
   
         balance_callback(rq);
   }
- -STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
   
   static inline void sched_submit_work(struct task_struct *tsk)
   {
@@@ -4845,7 -4812,7 +4852,7 @@@ SYSCALL_DEFINE0(sched_yield
   {
         struct rq *rq = this_rq_lock();
   
- -      schedstat_inc(rq, yld_count);
+ +      schedstat_inc(rq->yld_count);
         current->sched_class->yield_task(rq);
   
         /*
@@@ -4996,7 -4963,7 +5003,7 @@@ again
   
         yielded = curr->sched_class->yield_to_task(rq, p, preempt);
         if (yielded) {
- -              schedstat_inc(rq, yld_count);
+ +              schedstat_inc(rq->yld_count);
                 /*
                  * Make p's CPU reschedule; pick_next_entity takes care of
                  * fairness.
@@@ -5734,7 -5701,6 +5741,7 @@@ static int sd_degenerate(struct sched_d
                          SD_BALANCE_FORK |
                          SD_BALANCE_EXEC |
                          SD_SHARE_CPUCAPACITY |
+ +                       SD_ASYM_CPUCAPACITY |
                          SD_SHARE_PKG_RESOURCES |
                          SD_SHARE_POWERDOMAIN)) {
                 if (sd->groups != sd->groups->next)
@@@ -5765,7 -5731,6 +5772,7 @@@ sd_parent_degenerate(struct sched_domai
                                 SD_BALANCE_NEWIDLE |
                                 SD_BALANCE_FORK |
                                 SD_BALANCE_EXEC |
+ +                              SD_ASYM_CPUCAPACITY |
                                 SD_SHARE_CPUCAPACITY |
                                 SD_SHARE_PKG_RESOURCES |
                                 SD_PREFER_SIBLING |
@@@ -6375,32 -6340,23 +6382,32 @@@ static int sched_domains_curr_level
   /*
    * SD_flags allowed in topology descriptions.
    *
- - * SD_SHARE_CPUCAPACITY      - describes SMT topologies
- - * SD_SHARE_PKG_RESOURCES - describes shared caches
- - * SD_NUMA                - describes NUMA topologies
- - * SD_SHARE_POWERDOMAIN   - describes shared power domain
+ + * These flags are purely descriptive of the topology and do not prescribe
+ + * behaviour. Behaviour is artificial and mapped in the below sd_init()
+ + * function:
+ + *
+ + *   SD_SHARE_CPUCAPACITY   - describes SMT topologies
+ + *   SD_SHARE_PKG_RESOURCES - describes shared caches
+ + *   SD_NUMA                - describes NUMA topologies
+ + *   SD_SHARE_POWERDOMAIN   - describes shared power domain
+ + *   SD_ASYM_CPUCAPACITY    - describes mixed capacity topologies
+ + *
+ + * Odd one out, which besides describing the topology has a quirk: it also
+ + * prescribes the desired behaviour that goes along with it:
    *
- - * Odd one out:
- - * SD_ASYM_PACKING        - describes SMT quirks
+ + *   SD_ASYM_PACKING        - describes SMT quirks
    */
   #define TOPOLOGY_SD_FLAGS             \
         (SD_SHARE_CPUCAPACITY |         \
          SD_SHARE_PKG_RESOURCES |       \
          SD_NUMA |                      \
          SD_ASYM_PACKING |              \
+ +       SD_ASYM_CPUCAPACITY |          \
          SD_SHARE_POWERDOMAIN)
   
   static struct sched_domain *
- -sd_init(struct sched_domain_topology_level *tl, int cpu)
+ +sd_init(struct sched_domain_topology_level *tl,
+ +      struct sched_domain *child, int cpu)
   {
         struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
         int sd_weight, sd_flags = 0;
@@@ -6452,7 -6408,6 +6459,7 @@@
                 .smt_gain               = 0,
                 .max_newidle_lb_cost    = 0,
                 .next_decay_max_lb_cost = jiffies,
+ +              .child                  = child,
   #ifdef CONFIG_SCHED_DEBUG
                 .name                   = tl->name,
   #endif
@@@ -6462,13 -6417,6 +6469,13 @@@
          * Convert topological properties into behaviour.
          */
   
+ +      if (sd->flags & SD_ASYM_CPUCAPACITY) {
+ +              struct sched_domain *t = sd;
+ +
+ +              for_each_lower_domain(t)
+ +                      t->flags |= SD_BALANCE_WAKE;
+ +      }
+ +
         if (sd->flags & SD_SHARE_CPUCAPACITY) {
                 sd->flags |= SD_PREFER_SIBLING;
                 sd->imbalance_pct = 110;
@@@ -6884,13 -6832,16 +6891,13 @@@ struct sched_domain *build_sched_domain
                 const struct cpumask *cpu_map, struct sched_domain_attr *attr,
                 struct sched_domain *child, int cpu)
   {
- -      struct sched_domain *sd = sd_init(tl, cpu);
- -      if (!sd)
- -              return child;
+ +      struct sched_domain *sd = sd_init(tl, child, cpu);
   
         cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
         if (child) {
                 sd->level = child->level + 1;
                 sched_domain_level_max = max(sched_domain_level_max, sd->level);
                 child->parent = sd;
- -              sd->child = child;
   
                 if (!cpumask_subset(sched_domain_span(child),
                                     sched_domain_span(sd))) {
@@@ -6921,7 -6872,6 +6928,7 @@@ static int build_sched_domains(const st
         enum s_alloc alloc_state;
         struct sched_domain *sd;
         struct s_data d;
+ +      struct rq *rq = NULL;
         int i, ret = -ENOMEM;
   
         alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
@@@ -6972,22 -6922,11 +6979,22 @@@
         /* Attach the domains */
         rcu_read_lock();
         for_each_cpu(i, cpu_map) {
+ +              rq = cpu_rq(i);
                 sd = *per_cpu_ptr(d.sd, i);
+ +
+ +              /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
+ +              if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
+ +                      WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
+ +
                 cpu_attach_domain(sd, d.rd, i);
         }
         rcu_read_unlock();
   
+ +      if (rq) {
+ +              pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
+ +                      cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
+ +      }
+ +
         ret = 0;
   error:
         __free_domain_allocs(&d, alloc_state, cpu_map);
@@@ -7550,6 -7489,10 +7557,6 @@@ void __init sched_init(void
   
         set_load_weight(&init_task);
   
- -#ifdef CONFIG_PREEMPT_NOTIFIERS
- -      INIT_HLIST_HEAD(&init_task.preempt_notifiers);
- -#endif
- -
         /*
          * The boot idle thread does lazy MMU switching as well:
          */
@@@ -7615,7 -7558,6 +7622,7 @@@ EXPORT_SYMBOL(__might_sleep)
   void ___might_sleep(const char *file, int line, int preempt_offset)
   {
         static unsigned long prev_jiffy;        /* ratelimiting */
+ +      unsigned long preempt_disable_ip;
   
         rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
         if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
@@@ -7626,9 -7568,6 +7633,9 @@@
                 return;
         prev_jiffy = jiffies;
   
+ +      /* Save this before calling printk(), since that will clobber it */
+ +      preempt_disable_ip = get_preempt_disable_ip(current);
+ +
         printk(KERN_ERR
                 "BUG: sleeping function called from invalid context at %s:%d\n",
                         file, line);
@@@ -7643,14 -7582,14 +7650,14 @@@
         debug_show_held_locks(current);
         if (irqs_disabled())
                 print_irqtrace_events(current);
- -#ifdef CONFIG_DEBUG_PREEMPT
- -      if (!preempt_count_equals(preempt_offset)) {
+ +      if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
+ +          && !preempt_count_equals(preempt_offset)) {
                 pr_err("Preemption disabled at:");
- -              print_ip_sym(current->preempt_disable_ip);
+ +              print_ip_sym(preempt_disable_ip);
                 pr_cont("\n");
         }
- -#endif
         dump_stack();
+ +      add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
   }
   EXPORT_SYMBOL(___might_sleep);
   #endif
@@@ -7671,10 -7610,12 +7678,10 @@@ void normalize_rt_tasks(void
                 if (p->flags & PF_KTHREAD)
                         continue;
   
- -              p->se.exec_start                = 0;
- -#ifdef CONFIG_SCHEDSTATS
- -              p->se.statistics.wait_start     = 0;
- -              p->se.statistics.sleep_start    = 0;
- -              p->se.statistics.block_start    = 0;
- -#endif
+ +              p->se.exec_start = 0;
+ +              schedstat_set(p->se.statistics.wait_start,  0);
+ +              schedstat_set(p->se.statistics.sleep_start, 0);
+ +              schedstat_set(p->se.statistics.block_start, 0);
   
                 if (!dl_task(p) && !rt_task(p)) {
                         /*
diff --combined lib/Kconfig.debug

index 2e2cca5092318faf62d10ed96a3b7d0a8b9cc129,ffc2826a092ce498aae1fc9a772acfaf7fc27e82..785f04c80fcfabfb4b3c14d2b5039a98abb21103
--- 1/lib/Kconfig.debug
--- 2/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@@ -1214,7 -1214,7 +1214,7 @@@ config DEBUG_BUGVERBOS
   
   config DEBUG_LIST
         bool "Debug linked list manipulation"
-       depends on DEBUG_KERNEL
+       depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION
         help
           Enable this to turn on extended checks in the linked-list
           walking routines.
@@@ -1307,6 -1307,7 +1307,7 @@@ config TORTURE_TES
   config RCU_PERF_TEST
         tristate "performance tests for RCU"
         depends on DEBUG_KERNEL
+       depends on !UML
         select TORTURE_TEST
         select SRCU
         select TASKS_RCU
@@@ -1324,6 -1325,7 +1325,7 @@@
   config RCU_TORTURE_TEST
         tristate "torture tests for RCU"
         depends on DEBUG_KERNEL
+       depends on !UML
         select TORTURE_TEST
         select SRCU
         select TASKS_RCU
@@@ -1413,6 -1415,24 +1415,24 @@@ config RCU_TORTURE_TEST_SLOW_CLEANUP_DE
           This option specifies the number of jiffies to wait between
           each rcu_node structure cleanup operation.
   
+ config WAKE_TORTURE_TEST
+       tristate "Torture test for wakeups and CPU hotplug"
+       depends on DEBUG_KERNEL
+       depends on 64BIT
+       depends on TRACE_CLOCK
+       select TORTURE_TEST
+       default n
+       help
+         This option provides a kernel module that runs torture tests
+         on wakeups from timed waits in the presence of CPU hotplug.
+         The kernel module may be built after the fact on the running
+         kernel to be tested, if desired.
+ 
+         Say Y here if you want wakeup torture tests to be built into
+         the kernel.
+         Say M if you want the wakeup torture tests to build as a module.
+         Say N if you are unsure.
+ 
   config RCU_CPU_STALL_TIMEOUT
         int "RCU CPU stall timeout in seconds"
         depends on RCU_STALL_COMMON
@@@ -1686,6 -1706,24 +1706,6 @@@ config LATENCYTO
           Enable this option if you want to use the LatencyTOP tool
           to find out which userspace is blocking on what kernel operations.
   
- -config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
- -      bool
- -
- -config DEBUG_STRICT_USER_COPY_CHECKS
- -      bool "Strict user copy size checks"
- -      depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
- -      depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
- -      help
- -        Enabling this option turns a certain set of sanity checks for user
- -        copy operations into compile time failures.
- -
- -        The copy_from_user() etc checks are there to help test if there
- -        are sufficient security checks on the length argument of
- -        the copy operation, by having gcc prove that the argument is
- -        within bounds.
- -
- -        If unsure, say N.
- -
   source kernel/trace/Kconfig
   
   menu "Runtime Testing"
@@@ -1969,6 -2007,16 +1989,16 @@@ config TEST_STATIC_KEY
   
           If unsure, say N.
   
+ config BUG_ON_DATA_CORRUPTION
+       bool "Trigger a BUG when data corruption is detected"
+       select DEBUG_LIST
+       help
+         Select this option if the kernel should BUG when it encounters
+         data corruption in kernel memory structures when they get checked
+         for validity.
+ 
+         If unsure, say N.
+ 
   source "samples/Kconfig"
   
   source "lib/Kconfig.kgdb"
author	Stephen Rothwell <sfr@canb.auug.org.au>
	Tue, 13 Sep 2016 02:12:37 +0000 (12:12 +1000)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Tue, 13 Sep 2016 02:12:37 +0000 (12:12 +1000)
		1	2
include/linux/bug.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/cpu.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/rcu/tree.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
lib/Kconfig.debug	patch \|	diff1 \|	diff2 \|	blob \| history