Merge remote-tracking branch 'tip/auto-latest'
author     Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2016 01:53:17 +0000 (11:53 +1000)
committer  Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2016 01:53:17 +0000 (11:53 +1000)
27 files changed:
Documentation/kernel-parameters.txt
MAINTAINERS
arch/Kconfig
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/kernel/head_32.S
arch/x86/kernel/head_64.S
arch/x86/lib/memcpy_64.S
drivers/acpi/internal.h
drivers/md/raid5.c
drivers/net/ethernet/marvell/mvneta.c
drivers/perf/arm_pmu.c
include/linux/acpi.h
include/linux/compiler.h
include/linux/cpu.h
include/linux/cpuhotplug.h
include/linux/jump_label.h
include/linux/perf/arm_pmu.h
include/linux/perf_event.h
include/linux/sched.h
kernel/cpu.c
kernel/events/core.c
kernel/fork.c
kernel/sched/deadline.c
kernel/sched/fair.c
kernel/sched/sched.h
kernel/softirq.c

diff --combined Documentation/kernel-parameters.txt
index 73191f4fe5b6a41b9bee5ff08b336feea1f36817,3725976d0af52311548f60acc6f6d6ac606b5675..b28015cad949571408b4e544dc1d1588be9d4a8f
@@@ -460,15 -460,6 +460,15 @@@ bytes respectively. Such letter suffixe
                        driver will print ACPI tables for AMD IOMMU during
                        IOMMU initialization.
  
 +      amd_iommu_intr= [HW,X86-64]
 +                      Specifies one of the following AMD IOMMU interrupt
 +                      remapping modes:
 +                      legacy     - Use legacy interrupt remapping mode.
 +                      vapic      - Use virtual APIC mode, which allows the
 +                                   IOMMU to inject interrupts directly into
 +                                   the guest. This mode requires kvm-amd.avic=1.
 +                                   (Default when IOMMU HW support is present.)
 +
        amijoy.map=     [HW,JOY] Amiga joystick support
                        Map of devices attached to JOY0DAT and JOY1DAT
                        Format: <a>,<b>
  
        initrd=         [BOOT] Specify the location of the initial ramdisk
  
+       init_pkru=      [x86] Specify the default memory protection keys rights
+                       register contents for all processes.  0x55555554 by
+                       default (disallow access to all but pkey 0).  Can be
+                       overridden in debugfs after boot.
        inport.irq=     [HW] Inport (ATI XL and Microsoft) busmouse driver
                        Format: <irq>
  
  
        intel_idle.max_cstate=  [KNL,HW,ACPI,X86]
                        0       disables intel_idle and fall back on acpi_idle.
 -                      1 to 6  specify maximum depth of C-state.
 +                      1 to 9  specify maximum depth of C-state.
  
        intel_pstate=  [X86]
                       disable
                        than or equal to this physical address is ignored.
  
        maxcpus=        [SMP] Maximum number of processors that an SMP kernel
 -                      should make use of.  maxcpus=n : n >= 0 limits the
 -                      kernel to using 'n' processors.  n=0 is a special case,
 -                      it is equivalent to "nosmp", which also disables
 -                      the IO APIC.
 +                      will bring up during bootup.  maxcpus=n : n >= 0 limits
 +                      the kernel to bringing up 'n' processors.  The remaining
 +                      plugged CPUs can still be brought online after bootup by
 +                      executing "echo 1 > /sys/devices/system/cpu/cpuX/online",
 +                      so maxcpus only takes effect during system bootup.
 +                      n=0 is a special case: it is equivalent to "nosmp",
 +                      which also disables the IO APIC.
  
        max_loop=       [LOOP] The number of loop block devices that get
        (loop.max_loop) unconditionally pre-created at init time. The default
  
        nodelayacct     [KNL] Disable per-task delay accounting
  
 -      nodisconnect    [HW,SCSI,M68K] Disables SCSI disconnects.
 -
        nodsp           [SH] Disable hardware DSP at boot time.
  
        noefi           Disable EFI runtime services support.
  
        nr_cpus=        [SMP] Maximum number of processors that an SMP kernel
                        could support.  nr_cpus=n : n >= 1 limits the kernel to
 -                      supporting 'n' processors. Later in runtime you can not
 -                      use hotplug cpu feature to put more cpu back to online.
 -                      just like you compile the kernel NR_CPUS=n
 +                      supporting 'n' processors.  It may be larger than the
 +                      number of CPUs plugged in at bootup; additional CPUs can
 +                      then be physically hot-added at runtime until the limit
 +                      'n' is reached.  Boot-time memory for the per-cpu
 +                      variables of those CPUs is therefore pre-allocated to
 +                      support such later physical CPU hotplug.
  
        nr_uarts=       [SERIAL] maximum number of UARTs to be registered.
  
        raid=           [HW,RAID]
                        See Documentation/md.txt.
  
 +      ram_latent_entropy
 +                      Enable a very simple form of latent entropy extraction
 +                      from the first 4GB of memory as the bootmem allocator
 +                      passes the memory pages to the buddy allocator.
 +
        ramdisk_size=   [RAM] Sizes of RAM disks in kilobytes
                        See Documentation/blockdev/ramdisk.txt.
  
diff --combined MAINTAINERS
index 11535f6a48fb1df0bc643ed4bd25f8e894b1664d,25b254a22fb941653705ad1a65240e4426a58b7e..c64e59f7ed328ebb008787a0e9ee8260a1dc3df6
@@@ -636,15 -636,6 +636,15 @@@ F:       drivers/tty/serial/altera_jtaguart.
  F:    include/linux/altera_uart.h
  F:    include/linux/altera_jtaguart.h
  
 +AMAZON ETHERNET DRIVERS
 +M:    Netanel Belgazal <netanel@annapurnalabs.com>
 +R:    Saeed Bishara <saeed@annapurnalabs.com>
 +R:    Zorik Machulsky <zorik@annapurnalabs.com>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +F:    Documentation/networking/ena.txt
 +F:    drivers/net/ethernet/amazon/
 +
  AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
  M:    Tom Lendacky <thomas.lendacky@amd.com>
  M:    Gary Hook <gary.hook@amd.com>
@@@ -1001,7 -992,6 +1001,7 @@@ M:       Chen-Yu Tsai <wens@csie.org
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  N:    sun[x456789]i
 +F:    arch/arm/boot/dts/ntc-gr8*
  
  ARM/Allwinner SoC Clock Support
  M:    Emilio López <emilio@elopez.com.ar>
@@@ -1019,7 -1009,6 +1019,7 @@@ F:      arch/arm/mach-meson
  F:    arch/arm/boot/dts/meson*
  F:    arch/arm64/boot/dts/amlogic/
  F:    drivers/pinctrl/meson/
 +F:      drivers/mmc/host/meson*
  N:    meson
  
  ARM/Annapurna Labs ALPINE ARCHITECTURE
@@@ -1636,7 -1625,6 +1636,7 @@@ N:      rockchi
  ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
  M:    Kukjin Kim <kgene@kernel.org>
  M:    Krzysztof Kozlowski <krzk@kernel.org>
 +R:    Javier Martinez Canillas <javier@osg.samsung.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
  S:    Maintained
@@@ -1688,6 -1676,14 +1688,6 @@@ S:     Maintaine
  F:    arch/arm/plat-samsung/s5p-dev-mfc.c
  F:    drivers/media/platform/s5p-mfc/
  
 -ARM/SAMSUNG S5P SERIES TV SUBSYSTEM SUPPORT
 -M:    Kyungmin Park <kyungmin.park@samsung.com>
 -M:    Tomasz Stanislawski <t.stanislaws@samsung.com>
 -L:    linux-arm-kernel@lists.infradead.org
 -L:    linux-media@vger.kernel.org
 -S:    Maintained
 -F:    drivers/media/platform/s5p-tv/
 -
  ARM/SAMSUNG S5P SERIES HDMI CEC SUBSYSTEM SUPPORT
  M:    Kyungmin Park <kyungmin.park@samsung.com>
  L:    linux-arm-kernel@lists.infradead.org
@@@ -1845,7 -1841,6 +1845,7 @@@ F:      arch/arm64/boot/dts/socionext
  F:    drivers/bus/uniphier-system-bus.c
  F:    drivers/i2c/busses/i2c-uniphier*
  F:    drivers/pinctrl/uniphier/
 +F:    drivers/reset/reset-uniphier.c
  F:    drivers/tty/serial/8250/8250_uniphier.c
  N:    uniphier
  
@@@ -2227,9 -2222,9 +2227,9 @@@ S:      Maintaine
  F:    drivers/net/wireless/atmel/atmel*
  
  ATMEL MAXTOUCH DRIVER
 -M:    Nick Dyer <nick.dyer@itdev.co.uk>
 -T:    git git://github.com/atmel-maxtouch/linux.git
 -S:    Supported
 +M:    Nick Dyer <nick@shmanahar.org>
 +T:    git git://github.com/ndyer/linux.git
 +S:    Maintained
  F:    Documentation/devicetree/bindings/input/atmel,maxtouch.txt
  F:    drivers/input/touchscreen/atmel_mxt_ts.c
  F:    include/linux/platform_data/atmel_mxt_ts.h
@@@ -2490,7 -2485,7 +2490,7 @@@ F:      include/net/bluetooth
  BONDING DRIVER
  M:    Jay Vosburgh <j.vosburgh@gmail.com>
  M:    Veaceslav Falico <vfalico@gmail.com>
 -M:    Andy Gospodarek <gospo@cumulusnetworks.com>
 +M:    Andy Gospodarek <andy@greyhouse.net>
  L:    netdev@vger.kernel.org
  W:    http://sourceforge.net/projects/bonding/
  S:    Supported
@@@ -2893,14 -2888,6 +2893,14 @@@ S:    Maintaine
  F:    drivers/iio/light/cm*
  F:    Documentation/devicetree/bindings/i2c/trivial-devices.txt
  
 +CAVIUM I2C DRIVER
 +M:    Jan Glauber <jglauber@cavium.com>
 +M:    David Daney <david.daney@cavium.com>
 +W:    http://www.cavium.com
 +S:    Supported
 +F:    drivers/i2c/busses/i2c-octeon*
 +F:    drivers/i2c/busses/i2c-thunderx*
 +
  CAVIUM LIQUIDIO NETWORK DRIVER
  M:     Derek Chickles <derek.chickles@caviumnetworks.com>
  M:     Satanand Burla <satananda.burla@caviumnetworks.com>
@@@ -3148,7 -3135,7 +3148,7 @@@ L:      cocci@systeme.lip6.fr (moderated fo
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git misc
  W:    http://coccinelle.lip6.fr/
  S:    Supported
 -F:    Documentation/coccinelle.txt
 +F:    Documentation/dev-tools/coccinelle.rst
  F:    scripts/coccinelle/
  F:    scripts/coccicheck
  
@@@ -3282,7 -3269,7 +3282,7 @@@ S:      Maintaine
  F:    drivers/net/wan/cosa*
  
  CPMAC ETHERNET DRIVER
 -M:    Florian Fainelli <florian@openwrt.org>
 +M:    Florian Fainelli <f.fainelli@gmail.com>
  L:    netdev@vger.kernel.org
  S:    Maintained
  F:    drivers/net/ethernet/ti/cpmac.c
@@@ -3775,8 -3762,8 +3775,8 @@@ F:      drivers/leds/leds-da90??.
  F:    drivers/mfd/da903x.c
  F:    drivers/mfd/da90??-*.c
  F:    drivers/mfd/da91??-*.c
 -F:    drivers/power/da9052-battery.c
 -F:    drivers/power/da91??-*.c
 +F:    drivers/power/supply/da9052-battery.c
 +F:    drivers/power/supply/da91??-*.c
  F:    drivers/regulator/da903x.c
  F:    drivers/regulator/da9???-regulator.[ch]
  F:    drivers/rtc/rtc-da90??.c
@@@ -4087,14 -4074,6 +4087,14 @@@ S:    Orphan / Obsolet
  F:    drivers/gpu/drm/i810/
  F:    include/uapi/drm/i810_drm.h
  
 +DRM DRIVERS FOR MEDIATEK
 +M:    CK Hu <ck.hu@mediatek.com>
 +M:    Philipp Zabel <p.zabel@pengutronix.de>
 +L:    dri-devel@lists.freedesktop.org
 +S:    Supported
 +F:    drivers/gpu/drm/mediatek/
 +F:    Documentation/devicetree/bindings/display/mediatek/
 +
  DRM DRIVER FOR MSM ADRENO GPU
  M:    Rob Clark <robdclark@gmail.com>
  L:    linux-arm-msm@vger.kernel.org
@@@ -5156,7 -5135,7 +5156,7 @@@ GCOV BASED KERNEL PROFILIN
  M:    Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
  S:    Maintained
  F:    kernel/gcov/
 -F:    Documentation/gcov.txt
 +F:    Documentation/dev-tools/gcov.rst
  
  GDT SCSI DISK ARRAY CONTROLLER DRIVER
  M:    Achim Leubner <achim_leubner@adaptec.com>
@@@ -5595,14 -5574,6 +5595,14 @@@ S:    Maintaine
  F:    drivers/net/ethernet/hisilicon/
  F:    Documentation/devicetree/bindings/net/hisilicon*.txt
  
 +HISILICON ROCE DRIVER
 +M:    Lijun Ou <oulijun@huawei.com>
 +M:    Wei Hu(Xavier) <xavier.huwei@huawei.com>
 +L:    linux-rdma@vger.kernel.org
 +S:    Maintained
 +F:    drivers/infiniband/hw/hns/
 +F:    Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt
 +
  HISILICON SAS Controller
  M:    John Garry <john.garry@huawei.com>
  W:    http://www.hisilicon.com
@@@ -5680,14 -5651,6 +5680,14 @@@ M:    Nadia Yvette Chambers <nyc@holomorph
  S:    Maintained
  F:    fs/hugetlbfs/
  
 +HVA ST MEDIA DRIVER
 +M:    Jean-Christophe Trotin <jean-christophe.trotin@st.com>
 +L:    linux-media@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +W:    https://linuxtv.org
 +S:    Supported
 +F:    drivers/media/platform/sti/hva
 +
  Hyper-V CORE AND DRIVERS
  M:    "K. Y. Srinivasan" <kys@microsoft.com>
  M:    Haiyang Zhang <haiyangz@microsoft.com>
@@@ -5714,8 -5677,6 +5714,8 @@@ S:      Maintaine
  F:    Documentation/i2c/i2c-topology
  F:    Documentation/i2c/muxes/
  F:    Documentation/devicetree/bindings/i2c/i2c-mux*
 +F:    Documentation/devicetree/bindings/i2c/i2c-arb*
 +F:    Documentation/devicetree/bindings/i2c/i2c-gate*
  F:    drivers/i2c/i2c-mux.c
  F:    drivers/i2c/muxes/
  F:    include/linux/i2c-mux.h
@@@ -6032,12 -5993,6 +6032,12 @@@ M:    Zubair Lutfullah Kakakhel <Zubair.Ka
  S:    Maintained
  F:    drivers/dma/dma-jz4780.c
  
 +INGENIC JZ4780 NAND DRIVER
 +M:    Harvey Hunt <harveyhuntnexus@gmail.com>
 +L:    linux-mtd@lists.infradead.org
 +S:    Maintained
 +F:    drivers/mtd/nand/jz4780_*
 +
  INTEGRITY MEASUREMENT ARCHITECTURE (IMA)
  M:    Mimi Zohar <zohar@linux.vnet.ibm.com>
  M:    Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
@@@ -6139,13 -6094,6 +6139,13 @@@ T:    git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    drivers/idle/intel_idle.c
  
 +INTEL INTEGRATED SENSOR HUB DRIVER
 +M:    Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
 +M:    Jiri Kosina <jikos@kernel.org>
 +L:    linux-input@vger.kernel.org
 +S:    Maintained
 +F:    drivers/hid/intel-ish-hid/
 +
  INTEL PSTATE DRIVER
  M:    Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  M:    Len Brown <lenb@kernel.org>
@@@ -6656,7 -6604,7 +6656,7 @@@ L:      kasan-dev@googlegroups.co
  S:    Maintained
  F:    arch/*/include/asm/kasan.h
  F:    arch/*/mm/kasan_init*
 -F:    Documentation/kasan.txt
 +F:    Documentation/dev-tools/kasan.rst
  F:    include/linux/kasan*.h
  F:    lib/test_kasan.c
  F:    mm/kasan/
@@@ -6872,7 -6820,7 +6872,7 @@@ KMEMCHEC
  M:    Vegard Nossum <vegardno@ifi.uio.no>
  M:    Pekka Enberg <penberg@kernel.org>
  S:    Maintained
 -F:    Documentation/kmemcheck.txt
 +F:    Documentation/dev-tools/kmemcheck.rst
  F:    arch/x86/include/asm/kmemcheck.h
  F:    arch/x86/mm/kmemcheck/
  F:    include/linux/kmemcheck.h
@@@ -6881,7 -6829,7 +6881,7 @@@ F:      mm/kmemcheck.
  KMEMLEAK
  M:    Catalin Marinas <catalin.marinas@arm.com>
  S:    Maintained
 -F:    Documentation/kmemleak.txt
 +F:    Documentation/dev-tools/kmemleak.rst
  F:    include/linux/kmemleak.h
  F:    mm/kmemleak.c
  F:    mm/kmemleak-test.c
@@@ -7521,8 -7469,8 +7521,8 @@@ M:      Krzysztof Kozlowski <krzk@kernel.org
  M:    Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
  L:    linux-pm@vger.kernel.org
  S:    Supported
 -F:    drivers/power/max14577_charger.c
 -F:    drivers/power/max77693_charger.c
 +F:    drivers/power/supply/max14577_charger.c
 +F:    drivers/power/supply/max77693_charger.c
  
  MAXIM MAX77802 MULTIFUNCTION PMIC DEVICE DRIVERS
  M:    Javier Martinez Canillas <javier@osg.samsung.com>
@@@ -7804,14 -7752,6 +7804,14 @@@ T:    git git://git.monstr.eu/linux-2.6-mi
  S:    Supported
  F:    arch/microblaze/
  
 +MICROCHIP / ATMEL ISC DRIVER
 +M:    Songjun Wu <songjun.wu@microchip.com>
 +L:    linux-media@vger.kernel.org
 +S:    Supported
 +F:    drivers/media/platform/atmel/atmel-isc.c
 +F:    drivers/media/platform/atmel/atmel-isc-regs.h
 +F:    Documentation/devicetree/bindings/media/atmel-isc.txt
 +
  MICROSOFT SURFACE PRO 3 BUTTON DRIVER
  M:    Chen Yu <yu.c.chen@intel.com>
  L:    platform-driver-x86@vger.kernel.org
@@@ -8023,7 -7963,6 +8023,7 @@@ MULTIFUNCTION DEVICES (MFD
  M:    Lee Jones <lee.jones@linaro.org>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git
  S:    Supported
 +F:    Documentation/devicetree/bindings/mfd/
  F:    drivers/mfd/
  F:    include/linux/mfd/
  
@@@ -8403,11 -8342,11 +8403,11 @@@ R:   Pali Rohár <pali.rohar@gmail.com
  F:    include/linux/power/bq2415x_charger.h
  F:    include/linux/power/bq27xxx_battery.h
  F:    include/linux/power/isp1704_charger.h
 -F:    drivers/power/bq2415x_charger.c
 -F:    drivers/power/bq27xxx_battery.c
 -F:    drivers/power/bq27xxx_battery_i2c.c
 -F:    drivers/power/isp1704_charger.c
 -F:    drivers/power/rx51_battery.c
 +F:    drivers/power/supply/bq2415x_charger.c
 +F:    drivers/power/supply/bq27xxx_battery.c
 +F:    drivers/power/supply/bq27xxx_battery_i2c.c
 +F:    drivers/power/supply/isp1704_charger.c
 +F:    drivers/power/supply/rx51_battery.c
  
  NTB DRIVER CORE
  M:    Jon Mason <jdmason@kudzu.us>
@@@ -8908,6 -8847,7 +8908,7 @@@ S:      Supporte
  F:    Documentation/virtual/paravirt_ops.txt
  F:    arch/*/kernel/paravirt*
  F:    arch/*/include/asm/paravirt.h
+ F:    include/linux/hypervisor.h
  
  PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES
  M:    Tim Waugh <tim@cyberelk.net>
@@@ -9406,12 -9346,16 +9407,12 @@@ F:   drivers/powercap
  
  POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS
  M:    Sebastian Reichel <sre@kernel.org>
 -M:    Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
 -M:    David Woodhouse <dwmw2@infradead.org>
  L:    linux-pm@vger.kernel.org
 -T:    git git://git.infradead.org/battery-2.6.git
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
  S:    Maintained
 -F:    Documentation/devicetree/bindings/power/
 -F:    Documentation/devicetree/bindings/power_supply/
 +F:    Documentation/devicetree/bindings/power/supply/
  F:    include/linux/power_supply.h
 -F:    drivers/power/
 -X:    drivers/power/avs/
 +F:    drivers/power/supply/
  
  POWER STATE COORDINATION INTERFACE (PSCI)
  M:    Mark Rutland <mark.rutland@arm.com>
@@@ -9747,12 -9691,6 +9748,12 @@@ T:    git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    drivers/net/wireless/ath/ath10k/
  
 +QUALCOMM EMAC GIGABIT ETHERNET DRIVER
 +M:    Timur Tabi <timur@codeaurora.org>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +F:    drivers/net/ethernet/qualcomm/emac/
 +
  QUALCOMM HEXAGON ARCHITECTURE
  M:    Richard Kuo <rkuo@codeaurora.org>
  L:    linux-hexagon@vger.kernel.org
@@@ -10008,7 -9946,6 +10009,7 @@@ F:    net/rfkill
  
  RHASHTABLE
  M:    Thomas Graf <tgraf@suug.ch>
 +M:    Herbert Xu <herbert@gondor.apana.org.au>
  L:    netdev@vger.kernel.org
  S:    Maintained
  F:    lib/rhashtable.c
@@@ -10417,8 -10354,8 +10418,8 @@@ F:   drivers/thunderbolt
  TI BQ27XXX POWER SUPPLY DRIVER
  R:    Andrew F. Davis <afd@ti.com>
  F:    include/linux/power/bq27xxx_battery.h
 -F:    drivers/power/bq27xxx_battery.c
 -F:    drivers/power/bq27xxx_battery_i2c.c
 +F:    drivers/power/supply/bq27xxx_battery.c
 +F:    drivers/power/supply/bq27xxx_battery_i2c.c
  
  TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
  M:    John Stultz <john.stultz@linaro.org>
@@@ -11178,7 -11115,6 +11179,7 @@@ F:   Documentation/spi
  F:    drivers/spi/
  F:    include/linux/spi/
  F:    include/uapi/linux/spi/
 +F:    tools/spi/
  
  SPIDERNET NETWORK DRIVER for CELL
  M:    Ishizaki Kou <kou.ishizaki@toshiba.co.jp>
@@@ -11421,14 -11357,6 +11422,14 @@@ T: git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    drivers/mfd/syscon.c
  
 +SYSTEM RESET/SHUTDOWN DRIVERS
 +M:    Sebastian Reichel <sre@kernel.org>
 +L:    linux-pm@vger.kernel.org
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/power/reset/
 +F:    drivers/power/reset/
 +
  SYSV FILESYSTEM
  M:    Christoph Hellwig <hch@infradead.org>
  S:    Maintained
@@@ -11777,7 -11705,7 +11778,7 @@@ F:   include/linux/platform_data/lp855x.
  TI LP8727 CHARGER DRIVER
  M:    Milo Kim <milo.kim@ti.com>
  S:    Maintained
 -F:    drivers/power/lp8727_charger.c
 +F:    drivers/power/supply/lp8727_charger.c
  F:    include/linux/platform_data/lp8727.h
  
  TI LP8788 MFD DRIVER
@@@ -11786,7 -11714,7 +11787,7 @@@ S:   Maintaine
  F:    drivers/iio/adc/lp8788_adc.c
  F:    drivers/leds/leds-lp8788.c
  F:    drivers/mfd/lp8788*.c
 -F:    drivers/power/lp8788-charger.c
 +F:    drivers/power/supply/lp8788-charger.c
  F:    drivers/regulator/lp8788-*.c
  F:    include/linux/mfd/lp8788*.h
  
@@@ -11939,14 -11867,6 +11940,14 @@@ T: git git://linuxtv.org/media_tree.gi
  S:    Odd fixes
  F:    drivers/media/usb/tm6000/
  
 +TW5864 VIDEO4LINUX DRIVER
 +M:    Bluecherry Maintainers <maintainers@bluecherrydvr.com>
 +M:    Andrey Utkin <andrey.utkin@corp.bluecherry.net>
 +M:    Andrey Utkin <andrey_utkin@fastmail.com>
 +L:    linux-media@vger.kernel.org
 +S:    Supported
 +F:    drivers/media/pci/tw5864/
 +
  TW68 VIDEO4LINUX DRIVER
  M:    Hans Verkuil <hverkuil@xs4all.nl>
  L:    linux-media@vger.kernel.org
@@@ -12361,7 -12281,6 +12362,7 @@@ F:   drivers/net/usb/smsc75xx.
  
  USB SMSC95XX ETHERNET DRIVER
  M:    Steve Glendinning <steve.glendinning@shawell.net>
 +M:    Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
  L:    netdev@vger.kernel.org
  S:    Maintained
  F:    drivers/net/usb/smsc95xx.*
@@@ -12871,7 -12790,7 +12872,7 @@@ F:   drivers/input/touchscreen/wm97*.
  F:    drivers/mfd/arizona*
  F:    drivers/mfd/wm*.c
  F:    drivers/mfd/cs47l24*
 -F:    drivers/power/wm83*.c
 +F:    drivers/power/supply/wm83*.c
  F:    drivers/rtc/rtc-wm83*.c
  F:    drivers/regulator/wm8*.c
  F:    drivers/video/backlight/wm83*_bl.c
diff --combined arch/Kconfig
index 47a3ab795955052c3cc553b2c25e373be58e2b72,180ea33164dc45821a78c08043dc95119c1bdb7e..d0b205d34946c8ec046c551150b78ab1d8aa7eef
@@@ -383,24 -383,6 +383,24 @@@ config GCC_PLUGIN_SANCO
          gcc-4.5 on). It is based on the commit "Add fuzzing coverage support"
          by Dmitry Vyukov <dvyukov@google.com>.
  
 +config GCC_PLUGIN_LATENT_ENTROPY
 +      bool "Generate some entropy during boot and runtime"
 +      depends on GCC_PLUGINS
 +      help
 +        If you say Y here, the kernel will instrument some kernel code to
 +        extract some entropy from both original and artificially created
 +        program state.  This helps especially on embedded systems, which
 +        normally have few 'natural' sources of entropy.  The cost is a
 +        small slowdown of the boot process (about 0.5%) and of fork and
 +        irq processing.
 +
 +        Note that entropy extracted this way is not cryptographically
 +        secure!
 +
 +        This plugin was ported from grsecurity/PaX. More information at:
 +         * https://grsecurity.net/
 +         * https://pax.grsecurity.net/
 +
  config HAVE_CC_STACKPROTECTOR
        bool
        help
@@@ -477,25 -459,6 +477,25 @@@ config HAVE_ARCH_WITHIN_STACK_FRAME
          and similar) by implementing an inline arch_within_stack_frames(),
          which is used by CONFIG_HARDENED_USERCOPY.
  
 +config THIN_ARCHIVES
 +      bool
 +      help
 +        Select this if the architecture wants to use thin archives
 +        instead of ld -r to create the built-in.o files.
 +
 +config LD_DEAD_CODE_DATA_ELIMINATION
 +      bool
 +      help
 +        Select this if the architecture wants to do dead code and
 +        data elimination with the linker by compiling with
 +        -ffunction-sections -fdata-sections and linking with
 +        --gc-sections.
 +
 +        This requires that the arch annotates or otherwise protects
 +        its external entry points from being discarded. Linker scripts
 +        must also merge .text.*, .data.*, and .bss.* correctly into
 +        output sections.
 +
  config HAVE_CONTEXT_TRACKING
        bool
        help
@@@ -733,4 -696,38 +733,38 @@@ config ARCH_NO_COHERENT_DMA_MMA
  config CPU_NO_EFFICIENT_FFS
        def_bool n
  
+ config HAVE_ARCH_VMAP_STACK
+       def_bool n
+       help
+         An arch should select this symbol if it can support kernel stacks
+         in vmalloc space.  This means:
+         - vmalloc space must be large enough to hold many kernel stacks.
+           This may rule out many 32-bit architectures.
+         - Stacks in vmalloc space need to work reliably.  For example, if
+           vmap page tables are created on demand, either this mechanism
+           needs to work while the stack points to a virtual address with
+           unpopulated page tables or arch code (switch_to() and switch_mm(),
+           most likely) needs to ensure that the stack's page table entries
+           are populated before running on a possibly unpopulated stack.
+         - If the stack overflows into a guard page, something reasonable
+           should happen.  The definition of "reasonable" is flexible, but
+           instantly rebooting without logging anything would be unfriendly.
+ config VMAP_STACK
+       default y
+       bool "Use a virtually-mapped stack"
+       depends on HAVE_ARCH_VMAP_STACK && !KASAN
+       ---help---
+         Enable this if you want to use virtually-mapped kernel stacks
+         with guard pages.  This causes kernel stack overflows to be
+         caught immediately rather than causing difficult-to-diagnose
+         corruption.
+         This is presently incompatible with KASAN because KASAN expects
+         the stack to map directly to the KASAN shadow map using a formula
+         that is incorrect if the stack is in vmalloc space.
  source "kernel/gcov/Kconfig"
diff --combined arch/x86/entry/entry_32.S
index 64d29a3ad37676b079c6488a350b4de5debdae61,b75a8bcd2d23cced23df9bade46e6304499d926b..21b352a11b493f4868b1a091dc416c7e2279ad34
@@@ -44,7 -44,6 +44,7 @@@
  #include <asm/alternative-asm.h>
  #include <asm/asm.h>
  #include <asm/smap.h>
 +#include <asm/export.h>
  
        .section .entry.text, "ax"
  
        POP_GS_EX
  .endm
  
+ /*
+  * %eax: prev task
+  * %edx: next task
+  */
+ ENTRY(__switch_to_asm)
+       /*
+        * Save callee-saved registers
+        * This must match the order in struct inactive_task_frame
+        */
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %edi
+       pushl   %esi
+       /* switch stack */
+       movl    %esp, TASK_threadsp(%eax)
+       movl    TASK_threadsp(%edx), %esp
+ #ifdef CONFIG_CC_STACKPROTECTOR
+       movl    TASK_stack_canary(%edx), %ebx
+       movl    %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+ #endif
+       /* restore callee-saved registers */
+       popl    %esi
+       popl    %edi
+       popl    %ebx
+       popl    %ebp
+       jmp     __switch_to
+ END(__switch_to_asm)
+ /*
+  * A newly forked process directly context switches into this address.
+  *
+  * eax: prev task we switched from
+  * ebx: kernel thread func (NULL for user thread)
+  * edi: kernel thread arg
+  */
  ENTRY(ret_from_fork)
        pushl   %eax
        call    schedule_tail
        popl    %eax
  
+       testl   %ebx, %ebx
+       jnz     1f              /* kernel threads are uncommon */
+ 2:
        /* When we fork, we trace the syscall return in the child, too. */
        movl    %esp, %eax
        call    syscall_return_slowpath
        jmp     restore_all
- END(ret_from_fork)
- ENTRY(ret_from_kernel_thread)
-       pushl   %eax
-       call    schedule_tail
-       popl    %eax
-       movl    PT_EBP(%esp), %eax
-       call    *PT_EBX(%esp)
-       movl    $0, PT_EAX(%esp)
  
+       /* kernel thread */
+ 1:    movl    %edi, %eax
+       call    *%ebx
        /*
-        * Kernel threads return to userspace as if returning from a syscall.
-        * We should check whether anything actually uses this path and, if so,
-        * consider switching it over to ret_from_fork.
+        * A kernel thread is allowed to return here after successfully
+        * calling do_execve().  Exit to userspace to complete the execve()
+        * syscall.
         */
-       movl    %esp, %eax
-       call    syscall_return_slowpath
-       jmp     restore_all
- ENDPROC(ret_from_kernel_thread)
+       movl    $0, PT_EAX(%esp)
+       jmp     2b
+ END(ret_from_fork)
  
  /*
   * Return to user mode is not as complex as all this looks,
@@@ -956,7 -991,6 +992,7 @@@ trace
        jmp     ftrace_stub
  END(mcount)
  #endif /* CONFIG_DYNAMIC_FTRACE */
 +EXPORT_SYMBOL(mcount)
  #endif /* CONFIG_FUNCTION_TRACER */
  
  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --combined arch/x86/entry/entry_64.S
index aa605263c45b6344e9e7f135d015697dae2b0e3e,c0373d6676744dc8c6121b4319e112ddf141d528..b9ca6b34b6c539d338445bd5ce5034b98fa41125
@@@ -35,7 -35,6 +35,7 @@@
  #include <asm/asm.h>
  #include <asm/smap.h>
  #include <asm/pgtable_types.h>
 +#include <asm/export.h>
  #include <linux/err.h>
  
  /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
@@@ -352,8 -351,7 +352,7 @@@ ENTRY(stub_ptregs_64
        jmp     entry_SYSCALL64_slow_path
  
  1:
-       /* Called from C */
-       jmp     *%rax                           /* called from C */
+       jmp     *%rax                           /* Called from C */
  END(stub_ptregs_64)
  
  .macro ptregs_stub func
@@@ -369,42 -367,74 +368,74 @@@ END(ptregs_\func
  #define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
  #include <asm/syscalls_64.h>
  
+ /*
+  * %rdi: prev task
+  * %rsi: next task
+  */
+ ENTRY(__switch_to_asm)
+       /*
+        * Save callee-saved registers
+        * This must match the order in inactive_task_frame
+        */
+       pushq   %rbp
+       pushq   %rbx
+       pushq   %r12
+       pushq   %r13
+       pushq   %r14
+       pushq   %r15
+       /* switch stack */
+       movq    %rsp, TASK_threadsp(%rdi)
+       movq    TASK_threadsp(%rsi), %rsp
+ #ifdef CONFIG_CC_STACKPROTECTOR
+       movq    TASK_stack_canary(%rsi), %rbx
+       movq    %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+ #endif
+       /* restore callee-saved registers */
+       popq    %r15
+       popq    %r14
+       popq    %r13
+       popq    %r12
+       popq    %rbx
+       popq    %rbp
+       jmp     __switch_to
+ END(__switch_to_asm)
  /*
   * A newly forked process directly context switches into this address.
   *
-  * rdi: prev task we switched from
+  * rax: prev task we switched from
+  * rbx: kernel thread func (NULL for user thread)
+  * r12: kernel thread arg
   */
  ENTRY(ret_from_fork)
-       LOCK ; btr $TIF_FORK, TI_flags(%r8)
+       movq    %rax, %rdi
        call    schedule_tail                   /* rdi: 'prev' task parameter */
  
-       testb   $3, CS(%rsp)                    /* from kernel_thread? */
-       jnz     1f
-       /*
-        * We came from kernel_thread.  This code path is quite twisted, and
-        * someone should clean it up.
-        *
-        * copy_thread_tls stashes the function pointer in RBX and the
-        * parameter to be passed in RBP.  The called function is permitted
-        * to call do_execve and thereby jump to user mode.
-        */
-       movq    RBP(%rsp), %rdi
-       call    *RBX(%rsp)
-       movl    $0, RAX(%rsp)
-       /*
-        * Fall through as though we're exiting a syscall.  This makes a
-        * twisted sort of sense if we just called do_execve.
-        */
+       testq   %rbx, %rbx                      /* from kernel_thread? */
+       jnz     1f                              /* kernel threads are uncommon */
  
- 1:
+ 2:
        movq    %rsp, %rdi
        call    syscall_return_slowpath /* returns with IRQs disabled */
        TRACE_IRQS_ON                   /* user mode is traced as IRQS on */
        SWAPGS
        jmp     restore_regs_and_iret
+ 1:
+       /* kernel thread */
+       movq    %r12, %rdi
+       call    *%rbx
+       /*
+        * A kernel thread is allowed to return here after successfully
+        * calling do_execve().  Exit to userspace to complete the execve()
+        * syscall.
+        */
+       movq    $0, RAX(%rsp)
+       jmp     2b
  END(ret_from_fork)
  
  /*
@@@ -801,7 -831,6 +832,7 @@@ ENTRY(native_load_gs_index
        popfq
        ret
  END(native_load_gs_index)
 +EXPORT_SYMBOL(native_load_gs_index)
  
        _ASM_EXTABLE(.Lgs_change, bad_gs)
        .section .fixup, "ax"
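
The __switch_to_asm/ret_from_fork rework in the two entry_*.S hunks above relies on a stack frame whose layout "must match the order in inactive_task_frame"; that structure itself is not part of this diff.  As a reading aid, here is a hedged C sketch of the 64-bit layout implied by the push sequence in __switch_to_asm (field names are illustrative; the real definition lives in arch/x86/include/asm/switch_to.h):

/*
 * Illustrative only -- inferred from the push order in __switch_to_asm
 * above (%rbp pushed first, %r15 last), lowest address (the final %rsp)
 * first.  Not taken from this diff.
 */
struct inactive_task_frame_sketch {
	unsigned long r15;
	unsigned long r14;
	unsigned long r13;
	unsigned long r12;	/* kernel thread arg consumed by ret_from_fork */
	unsigned long bx;	/* kernel thread func, NULL for a user thread */
	unsigned long bp;
	unsigned long ret_addr;	/* popped by the 'ret' that ends __switch_to();
				 * set to ret_from_fork for a newly forked task */
};

copy_thread_tls() is expected to fill in bx/r12 for kernel threads and point ret_addr at ret_from_fork, which is why ret_from_fork above only has to test %rbx to choose between calling the kernel-thread function and falling through to the normal user-mode return path (the 32-bit hunk uses %ebx/%edi the same way).
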
diff --combined arch/x86/kernel/head_32.S
index 4707baf9420347eaf808484de967282081f4ec8c,5f401262f12d08c50d6411d80de056ef87b6c6e9..b6b2f0264af36ac272537e5d6689a38361bb60d4
@@@ -23,7 -23,6 +23,7 @@@
  #include <asm/percpu.h>
  #include <asm/nops.h>
  #include <asm/bootparam.h>
 +#include <asm/export.h>
  
  /* Physical address */
  #define pa(X) ((X) - __PAGE_OFFSET)
@@@ -95,7 -94,7 +95,7 @@@ RESERVE_BRK(pagetables, INIT_MAP_SIZE
   */
  __HEAD
  ENTRY(startup_32)
-       movl pa(stack_start),%ecx
+       movl pa(initial_stack),%ecx
        
        /* test KEEP_SEGMENTS flag to see if the bootloader is asking
                us to not reload segments */
@@@ -287,7 -286,7 +287,7 @@@ num_subarch_entries = (. - subarch_entr
   * start_secondary().
   */
  ENTRY(start_cpu0)
-       movl stack_start, %ecx
+       movl initial_stack, %ecx
        movl %ecx, %esp
        jmp  *(initial_code)
  ENDPROC(start_cpu0)
@@@ -308,7 -307,7 +308,7 @@@ ENTRY(startup_32_smp
        movl %eax,%es
        movl %eax,%fs
        movl %eax,%gs
-       movl pa(stack_start),%ecx
+       movl pa(initial_stack),%ecx
        movl %eax,%ss
        leal -__PAGE_OFFSET(%ecx),%esp
  
@@@ -674,7 -673,6 +674,7 @@@ ENTRY(empty_zero_page
        .fill 4096,1,0
  ENTRY(swapper_pg_dir)
        .fill 1024,4,0
 +EXPORT_SYMBOL(empty_zero_page)
  
  /*
   * This starts the data section.
@@@ -705,7 -703,7 +705,7 @@@ ENTRY(initial_page_table
  
  .data
  .balign 4
- ENTRY(stack_start)
+ ENTRY(initial_stack)
        .long init_thread_union+THREAD_SIZE
  
  __INITRODATA
diff --combined arch/x86/kernel/head_64.S
index 537d913f45ecbdf9c397920ce91bc493a888a9ca,c98a559c346ed0c2b5092181130b2a3a2f7b1dcf..b4421cc191b056727f8f8c0def78a750b319a1c4
@@@ -21,7 -21,6 +21,7 @@@
  #include <asm/percpu.h>
  #include <asm/nops.h>
  #include "../entry/calling.h"
 +#include <asm/export.h>
  
  #ifdef CONFIG_PARAVIRT
  #include <asm/asm-offsets.h>
@@@ -67,7 -66,7 +67,7 @@@ startup_64
         */
  
        /*
-        * Setup stack for verify_cpu(). "-8" because stack_start is defined
+        * Setup stack for verify_cpu(). "-8" because initial_stack is defined
         * this way, see below. Our best guess is a NULL ptr for stack
         * termination heuristics and we don't want to break anything which
         * might depend on it (kgdb, ...).
@@@ -227,7 -226,7 +227,7 @@@ ENTRY(secondary_startup_64
        movq    %rax, %cr0
  
        /* Setup a boot time stack */
-       movq stack_start(%rip), %rsp
+       movq initial_stack(%rip), %rsp
  
        /* zero EFLAGS after setting rsp */
        pushq $0
@@@ -311,7 -310,7 +311,7 @@@ ENDPROC(secondary_startup_64
   * start_secondary().
   */
  ENTRY(start_cpu0)
-       movq stack_start(%rip),%rsp
+       movq initial_stack(%rip),%rsp
        movq    initial_code(%rip),%rax
        pushq   $0              # fake return address to stop unwinder
        pushq   $__KERNEL_CS    # set correct cs
  ENDPROC(start_cpu0)
  #endif
  
-       /* SMP bootup changes these two */
+       /* Both SMP bootup and ACPI suspend change these variables */
        __REFDATA
        .balign 8
        GLOBAL(initial_code)
        .quad   x86_64_start_kernel
        GLOBAL(initial_gs)
        .quad   INIT_PER_CPU_VAR(irq_stack_union)
-       GLOBAL(stack_start)
+       GLOBAL(initial_stack)
        .quad  init_thread_union+THREAD_SIZE-8
-       .word  0
        __FINITDATA
  
  bad_address:
@@@ -489,12 -486,10 +487,12 @@@ early_gdt_descr_base
  ENTRY(phys_base)
        /* This must match the first entry in level2_kernel_pgt */
        .quad   0x0000000000000000
 +EXPORT_SYMBOL(phys_base)
  
  #include "../../x86/xen/xen-head.S"
        
        __PAGE_ALIGNED_BSS
  NEXT_PAGE(empty_zero_page)
        .skip PAGE_SIZE
 +EXPORT_SYMBOL(empty_zero_page)
  
diff --combined arch/x86/lib/memcpy_64.S
index 94c917af968801c643f396d64f051598f24e4e15,49e6ebac7e73e33b0a03327cb65c95a29afc1c67..779782f5832476582becc24e5a0f0f5b10ea0b53
@@@ -4,7 -4,6 +4,7 @@@
  #include <asm/errno.h>
  #include <asm/cpufeatures.h>
  #include <asm/alternative-asm.h>
 +#include <asm/export.h>
  
  /*
   * We build a jump to memcpy_orig by default which gets NOPped out on
@@@ -41,8 -40,6 +41,8 @@@ ENTRY(memcpy
        ret
  ENDPROC(memcpy)
  ENDPROC(__memcpy)
 +EXPORT_SYMBOL(memcpy)
 +EXPORT_SYMBOL(__memcpy)
  
  /*
   * memcpy_erms() - enhanced fast string memcpy. This is faster and
@@@ -184,11 -181,11 +184,11 @@@ ENDPROC(memcpy_orig
  
  #ifndef CONFIG_UML
  /*
-  * memcpy_mcsafe - memory copy with machine check exception handling
+  * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
   * Note that we only catch machine checks when reading the source addresses.
   * Writes to target are posted and don't generate machine checks.
   */
- ENTRY(memcpy_mcsafe)
+ ENTRY(memcpy_mcsafe_unrolled)
        cmpl $8, %edx
        /* Less than 8 bytes? Go to byte copy loop */
        jb .L_no_whole_words
  .L_done_memcpy_trap:
        xorq %rax, %rax
        ret
- ENDPROC(memcpy_mcsafe)
- EXPORT_SYMBOL_GPL(memcpy_mcsafe)
+ ENDPROC(memcpy_mcsafe_unrolled)
++EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
  
        .section .fixup, "ax"
        /* Return -EFAULT for any failure */
diff --combined drivers/acpi/internal.h
index 3797155bf2169584ab54bc4d6f1c84f9bdfad88e,f26fc1d7cfea8e63951b32787b7c08060d26e455..70ebd769712bd29ae638fb13523c18ccf6e0659d
@@@ -40,10 -40,8 +40,8 @@@ int acpi_sysfs_init(void)
  void acpi_container_init(void);
  void acpi_memory_hotplug_init(void);
  #ifdef        CONFIG_ACPI_HOTPLUG_IOAPIC
- int acpi_ioapic_add(struct acpi_pci_root *root);
  int acpi_ioapic_remove(struct acpi_pci_root *root);
  #else
- static inline int acpi_ioapic_add(struct acpi_pci_root *root) { return 0; }
  static inline int acpi_ioapic_remove(struct acpi_pci_root *root) { return 0; }
  #endif
  #ifdef CONFIG_ACPI_DOCK
@@@ -116,6 -114,7 +114,6 @@@ bool acpi_device_is_present(struct acpi
  bool acpi_device_is_battery(struct acpi_device *adev);
  bool acpi_device_is_first_physical_node(struct acpi_device *adev,
                                        const struct device *dev);
 -struct device *acpi_get_first_physical_node(struct acpi_device *adev);
  
  /* --------------------------------------------------------------------------
                       Device Matching and Notification
@@@ -173,8 -172,6 +171,8 @@@ struct acpi_ec 
        struct work_struct work;
        unsigned long timestamp;
        unsigned long nr_pending_queries;
 +      bool saved_busy_polling;
 +      unsigned int saved_polling_guard;
  };
  
  extern struct acpi_ec *first_ec;
@@@ -186,9 -183,9 +184,9 @@@ typedef int (*acpi_ec_query_func) (voi
  int acpi_ec_init(void);
  int acpi_ec_ecdt_probe(void);
  int acpi_ec_dsdt_probe(void);
 +int acpi_ec_ecdt_start(void);
  void acpi_ec_block_transactions(void);
  void acpi_ec_unblock_transactions(void);
 -void acpi_ec_unblock_transactions_early(void);
  int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
                              acpi_handle handle, acpi_ec_query_func func,
                              void *data);
diff --combined drivers/md/raid5.c
index 5883ef0d95bf9a6f17eadb243fc40d61cc9909ed,1103e7388c23d1c666c6dabe83b289b2149033b6..766c3b7cf0b8f5278a4d79dc1c569625b6dd7697
@@@ -2423,10 -2423,10 +2423,10 @@@ static void raid5_end_read_request(stru
                }
        }
        rdev_dec_pending(rdev, conf->mddev);
 +      bio_reset(bi);
        clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
        raid5_release_stripe(sh);
 -      bio_reset(bi);
  }
  
  static void raid5_end_write_request(struct bio *bi)
        if (sh->batch_head && bi->bi_error && !replacement)
                set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
  
 +      bio_reset(bi);
        if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
                clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
  
        if (sh->batch_head && sh != sh->batch_head)
                raid5_release_stripe(sh->batch_head);
 -      bio_reset(bi);
  }
  
  static void raid5_build_block(struct stripe_head *sh, int i, int previous)
@@@ -6349,22 -6349,20 +6349,20 @@@ static int alloc_scratch_buffer(struct 
        return 0;
  }
  
- static void raid5_free_percpu(struct r5conf *conf)
+ static int raid456_cpu_dead(unsigned int cpu, struct hlist_node *node)
  {
-       unsigned long cpu;
+       struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
+       free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
+       return 0;
+ }
  
+ static void raid5_free_percpu(struct r5conf *conf)
+ {
        if (!conf->percpu)
                return;
  
- #ifdef CONFIG_HOTPLUG_CPU
-       unregister_cpu_notifier(&conf->cpu_notify);
- #endif
-       get_online_cpus();
-       for_each_possible_cpu(cpu)
-               free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
-       put_online_cpus();
+       cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
        free_percpu(conf->percpu);
  }
  
@@@ -6383,64 -6381,28 +6381,28 @@@ static void free_conf(struct r5conf *co
        kfree(conf);
  }
  
- #ifdef CONFIG_HOTPLUG_CPU
- static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
-                             void *hcpu)
+ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
  {
-       struct r5conf *conf = container_of(nfb, struct r5conf, cpu_notify);
-       long cpu = (long)hcpu;
+       struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
        struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
  
-       switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               if (alloc_scratch_buffer(conf, percpu)) {
-                       pr_err("%s: failed memory allocation for cpu%ld\n",
-                              __func__, cpu);
-                       return notifier_from_errno(-ENOMEM);
-               }
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-       case CPU_UP_CANCELED:
-       case CPU_UP_CANCELED_FROZEN:
-               free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
-               break;
-       default:
-               break;
+       if (alloc_scratch_buffer(conf, percpu)) {
+               pr_err("%s: failed memory allocation for cpu%u\n",
+                      __func__, cpu);
+               return -ENOMEM;
        }
-       return NOTIFY_OK;
+       return 0;
  }
- #endif
  
  static int raid5_alloc_percpu(struct r5conf *conf)
  {
-       unsigned long cpu;
        int err = 0;
  
        conf->percpu = alloc_percpu(struct raid5_percpu);
        if (!conf->percpu)
                return -ENOMEM;
  
- #ifdef CONFIG_HOTPLUG_CPU
-       conf->cpu_notify.notifier_call = raid456_cpu_notify;
-       conf->cpu_notify.priority = 0;
-       err = register_cpu_notifier(&conf->cpu_notify);
-       if (err)
-               return err;
- #endif
-       get_online_cpus();
-       for_each_present_cpu(cpu) {
-               err = alloc_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
-               if (err) {
-                       pr_err("%s: failed memory allocation for cpu%ld\n",
-                              __func__, cpu);
-                       break;
-               }
-       }
-       put_online_cpus();
+       err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
        if (!err) {
                conf->scribble_disks = max(conf->raid_disks,
                        conf->previous_raid_disks);
@@@ -6639,16 -6601,6 +6601,16 @@@ static struct r5conf *setup_conf(struc
        }
  
        conf->min_nr_stripes = NR_STRIPES;
 +      if (mddev->reshape_position != MaxSector) {
 +              int stripes = max_t(int,
 +                      ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4,
 +                      ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4);
 +              conf->min_nr_stripes = max(NR_STRIPES, stripes);
 +              if (conf->min_nr_stripes != NR_STRIPES)
 +                      printk(KERN_INFO
 +                              "md/raid:%s: force stripe size %d for reshape\n",
 +                              mdname(mddev), conf->min_nr_stripes);
 +      }
        memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
                 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
        atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
@@@ -7066,8 -7018,6 +7028,8 @@@ static int raid5_run(struct mddev *mdde
                else
                        queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
                                                mddev->queue);
 +
 +              blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
        }
  
        if (journal_dev) {
@@@ -7987,10 -7937,21 +7949,21 @@@ static struct md_personality raid4_pers
  
  static int __init raid5_init(void)
  {
+       int ret;
        raid5_wq = alloc_workqueue("raid5wq",
                WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0);
        if (!raid5_wq)
                return -ENOMEM;
+       ret = cpuhp_setup_state_multi(CPUHP_MD_RAID5_PREPARE,
+                                     "md/raid5:prepare",
+                                     raid456_cpu_up_prepare,
+                                     raid456_cpu_dead);
+       if (ret) {
+               destroy_workqueue(raid5_wq);
+               return ret;
+       }
        register_md_personality(&raid6_personality);
        register_md_personality(&raid5_personality);
        register_md_personality(&raid4_personality);
@@@ -8002,6 -7963,7 +7975,7 @@@ static void raid5_exit(void
        unregister_md_personality(&raid6_personality);
        unregister_md_personality(&raid5_personality);
        unregister_md_personality(&raid4_personality);
+       cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE);
        destroy_workqueue(raid5_wq);
  }
  
diff --combined drivers/net/ethernet/marvell/mvneta.c
index 8e4252dd9a9db7a406bda66321b2db4da631de5b,b74548728fb5e170080a2ca421c826a161ef38f7..32f0cc4256b3eefb3052d32c84a9c5c4346e9d10
@@@ -382,7 -382,8 +382,8 @@@ struct mvneta_port 
        struct mvneta_rx_queue *rxqs;
        struct mvneta_tx_queue *txqs;
        struct net_device *dev;
-       struct notifier_block cpu_notifier;
+       struct hlist_node node_online;
+       struct hlist_node node_dead;
        int rxq_def;
        /* Protect the access to the percpu interrupt registers,
         * ensuring that the configuration remains coherent.
        u16 rx_ring_size;
  
        struct mii_bus *mii_bus;
 -      struct phy_device *phy_dev;
        phy_interface_t phy_interface;
        struct device_node *phy_node;
        unsigned int link;
@@@ -573,6 -575,7 +574,7 @@@ struct mvneta_rx_queue 
        int next_desc_to_proc;
  };
  
+ static enum cpuhp_state online_hpstate;
  /* The hardware supports eight (8) rx queues, but we are only allowing
   * the first one to be used. Therefore, let's just allocate one queue.
   */
@@@ -2650,7 -2653,6 +2652,7 @@@ static int mvneta_poll(struct napi_stru
        u32 cause_rx_tx;
        int rx_queue;
        struct mvneta_port *pp = netdev_priv(napi->dev);
 +      struct net_device *ndev = pp->dev;
        struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
  
        if (!netif_running(pp->dev)) {
                                (MVNETA_CAUSE_PHY_STATUS_CHANGE |
                                 MVNETA_CAUSE_LINK_CHANGE |
                                 MVNETA_CAUSE_PSC_SYNC_CHANGE))) {
 -                      mvneta_fixed_link_update(pp, pp->phy_dev);
 +                      mvneta_fixed_link_update(pp, ndev->phydev);
                }
        }
  
@@@ -2963,7 -2965,6 +2965,7 @@@ static int mvneta_setup_txqs(struct mvn
  static void mvneta_start_dev(struct mvneta_port *pp)
  {
        int cpu;
 +      struct net_device *ndev = pp->dev;
  
        mvneta_max_rx_size_set(pp, pp->pkt_size);
        mvneta_txq_max_tx_size_set(pp, pp->pkt_size);
                    MVNETA_CAUSE_LINK_CHANGE |
                    MVNETA_CAUSE_PSC_SYNC_CHANGE);
  
 -      phy_start(pp->phy_dev);
 +      phy_start(ndev->phydev);
        netif_tx_start_all_queues(pp->dev);
  }
  
  static void mvneta_stop_dev(struct mvneta_port *pp)
  {
        unsigned int cpu;
 +      struct net_device *ndev = pp->dev;
  
 -      phy_stop(pp->phy_dev);
 +      phy_stop(ndev->phydev);
  
        for_each_online_cpu(cpu) {
                struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
@@@ -3168,7 -3168,7 +3170,7 @@@ static int mvneta_set_mac_addr(struct n
  static void mvneta_adjust_link(struct net_device *ndev)
  {
        struct mvneta_port *pp = netdev_priv(ndev);
 -      struct phy_device *phydev = pp->phy_dev;
 +      struct phy_device *phydev = ndev->phydev;
        int status_change = 0;
  
        if (phydev->link) {
@@@ -3246,6 -3246,7 +3248,6 @@@ static int mvneta_mdio_probe(struct mvn
        phy_dev->supported &= PHY_GBIT_FEATURES;
        phy_dev->advertising = phy_dev->supported;
  
 -      pp->phy_dev = phy_dev;
        pp->link    = 0;
        pp->duplex  = 0;
        pp->speed   = 0;
  
  static void mvneta_mdio_remove(struct mvneta_port *pp)
  {
 -      phy_disconnect(pp->phy_dev);
 -      pp->phy_dev = NULL;
 +      struct net_device *ndev = pp->dev;
 +
 +      phy_disconnect(ndev->phydev);
  }
  
  /* Electing a CPU must be done in an atomic way: it should be done
@@@ -3313,101 -3313,104 +3315,104 @@@ static void mvneta_percpu_elect(struct 
        }
  };
  
- static int mvneta_percpu_notifier(struct notifier_block *nfb,
-                                 unsigned long action, void *hcpu)
+ static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node)
  {
-       struct mvneta_port *pp = container_of(nfb, struct mvneta_port,
-                                             cpu_notifier);
-       int cpu = (unsigned long)hcpu, other_cpu;
+       int other_cpu;
+       struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port,
+                                                 node_online);
        struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
  
-       switch (action) {
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-       case CPU_DOWN_FAILED:
-       case CPU_DOWN_FAILED_FROZEN:
-               spin_lock(&pp->lock);
-               /* Configuring the driver for a new CPU while the
-                * driver is stopping is racy, so just avoid it.
-                */
-               if (pp->is_stopped) {
-                       spin_unlock(&pp->lock);
-                       break;
-               }
-               netif_tx_stop_all_queues(pp->dev);
  
-               /* We have to synchronise on tha napi of each CPU
-                * except the one just being waked up
-                */
-               for_each_online_cpu(other_cpu) {
-                       if (other_cpu != cpu) {
-                               struct mvneta_pcpu_port *other_port =
-                                       per_cpu_ptr(pp->ports, other_cpu);
+       spin_lock(&pp->lock);
+       /*
+        * Configuring the driver for a new CPU while the driver is
+        * stopping is racy, so just avoid it.
+        */
+       if (pp->is_stopped) {
+               spin_unlock(&pp->lock);
+               return 0;
+       }
+       netif_tx_stop_all_queues(pp->dev);
  
-                               napi_synchronize(&other_port->napi);
-                       }
+       /*
+        * We have to synchronise on the napi of each CPU except the one
+        * just being woken up
+        */
+       for_each_online_cpu(other_cpu) {
+               if (other_cpu != cpu) {
+                       struct mvneta_pcpu_port *other_port =
+                               per_cpu_ptr(pp->ports, other_cpu);
+                       napi_synchronize(&other_port->napi);
                }
+       }
  
-               /* Mask all ethernet port interrupts */
-               on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
-               napi_enable(&port->napi);
+       /* Mask all ethernet port interrupts */
+       on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+       napi_enable(&port->napi);
  
+       /*
+        * Enable per-CPU interrupts on the CPU that is
+        * brought up.
+        */
+       mvneta_percpu_enable(pp);
  
-               /* Enable per-CPU interrupts on the CPU that is
-                * brought up.
-                */
-               mvneta_percpu_enable(pp);
+       /*
+        * Enable per-CPU interrupt on the one CPU we care
+        * about.
+        */
+       mvneta_percpu_elect(pp);
  
-               /* Enable per-CPU interrupt on the one CPU we care
-                * about.
-                */
-               mvneta_percpu_elect(pp);
-               /* Unmask all ethernet port interrupts */
-               on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
-               mvreg_write(pp, MVNETA_INTR_MISC_MASK,
-                       MVNETA_CAUSE_PHY_STATUS_CHANGE |
-                       MVNETA_CAUSE_LINK_CHANGE |
-                       MVNETA_CAUSE_PSC_SYNC_CHANGE);
-               netif_tx_start_all_queues(pp->dev);
-               spin_unlock(&pp->lock);
-               break;
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
-               netif_tx_stop_all_queues(pp->dev);
-               /* Thanks to this lock we are sure that any pending
-                * cpu election is done
-                */
-               spin_lock(&pp->lock);
-               /* Mask all ethernet port interrupts */
-               on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
-               spin_unlock(&pp->lock);
+       /* Unmask all ethernet port interrupts */
+       on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+       mvreg_write(pp, MVNETA_INTR_MISC_MASK,
+                   MVNETA_CAUSE_PHY_STATUS_CHANGE |
+                   MVNETA_CAUSE_LINK_CHANGE |
+                   MVNETA_CAUSE_PSC_SYNC_CHANGE);
+       netif_tx_start_all_queues(pp->dev);
+       spin_unlock(&pp->lock);
+       return 0;
+ }
  
-               napi_synchronize(&port->napi);
-               napi_disable(&port->napi);
-               /* Disable per-CPU interrupts on the CPU that is
-                * brought down.
-                */
-               mvneta_percpu_disable(pp);
+ static int mvneta_cpu_down_prepare(unsigned int cpu, struct hlist_node *node)
+ {
+       struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port,
+                                                 node_online);
+       struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
  
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               /* Check if a new CPU must be elected now this on is down */
-               spin_lock(&pp->lock);
-               mvneta_percpu_elect(pp);
-               spin_unlock(&pp->lock);
-               /* Unmask all ethernet port interrupts */
-               on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
-               mvreg_write(pp, MVNETA_INTR_MISC_MASK,
-                       MVNETA_CAUSE_PHY_STATUS_CHANGE |
-                       MVNETA_CAUSE_LINK_CHANGE |
-                       MVNETA_CAUSE_PSC_SYNC_CHANGE);
-               netif_tx_start_all_queues(pp->dev);
-               break;
-       }
+       /*
+        * Thanks to this lock we are sure that any pending cpu election is
+        * done.
+        */
+       spin_lock(&pp->lock);
+       /* Mask all ethernet port interrupts */
+       on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+       spin_unlock(&pp->lock);
  
-       return NOTIFY_OK;
+       napi_synchronize(&port->napi);
+       napi_disable(&port->napi);
+       /* Disable per-CPU interrupts on the CPU that is brought down. */
+       mvneta_percpu_disable(pp);
+       return 0;
+ }
+ static int mvneta_cpu_dead(unsigned int cpu, struct hlist_node *node)
+ {
+       struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port,
+                                                 node_dead);
+       /* Check if a new CPU must be elected now that this one is down */
+       spin_lock(&pp->lock);
+       mvneta_percpu_elect(pp);
+       spin_unlock(&pp->lock);
+       /* Unmask all ethernet port interrupts */
+       on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+       mvreg_write(pp, MVNETA_INTR_MISC_MASK,
+                   MVNETA_CAUSE_PHY_STATUS_CHANGE |
+                   MVNETA_CAUSE_LINK_CHANGE |
+                   MVNETA_CAUSE_PSC_SYNC_CHANGE);
+       netif_tx_start_all_queues(pp->dev);
+       return 0;
  }
  
  static int mvneta_open(struct net_device *dev)
        /* Register a CPU notifier to handle the case where our CPU
         * might be taken offline.
         */
-       register_cpu_notifier(&pp->cpu_notifier);
+       ret = cpuhp_state_add_instance_nocalls(online_hpstate,
+                                              &pp->node_online);
+       if (ret)
+               goto err_free_irq;
+       ret = cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+                                              &pp->node_dead);
+       if (ret)
+               goto err_free_online_hp;
  
        /* In default link is down */
        netif_carrier_off(pp->dev);
        ret = mvneta_mdio_probe(pp);
        if (ret < 0) {
                netdev_err(dev, "cannot probe MDIO bus\n");
-               goto err_free_irq;
+               goto err_free_dead_hp;
        }
  
        mvneta_start_dev(pp);
  
        return 0;
  
+ err_free_dead_hp:
+       cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+                                           &pp->node_dead);
+ err_free_online_hp:
+       cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
  err_free_irq:
-       unregister_cpu_notifier(&pp->cpu_notifier);
        on_each_cpu(mvneta_percpu_disable, pp, true);
        free_percpu_irq(pp->dev->irq, pp->ports);
  err_cleanup_txqs:
@@@ -3486,7 -3501,10 +3503,10 @@@ static int mvneta_stop(struct net_devic
  
        mvneta_stop_dev(pp);
        mvneta_mdio_remove(pp);
-       unregister_cpu_notifier(&pp->cpu_notifier);
+       cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
+       cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+                                           &pp->node_dead);
        on_each_cpu(mvneta_percpu_disable, pp, true);
        free_percpu_irq(dev->irq, pp->ports);
        mvneta_cleanup_rxqs(pp);
  
  static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
  {
 -      struct mvneta_port *pp = netdev_priv(dev);
 -
 -      if (!pp->phy_dev)
 +      if (!dev->phydev)
                return -ENOTSUPP;
  
 -      return phy_mii_ioctl(pp->phy_dev, ifr, cmd);
 +      return phy_mii_ioctl(dev->phydev, ifr, cmd);
  }
  
  /* Ethtool methods */
  
 -/* Get settings (phy address, speed) for ethtools */
 -int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 +/* Set link ksettings (phy address, speed) for ethtools */
 +int mvneta_ethtool_set_link_ksettings(struct net_device *ndev,
 +                                    const struct ethtool_link_ksettings *cmd)
  {
 -      struct mvneta_port *pp = netdev_priv(dev);
 -
 -      if (!pp->phy_dev)
 -              return -ENODEV;
 -
 -      return phy_ethtool_gset(pp->phy_dev, cmd);
 -}
 -
 -/* Set settings (phy address, speed) for ethtools */
 -int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 -{
 -      struct mvneta_port *pp = netdev_priv(dev);
 -      struct phy_device *phydev = pp->phy_dev;
 +      struct mvneta_port *pp = netdev_priv(ndev);
 +      struct phy_device *phydev = ndev->phydev;
  
        if (!phydev)
                return -ENODEV;
  
 -      if ((cmd->autoneg == AUTONEG_ENABLE) != pp->use_inband_status) {
 +      if ((cmd->base.autoneg == AUTONEG_ENABLE) != pp->use_inband_status) {
                u32 val;
  
 -              mvneta_set_autoneg(pp, cmd->autoneg == AUTONEG_ENABLE);
 +              mvneta_set_autoneg(pp, cmd->base.autoneg == AUTONEG_ENABLE);
  
 -              if (cmd->autoneg == AUTONEG_DISABLE) {
 +              if (cmd->base.autoneg == AUTONEG_DISABLE) {
                        val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
                        val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED |
                                 MVNETA_GMAC_CONFIG_GMII_SPEED |
                        mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
                }
  
 -              pp->use_inband_status = (cmd->autoneg == AUTONEG_ENABLE);
 +              pp->use_inband_status = (cmd->base.autoneg == AUTONEG_ENABLE);
                netdev_info(pp->dev, "autoneg status set to %i\n",
                            pp->use_inband_status);
  
 -              if (netif_running(dev)) {
 +              if (netif_running(ndev)) {
                        mvneta_port_down(pp);
                        mvneta_port_up(pp);
                }
        }
  
 -      return phy_ethtool_sset(pp->phy_dev, cmd);
 +      return phy_ethtool_ksettings_set(ndev->phydev, cmd);
  }
  
  /* Set interrupt coalescing for ethtools */
@@@ -3811,6 -3841,8 +3831,6 @@@ static const struct net_device_ops mvne
  
  const struct ethtool_ops mvneta_eth_tool_ops = {
        .get_link       = ethtool_op_get_link,
 -      .get_settings   = mvneta_ethtool_get_settings,
 -      .set_settings   = mvneta_ethtool_set_settings,
        .set_coalesce   = mvneta_ethtool_set_coalesce,
        .get_coalesce   = mvneta_ethtool_get_coalesce,
        .get_drvinfo    = mvneta_ethtool_get_drvinfo,
        .get_rxnfc      = mvneta_ethtool_get_rxnfc,
        .get_rxfh       = mvneta_ethtool_get_rxfh,
        .set_rxfh       = mvneta_ethtool_set_rxfh,
 +      .get_link_ksettings = phy_ethtool_get_link_ksettings,
 +      .set_link_ksettings = mvneta_ethtool_set_link_ksettings,
  };
  
  /* Initialize hw */
@@@ -4014,7 -4044,6 +4034,6 @@@ static int mvneta_probe(struct platform
        err = of_property_read_string(dn, "managed", &managed);
        pp->use_inband_status = (err == 0 &&
                                 strcmp(managed, "in-band-status") == 0);
-       pp->cpu_notifier.notifier_call = mvneta_percpu_notifier;
  
        pp->rxq_def = rxq_def;
  
@@@ -4217,7 -4246,42 +4236,42 @@@ static struct platform_driver mvneta_dr
        },
  };
  
- module_platform_driver(mvneta_driver);
+ static int __init mvneta_driver_init(void)
+ {
+       int ret;
+       ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/mvneta:online",
+                                     mvneta_cpu_online,
+                                     mvneta_cpu_down_prepare);
+       if (ret < 0)
+               goto out;
+       online_hpstate = ret;
+       ret = cpuhp_setup_state_multi(CPUHP_NET_MVNETA_DEAD, "net/mvneta:dead",
+                                     NULL, mvneta_cpu_dead);
+       if (ret)
+               goto err_dead;
+       ret = platform_driver_register(&mvneta_driver);
+       if (ret)
+               goto err;
+       return 0;
+ err:
+       cpuhp_remove_multi_state(CPUHP_NET_MVNETA_DEAD);
+ err_dead:
+       cpuhp_remove_multi_state(online_hpstate);
+ out:
+       return ret;
+ }
+ module_init(mvneta_driver_init);
+ static void __exit mvneta_driver_exit(void)
+ {
+       platform_driver_unregister(&mvneta_driver);
+       cpuhp_remove_multi_state(CPUHP_NET_MVNETA_DEAD);
+       cpuhp_remove_multi_state(online_hpstate);
+ }
+ module_exit(mvneta_driver_exit);
  
  MODULE_DESCRIPTION("Marvell NETA Ethernet Driver - www.marvell.com");
  MODULE_AUTHOR("Rami Rosen <rosenr@marvell.com>, Thomas Petazzoni <thomas.petazzoni@free-electrons.com>");
diff --combined drivers/perf/arm_pmu.c
index 56ad3b03af8a5caf5cd6a7f2f8b55d606ed79581,cecdb1ef68728ad38ca8bb052046bee3ce52fbb4..278122e0eaffaea6d7e10fb341d0fac8c778c782
@@@ -534,24 -534,6 +534,24 @@@ static int armpmu_filter_match(struct p
        return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
  }
  
 +static ssize_t armpmu_cpumask_show(struct device *dev,
 +                                 struct device_attribute *attr, char *buf)
 +{
 +      struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev));
 +      return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus);
 +}
 +
 +static DEVICE_ATTR(cpus, S_IRUGO, armpmu_cpumask_show, NULL);
 +
 +static struct attribute *armpmu_common_attrs[] = {
 +      &dev_attr_cpus.attr,
 +      NULL,
 +};
 +
 +static struct attribute_group armpmu_common_attr_group = {
 +      .attrs = armpmu_common_attrs,
 +};
 +
  static void armpmu_init(struct arm_pmu *armpmu)
  {
        atomic_set(&armpmu->active_events, 0);
                .stop           = armpmu_stop,
                .read           = armpmu_read,
                .filter_match   = armpmu_filter_match,
 +              .attr_groups    = armpmu->attr_groups,
        };
 +      armpmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
 +              &armpmu_common_attr_group;
  }
  
  /* Set at runtime when we know what CPU type we are. */
@@@ -623,7 -602,7 +623,7 @@@ static void cpu_pmu_free_irq(struct arm
        irqs = min(pmu_device->num_resources, num_possible_cpus());
  
        irq = platform_get_irq(pmu_device, 0);
 -      if (irq >= 0 && irq_is_percpu(irq)) {
 +      if (irq > 0 && irq_is_percpu(irq)) {
                on_each_cpu_mask(&cpu_pmu->supported_cpus,
                                 cpu_pmu_disable_percpu_irq, &irq, 1);
                free_percpu_irq(irq, &hw_events->percpu_pmu);
                        if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
                                continue;
                        irq = platform_get_irq(pmu_device, i);
 -                      if (irq >= 0)
 +                      if (irq > 0)
                                free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
                }
        }
@@@ -659,7 -638,7 +659,7 @@@ static int cpu_pmu_request_irq(struct a
        }
  
        irq = platform_get_irq(pmu_device, 0);
 -      if (irq >= 0 && irq_is_percpu(irq)) {
 +      if (irq > 0 && irq_is_percpu(irq)) {
                err = request_percpu_irq(irq, handler, "arm-pmu",
                                         &hw_events->percpu_pmu);
                if (err) {
        return 0;
  }
  
- static DEFINE_SPINLOCK(arm_pmu_lock);
- static LIST_HEAD(arm_pmu_list);
  /*
   * PMU hardware loses all context when a CPU goes offline.
   * When a CPU is hotplugged back in, since some hardware registers are
   * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
   * junk values out of them.
   */
- static int arm_perf_starting_cpu(unsigned int cpu)
+ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
  {
-       struct arm_pmu *pmu;
-       spin_lock(&arm_pmu_lock);
-       list_for_each_entry(pmu, &arm_pmu_list, entry) {
+       struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
  
-               if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
-                       continue;
-               if (pmu->reset)
-                       pmu->reset(pmu);
-       }
-       spin_unlock(&arm_pmu_lock);
+       if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+               return 0;
+       if (pmu->reset)
+               pmu->reset(pmu);
        return 0;
  }
  
@@@ -842,9 -813,10 +834,10 @@@ static int cpu_pmu_init(struct arm_pmu 
        if (!cpu_hw_events)
                return -ENOMEM;
  
-       spin_lock(&arm_pmu_lock);
-       list_add_tail(&cpu_pmu->entry, &arm_pmu_list);
-       spin_unlock(&arm_pmu_lock);
+       err = cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+                                              &cpu_pmu->node);
+       if (err)
+               goto out_free;
  
        err = cpu_pm_pmu_register(cpu_pmu);
        if (err)
        return 0;
  
  out_unregister:
-       spin_lock(&arm_pmu_lock);
-       list_del(&cpu_pmu->entry);
-       spin_unlock(&arm_pmu_lock);
+       cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+                                           &cpu_pmu->node);
+ out_free:
        free_percpu(cpu_hw_events);
        return err;
  }
  static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
  {
        cpu_pm_pmu_unregister(cpu_pmu);
-       spin_lock(&arm_pmu_lock);
-       list_del(&cpu_pmu->entry);
-       spin_unlock(&arm_pmu_lock);
+       cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+                                           &cpu_pmu->node);
        free_percpu(cpu_pmu->hw_events);
  }
  
@@@ -940,7 -911,7 +932,7 @@@ static int of_pmu_irq_cfg(struct arm_pm
  
                /* Check the IRQ type and prohibit a mix of PPIs and SPIs */
                irq = platform_get_irq(pdev, i);
 -              if (irq >= 0) {
 +              if (irq > 0) {
                        bool spi = !irq_is_percpu(irq);
  
                        if (i > 0 && spi != using_spi) {
        if (cpumask_weight(&pmu->supported_cpus) == 0) {
                int irq = platform_get_irq(pdev, 0);
  
 -              if (irq >= 0 && irq_is_percpu(irq)) {
 -                      /* If using PPIs, check the affinity of the partition */
 +              if (irq > 0 && irq_is_percpu(irq)) {
                        int ret;
  
                        ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
@@@ -1059,7 -1031,6 +1051,7 @@@ int arm_pmu_device_probe(struct platfor
                goto out_free;
        }
  
 +
        ret = cpu_pmu_init(pmu);
        if (ret)
                goto out_free;
@@@ -1090,9 -1061,9 +1082,9 @@@ static int arm_pmu_hp_init(void
  {
        int ret;
  
-       ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_STARTING,
-                                       "AP_PERF_ARM_STARTING",
-                                       arm_perf_starting_cpu, NULL);
+       ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
+                                     "AP_PERF_ARM_STARTING",
+                                     arm_perf_starting_cpu, NULL);
        if (ret)
                pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
                       ret);
diff --combined include/linux/acpi.h
index e7465528b8dcfe52de57663cfb5aedb3b8ed229c,7c52e06fdf9190bc2113cf29f6e72e9a4504ab8a..6d0816ba84d880af4013c25c46db35f2a0dce1d6
@@@ -85,8 -85,6 +85,8 @@@ static inline const char *acpi_dev_name
        return dev_name(&adev->dev);
  }
  
 +struct device *acpi_get_first_physical_node(struct acpi_device *adev);
 +
  enum acpi_irq_model_id {
        ACPI_IRQ_MODEL_PIC = 0,
        ACPI_IRQ_MODEL_IOAPIC,
@@@ -636,11 -634,6 +636,11 @@@ static inline const char *acpi_dev_name
        return NULL;
  }
  
 +static inline struct device *acpi_get_first_physical_node(struct acpi_device *adev)
 +{
 +      return NULL;
 +}
 +
  static inline void acpi_early_init(void) { }
  static inline void acpi_subsystem_init(void) { }
  
@@@ -758,6 -751,12 +758,12 @@@ static inline int acpi_reconfig_notifie
  
  #endif        /* !CONFIG_ACPI */
  
+ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+ int acpi_ioapic_add(acpi_handle root);
+ #else
+ static inline int acpi_ioapic_add(acpi_handle root) { return 0; }
+ #endif
  #ifdef CONFIG_ACPI
  void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
                               u32 pm1a_ctrl,  u32 pm1b_ctrl));
diff --combined include/linux/compiler.h
index 0e0faff63bd2d7e98c5a0eead16f2aa445d9afd3,668569844d37cef4d51c6c7652d5f34ded2a66d6..cf0fa5d86059b6672773025b625027f6301074c1
@@@ -182,29 -182,6 +182,29 @@@ void ftrace_likely_update(struct ftrace
  # define unreachable() do { } while (1)
  #endif
  
 +/*
 + * KENTRY - kernel entry point
 + * This can be used to annotate symbols (functions or data) that are used
 + * without their linker symbol being referenced explicitly. For example,
 + * interrupt vector handlers, or functions in the kernel image that are found
 + * programmatically.
 + *
 + * Not required for symbols exported with EXPORT_SYMBOL, or initcalls. Those
 + * are handled in their own way (with KEEP() in linker scripts).
 + *
 + * KENTRY can be avoided if the symbols in question are marked as KEEP() in the
 + * linker script. For example an architecture could KEEP() its entire
 + * boot/exception vector code rather than annotate each function and data.
 + */
 +#ifndef KENTRY
 +# define KENTRY(sym)                                          \
 +      extern typeof(sym) sym;                                 \
 +      static const unsigned long __kentry_##sym               \
 +      __used                                                  \
 +      __attribute__((section("___kentry" "+" #sym ), used))   \
 +      = (unsigned long)&sym;
 +#endif
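A minimal usage sketch of the KENTRY() annotation added above (illustrative only; the symbol name is hypothetical and not part of this merge). It shows the intended pattern: pin a symbol that no C code references, such as a handler entered only from assembly, so linker section garbage collection cannot discard it.

/* Hypothetical example: hypothetical_vector_entry() is only referenced from
 * assembly, so KENTRY() emits a ___kentry reference that keeps it alive. */
void hypothetical_vector_entry(void);
KENTRY(hypothetical_vector_entry);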
 +
  #ifndef RELOC_HIDE
  # define RELOC_HIDE(ptr, off)                                 \
    ({ unsigned long __ptr;                                     \
@@@ -429,10 -406,6 +429,10 @@@ static __always_inline void __write_onc
  # define __attribute_const__  /* unimplemented */
  #endif
  
 +#ifndef __latent_entropy
 +# define __latent_entropy
 +#endif
 +
  /*
   * Tell gcc if a function is cold. The compiler will assume any path
   * directly leading to the call is unlikely.
   * object's lifetime is managed by something other than RCU.  That
   * "something other" might be reference counting or simple immortality.
   *
-  * The seemingly unused size_t variable is to validate @p is indeed a pointer
-  * type by making sure it can be dereferenced.
+  * The seemingly unused variable ___typecheck_p validates that @p is
+  * indeed a pointer type by using a pointer to typeof(*p) as the type.
+  * Taking a pointer to typeof(*p) again is needed in case p is void *.
   */
  #define lockless_dereference(p) \
  ({ \
        typeof(p) _________p1 = READ_ONCE(p); \
-       size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \
+       typeof(*(p)) *___typecheck_p __maybe_unused; \
        smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
        (_________p1); \
  })
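A standalone sketch of the typecheck idiom described in the comment above (an assumption for illustration; ASSERT_IS_POINTER() and typecheck_demo() are not kernel code). Declaring a variable of type typeof(*(p)) * compiles for ordinary pointers and for void * (where typeof(*p) is void), but fails to build when the argument is not a pointer at all.

/* GNU C illustration of the typeof(*(p)) * trick, outside the kernel. */
#define ASSERT_IS_POINTER(p) \
	do { typeof(*(p)) *___typecheck_p __attribute__((unused)) = (p); } while (0)

void typecheck_demo(void)
{
	int x = 0, *ip = &x;
	void *vp = ip;

	ASSERT_IS_POINTER(ip);	/* typeof(*ip) * is int *  -> compiles */
	ASSERT_IS_POINTER(vp);	/* typeof(*vp) * is void * -> compiles */
	/* ASSERT_IS_POINTER(x); would not compile: x is not a pointer. */
}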
diff --combined include/linux/cpu.h
index ad4f1f33a74e93050cb110fdf6f8f512c0c11499,6bf1992fe638d00b1739dd7e838a10e45906a1a5..7572d9e9dced921e1732226a2c00f5802f58c735
@@@ -61,17 -61,8 +61,8 @@@ struct notifier_block
  #define CPU_DOWN_PREPARE      0x0005 /* CPU (unsigned)v going down */
  #define CPU_DOWN_FAILED               0x0006 /* CPU (unsigned)v NOT going down */
  #define CPU_DEAD              0x0007 /* CPU (unsigned)v dead */
- #define CPU_DYING             0x0008 /* CPU (unsigned)v not running any task,
-                                       * not handling interrupts, soon dead.
-                                       * Called on the dying cpu, interrupts
-                                       * are already disabled. Must not
-                                       * sleep, must not fail */
  #define CPU_POST_DEAD         0x0009 /* CPU (unsigned)v dead, cpu_hotplug
                                        * lock is dropped */
- #define CPU_STARTING          0x000A /* CPU (unsigned)v soon running.
-                                       * Called on the new cpu, just before
-                                       * enabling interrupts. Must not sleep,
-                                       * must not fail */
  #define CPU_BROKEN            0x000B /* CPU (unsigned)v did not die properly,
                                        * perhaps due to preemption. */
  
@@@ -86,9 -77,6 +77,6 @@@
  #define CPU_DOWN_PREPARE_FROZEN       (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
  #define CPU_DOWN_FAILED_FROZEN        (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
  #define CPU_DEAD_FROZEN               (CPU_DEAD | CPU_TASKS_FROZEN)
- #define CPU_DYING_FROZEN      (CPU_DYING | CPU_TASKS_FROZEN)
- #define CPU_STARTING_FROZEN   (CPU_STARTING | CPU_TASKS_FROZEN)
  
  #ifdef CONFIG_SMP
  extern bool cpuhp_tasks_frozen;
@@@ -228,11 -216,7 +216,11 @@@ static inline void cpu_hotplug_done(voi
  #endif                /* CONFIG_HOTPLUG_CPU */
  
  #ifdef CONFIG_PM_SLEEP_SMP
 -extern int disable_nonboot_cpus(void);
 +extern int freeze_secondary_cpus(int primary);
 +static inline int disable_nonboot_cpus(void)
 +{
 +      return freeze_secondary_cpus(0);
 +}
  extern void enable_nonboot_cpus(void);
  #else /* !CONFIG_PM_SLEEP_SMP */
  static inline int disable_nonboot_cpus(void) { return 0; }
diff --combined include/linux/cpuhotplug.h
index 9177041fed2c047a118333553d63e9289f545189,afd59e2ca4b31bf43b5d87cd468d59f96b9fe7f1..211c1287e7d6f157f6956f89da5b285003179ed5
@@@ -14,13 -14,25 +14,25 @@@ enum cpuhp_state 
        CPUHP_PERF_SUPERH,
        CPUHP_X86_HPET_DEAD,
        CPUHP_X86_APB_DEAD,
+       CPUHP_VIRT_NET_DEAD,
+       CPUHP_SLUB_DEAD,
+       CPUHP_MM_WRITEBACK_DEAD,
+       CPUHP_SOFTIRQ_DEAD,
+       CPUHP_NET_MVNETA_DEAD,
+       CPUHP_CPUIDLE_DEAD,
        CPUHP_WORKQUEUE_PREP,
        CPUHP_POWER_NUMA_PREPARE,
        CPUHP_HRTIMERS_PREPARE,
        CPUHP_PROFILE_PREPARE,
        CPUHP_X2APIC_PREPARE,
        CPUHP_SMPCFD_PREPARE,
+       CPUHP_RELAY_PREPARE,
+       CPUHP_SLAB_PREPARE,
+       CPUHP_MD_RAID5_PREPARE,
        CPUHP_RCUTREE_PREP,
+       CPUHP_CPUIDLE_COUPLED_PREPARE,
+       CPUHP_POWERPC_PMAC_PREPARE,
+       CPUHP_POWERPC_MMU_CTX_PREPARE,
        CPUHP_NOTIFY_PREPARE,
        CPUHP_TIMERS_DEAD,
        CPUHP_BRINGUP_CPU,
@@@ -45,8 -57,6 +57,8 @@@
        CPUHP_AP_PERF_METAG_STARTING,
        CPUHP_AP_MIPS_OP_LOONGSON3_STARTING,
        CPUHP_AP_ARM_VFP_STARTING,
 +      CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
 +      CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
        CPUHP_AP_PERF_ARM_STARTING,
        CPUHP_AP_ARM_L2X0_STARTING,
        CPUHP_AP_ARM_ARCH_TIMER_STARTING,
@@@ -70,7 -80,6 +82,6 @@@
        CPUHP_AP_ARM64_ISNDEP_STARTING,
        CPUHP_AP_SMPCFD_DYING,
        CPUHP_AP_X86_TBOOT_DYING,
-       CPUHP_AP_NOTIFY_STARTING,
        CPUHP_AP_ONLINE,
        CPUHP_TEARDOWN_CPU,
        CPUHP_AP_ONLINE_IDLE,
@@@ -88,7 -97,6 +99,7 @@@
        CPUHP_AP_PERF_S390_SF_ONLINE,
        CPUHP_AP_PERF_ARM_CCI_ONLINE,
        CPUHP_AP_PERF_ARM_CCN_ONLINE,
 +      CPUHP_AP_PERF_ARM_L2X0_ONLINE,
        CPUHP_AP_WORKQUEUE_ONLINE,
        CPUHP_AP_RCUTREE_ONLINE,
        CPUHP_AP_NOTIFY_ONLINE,
  
  int __cpuhp_setup_state(enum cpuhp_state state,       const char *name, bool invoke,
                        int (*startup)(unsigned int cpu),
-                       int (*teardown)(unsigned int cpu));
+                       int (*teardown)(unsigned int cpu), bool multi_instance);
  
  /**
   * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
@@@ -119,7 -127,7 +130,7 @@@ static inline int cpuhp_setup_state(enu
                                    int (*startup)(unsigned int cpu),
                                    int (*teardown)(unsigned int cpu))
  {
-       return __cpuhp_setup_state(state, name, true, startup, teardown);
+       return __cpuhp_setup_state(state, name, true, startup, teardown, false);
  }
  
  /**
@@@ -138,7 -146,66 +149,66 @@@ static inline int cpuhp_setup_state_noc
                                            int (*startup)(unsigned int cpu),
                                            int (*teardown)(unsigned int cpu))
  {
-       return __cpuhp_setup_state(state, name, false, startup, teardown);
+       return __cpuhp_setup_state(state, name, false, startup, teardown,
+                                  false);
+ }
+ /**
+  * cpuhp_setup_state_multi - Add callbacks for multi state
+  * @state:    The state for which the calls are installed
+  * @name:     Name of the callback.
+  * @startup:  startup callback function
+  * @teardown: teardown callback function
+  *
+  * Sets the internal multi_instance flag and prepares a state to work as a
+  * multi-instance callback. No callbacks are invoked at this point. The
+  * callbacks are invoked once an instance for this state is registered via
+  * @cpuhp_state_add_instance or @cpuhp_state_add_instance_nocalls.
+  */
+ static inline int cpuhp_setup_state_multi(enum cpuhp_state state,
+                                         const char *name,
+                                         int (*startup)(unsigned int cpu,
+                                                        struct hlist_node *node),
+                                         int (*teardown)(unsigned int cpu,
+                                                         struct hlist_node *node))
+ {
+       return __cpuhp_setup_state(state, name, false,
+                                  (void *) startup,
+                                  (void *) teardown, true);
+ }
+ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+                              bool invoke);
+ /**
+  * cpuhp_state_add_instance - Add an instance for a state and invoke startup
+  *                            callback.
+  * @state:    The state for which the instance is installed
+  * @node:     The node for this individual state.
+  *
+  * Installs the instance for the @state and invokes the startup callback on
+  * the present cpus which have already reached the @state. The @state must have
+  * been earlier marked as multi-instance by @cpuhp_setup_state_multi.
+  */
+ static inline int cpuhp_state_add_instance(enum cpuhp_state state,
+                                          struct hlist_node *node)
+ {
+       return __cpuhp_state_add_instance(state, node, true);
+ }
+ /**
+  * cpuhp_state_add_instance_nocalls - Add an instance for a state without
+  *                                    invoking the startup callback.
+  * @state:    The state for which the instance is installed
+  * @node:     The node for this individual state.
+  *
+  * Installs the instance for the @state. The @state must have been earlier
+  * marked as multi-instance by @cpuhp_setup_state_multi.
+  */
+ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
+                                                  struct hlist_node *node)
+ {
+       return __cpuhp_state_add_instance(state, node, false);
  }
  
  void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
@@@ -165,6 -232,51 +235,51 @@@ static inline void cpuhp_remove_state_n
        __cpuhp_remove_state(state, false);
  }
  
+ /**
+  * cpuhp_remove_multi_state - Remove hotplug multi state callback
+  * @state:    The state for which the calls are removed
+  *
+  * Removes the callback functions from a multi state. This is the reverse of
+  * cpuhp_setup_state_multi(). All instances should have been removed before
+  * invoking this function.
+  */
+ static inline void cpuhp_remove_multi_state(enum cpuhp_state state)
+ {
+       __cpuhp_remove_state(state, false);
+ }
+ int __cpuhp_state_remove_instance(enum cpuhp_state state,
+                                 struct hlist_node *node, bool invoke);
+ /**
+  * cpuhp_state_remove_instance - Remove hotplug instance from state and invoke
+  *                               the teardown callback
+  * @state:    The state from which the instance is removed
+  * @node:     The node for this individual state.
+  *
+  * Removes the instance and invokes the teardown callback on the present cpus
+  * which have already reached the @state.
+  */
+ static inline int cpuhp_state_remove_instance(enum cpuhp_state state,
+                                             struct hlist_node *node)
+ {
+       return __cpuhp_state_remove_instance(state, node, true);
+ }
+ /**
+  * cpuhp_state_remove_instance_nocalls - Remove hotplug instance from state
+  *                                     without invoking the teardown callback
+  * @state:    The state from which the instance is removed
+  * @node:     The node for this individual state.
+  *
+  * Removes the instance without invoking the teardown callback.
+  */
+ static inline int cpuhp_state_remove_instance_nocalls(enum cpuhp_state state,
+                                                     struct hlist_node *node)
+ {
+       return __cpuhp_state_remove_instance(state, node, false);
+ }
  #ifdef CONFIG_SMP
  void cpuhp_online_idle(enum cpuhp_state state);
  #else
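A condensed usage sketch of the multi-instance helpers documented above, modelled on the mvneta conversion earlier in this merge (all names are illustrative, error unwinding is trimmed, and the dynamic CPUHP_AP_ONLINE_DYN allocation mirrors what mvneta_driver_init() does):

#include <linux/cpuhotplug.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/list.h>

static enum cpuhp_state my_online_state;

struct my_priv {
	struct hlist_node node_online;
};

static int my_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct my_priv *priv = hlist_entry_safe(node, struct my_priv,
						node_online);

	/* Per-CPU bring-up work for this instance would go here. */
	return priv ? 0 : -ENODEV;
}

static int __init my_driver_init(void)
{
	int ret;

	/* Allocate a dynamic online state once per driver ... */
	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "my/driver:online",
				      my_cpu_online, NULL);
	if (ret < 0)
		return ret;
	my_online_state = ret;
	return 0;
}

/* ... then add/remove one hlist_node per device instance, as mvneta does:
 *	cpuhp_state_add_instance_nocalls(my_online_state, &priv->node_online);
 *	cpuhp_state_remove_instance_nocalls(my_online_state, &priv->node_online);
 */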
diff --combined include/linux/jump_label.h
index a534c7f15a615520d802a78a82fe55d2af8e4cae,595fb46213fc4ae79965af911786b63784cbffd8..a0547c571800e7ca77f9a7ffed9412a16bee9262
@@@ -21,8 -21,6 +21,8 @@@
   *
   * DEFINE_STATIC_KEY_TRUE(key);
   * DEFINE_STATIC_KEY_FALSE(key);
 + * DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
 + * DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count);
   * static_branch_likely()
   * static_branch_unlikely()
   *
@@@ -269,19 -267,15 +269,25 @@@ struct static_key_false 
  #define DEFINE_STATIC_KEY_TRUE(name)  \
        struct static_key_true name = STATIC_KEY_TRUE_INIT
  
+ #define DECLARE_STATIC_KEY_TRUE(name) \
+       extern struct static_key_true name
  #define DEFINE_STATIC_KEY_FALSE(name) \
        struct static_key_false name = STATIC_KEY_FALSE_INIT
  
+ #define DECLARE_STATIC_KEY_FALSE(name)        \
+       extern struct static_key_false name
 +#define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count)             \
 +      struct static_key_true name[count] = {                  \
 +              [0 ... (count) - 1] = STATIC_KEY_TRUE_INIT,     \
 +      }
 +
 +#define DEFINE_STATIC_KEY_ARRAY_FALSE(name, count)            \
 +      struct static_key_false name[count] = {                 \
 +              [0 ... (count) - 1] = STATIC_KEY_FALSE_INIT,    \
 +      }
 +
  extern bool ____wrong_branch_error(void);
  
  #define static_key_enabled(x)                                                 \
diff --combined include/linux/perf/arm_pmu.h
index dc1f2f30c961e3b0174527d75867df67c8f5878a,4ad1b408c0bba6766587338edb22039be90bb85b..05d46ddb6f568ad2b3d8a8097f0d02f42d7f40b3
@@@ -14,7 -14,7 +14,7 @@@
  
  #include <linux/interrupt.h>
  #include <linux/perf_event.h>
 -
 +#include <linux/sysfs.h>
  #include <asm/cputype.h>
  
  /*
@@@ -77,13 -77,6 +77,13 @@@ struct pmu_hw_events 
        struct arm_pmu          *percpu_pmu;
  };
  
 +enum armpmu_attr_groups {
 +      ARMPMU_ATTR_GROUP_COMMON,
 +      ARMPMU_ATTR_GROUP_EVENTS,
 +      ARMPMU_ATTR_GROUP_FORMATS,
 +      ARMPMU_NR_ATTR_GROUPS
 +};
 +
  struct arm_pmu {
        struct pmu      pmu;
        cpumask_t       active_irqs;
        DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
        struct platform_device  *plat_device;
        struct pmu_hw_events    __percpu *hw_events;
-       struct list_head        entry;
+       struct hlist_node       node;
        struct notifier_block   cpu_pm_nb;
 +      /* the attr_groups array must be NULL-terminated */
 +      const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1];
  };
  
  #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
diff --combined include/linux/perf_event.h
index ccb73a58113dd845f67dae43dbfcf2992ea6f7b3,5c5362584aba12095b03d66e18447d7e412a06f1..060d0ede88df6dfc34fbfcd1e60629d8dce5373d
@@@ -510,9 -510,15 +510,15 @@@ typedef void (*perf_overflow_handler_t)
                                        struct perf_sample_data *,
                                        struct pt_regs *regs);
  
- enum perf_group_flag {
-       PERF_GROUP_SOFTWARE             = 0x1,
- };
+ /*
+  * Event capabilities. For event_caps and groups caps.
+  *
+  * PERF_EV_CAP_SOFTWARE: Is a software event.
+  * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
+  * from any CPU in the package where it is active.
+  */
+ #define PERF_EV_CAP_SOFTWARE          BIT(0)
+ #define PERF_EV_CAP_READ_ACTIVE_PKG   BIT(1)
  
  #define SWEVENT_HLIST_BITS            8
  #define SWEVENT_HLIST_SIZE            (1 << SWEVENT_HLIST_BITS)
@@@ -568,7 -574,12 +574,12 @@@ struct perf_event 
        struct hlist_node               hlist_entry;
        struct list_head                active_entry;
        int                             nr_siblings;
-       int                             group_flags;
+       /* Not serialized. Only written during event initialization. */
+       int                             event_caps;
+       /* The cumulative AND of all event_caps for events in this group. */
+       int                             group_caps;
        struct perf_event               *group_leader;
        struct pmu                      *pmu;
        void                            *pmu_private;
        u64                             (*clock)(void);
        perf_overflow_handler_t         overflow_handler;
        void                            *overflow_handler_context;
 +#ifdef CONFIG_BPF_SYSCALL
 +      perf_overflow_handler_t         orig_overflow_handler;
 +      struct bpf_prog                 *prog;
 +#endif
  
  #ifdef CONFIG_EVENT_TRACING
        struct trace_event_call         *tp_event;
@@@ -778,6 -785,9 +789,9 @@@ struct perf_cpu_context 
  #ifdef CONFIG_CGROUP_PERF
        struct perf_cgroup              *cgrp;
  #endif
+       struct list_head                sched_cb_entry;
+       int                             sched_cb_usage;
  };
  
  struct perf_output_handle {
        int                             page;
  };
  
 +struct bpf_perf_event_data_kern {
 +      struct pt_regs *regs;
 +      struct perf_sample_data *data;
 +};
 +
  #ifdef CONFIG_CGROUP_PERF
  
  /*
@@@ -994,7 -999,7 +1008,7 @@@ static inline bool is_sampling_event(st
   */
  static inline int is_software_event(struct perf_event *event)
  {
-       return event->pmu->task_ctx_nr == perf_sw_context;
+       return event->event_caps & PERF_EV_CAP_SOFTWARE;
  }
  
  extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
diff --combined include/linux/sched.h
index b0fa726b7f31a625cb25667e55cb4d919a2271d4,eb64fcd89e68160d4b76a300dca4d0d64e076435..06bd6ab542313770f01251b1af0400086f51c97f
@@@ -1022,7 -1022,8 +1022,8 @@@ extern void wake_up_q(struct wake_q_hea
  #define SD_BALANCE_FORK               0x0008  /* Balance on fork, clone */
  #define SD_BALANCE_WAKE               0x0010  /* Balance on wakeup */
  #define SD_WAKE_AFFINE                0x0020  /* Wake task to waking CPU */
- #define SD_SHARE_CPUCAPACITY  0x0080  /* Domain members share cpu power */
+ #define SD_ASYM_CPUCAPACITY   0x0040  /* Groups have different max cpu capacities */
+ #define SD_SHARE_CPUCAPACITY  0x0080  /* Domain members share cpu capacity */
  #define SD_SHARE_POWERDOMAIN  0x0100  /* Domain members share power domain */
  #define SD_SHARE_PKG_RESOURCES        0x0200  /* Domain members share cpu pkg resources */
  #define SD_SERIALIZE          0x0400  /* Only a single load balancing instance */
@@@ -1923,6 -1924,9 +1924,9 @@@ struct task_struct 
  #ifdef CONFIG_MMU
        struct task_struct *oom_reaper_list;
  #endif
+ #ifdef CONFIG_VMAP_STACK
+       struct vm_struct *stack_vm_area;
+ #endif
  /* CPU-specific state of this task */
        struct thread_struct thread;
  /*
@@@ -1939,6 -1943,18 +1943,18 @@@ extern int arch_task_struct_size __read
  # define arch_task_struct_size (sizeof(struct task_struct))
  #endif
  
+ #ifdef CONFIG_VMAP_STACK
+ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+ {
+       return t->stack_vm_area;
+ }
+ #else
+ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+ {
+       return NULL;
+ }
+ #endif
  /* Future-safe accessor for struct task_struct's cpus_allowed. */
  #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  
@@@ -3236,6 -3252,15 +3252,15 @@@ static inline void cond_resched_rcu(voi
  #endif
  }
  
+ static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
+ {
+ #ifdef CONFIG_DEBUG_PREEMPT
+       return p->preempt_disable_ip;
+ #else
+       return 0;
+ #endif
+ }
  /*
   * Does a critical section need to be broken due to another
   * task waiting?: (technically does not depend on CONFIG_PREEMPT,
@@@ -3469,19 -3494,15 +3494,19 @@@ static inline unsigned long rlimit_max(
        return task_rlimit_max(current, limit);
  }
  
 +#define SCHED_CPUFREQ_RT      (1U << 0)
 +#define SCHED_CPUFREQ_DL      (1U << 1)
 +
 +#define SCHED_CPUFREQ_RT_DL   (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
 +
  #ifdef CONFIG_CPU_FREQ
  struct update_util_data {
 -      void (*func)(struct update_util_data *data,
 -                   u64 time, unsigned long util, unsigned long max);
 +       void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
  };
  
  void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
 -                      void (*func)(struct update_util_data *data, u64 time,
 -                                   unsigned long util, unsigned long max));
 +                       void (*func)(struct update_util_data *data, u64 time,
 +                                  unsigned int flags));
  void cpufreq_remove_update_util_hook(int cpu);
  #endif /* CONFIG_CPU_FREQ */
  
diff --combined kernel/cpu.c
index ebbf027dd4a13accdfd433d0cc50eb33182aa5cf,7c783876cbcb094f25b2f80392b5403af4426d57..e7eca02c757f7ede7743cabb00aa64c787e90e7a
@@@ -23,6 -23,8 +23,8 @@@
  #include <linux/tick.h>
  #include <linux/irq.h>
  #include <linux/smpboot.h>
+ #include <linux/relay.h>
+ #include <linux/slab.h>
  
  #include <trace/events/power.h>
  #define CREATE_TRACE_POINTS
@@@ -37,8 -39,9 +39,9 @@@
   * @thread:   Pointer to the hotplug thread
   * @should_run:       Thread should execute
   * @rollback: Perform a rollback
-  * @cb_stat:  The state for a single callback (install/uninstall)
-  * @cb:               Single callback function (install/uninstall)
+  * @single:   Single callback invocation
+  * @bringup:  Single callback bringup or teardown selector
+  * @cb_state: The state for a single callback (install/uninstall)
   * @result:   Result of the operation
   * @done:     Signal completion to the issuer of the task
   */
@@@ -49,8 -52,10 +52,10 @@@ struct cpuhp_cpu_state 
        struct task_struct      *thread;
        bool                    should_run;
        bool                    rollback;
+       bool                    single;
+       bool                    bringup;
+       struct hlist_node       *node;
        enum cpuhp_state        cb_state;
-       int                     (*cb)(unsigned int cpu);
        int                     result;
        struct completion       done;
  #endif
@@@ -68,35 -73,103 +73,103 @@@ static DEFINE_PER_CPU(struct cpuhp_cpu_
   * @cant_stop:        Bringup/teardown can't be stopped at this step
   */
  struct cpuhp_step {
-       const char      *name;
-       int             (*startup)(unsigned int cpu);
-       int             (*teardown)(unsigned int cpu);
-       bool            skip_onerr;
-       bool            cant_stop;
+       const char              *name;
+       union {
+               int             (*single)(unsigned int cpu);
+               int             (*multi)(unsigned int cpu,
+                                        struct hlist_node *node);
+       } startup;
+       union {
+               int             (*single)(unsigned int cpu);
+               int             (*multi)(unsigned int cpu,
+                                        struct hlist_node *node);
+       } teardown;
+       struct hlist_head       list;
+       bool                    skip_onerr;
+       bool                    cant_stop;
+       bool                    multi_instance;
  };
  
  static DEFINE_MUTEX(cpuhp_state_mutex);
  static struct cpuhp_step cpuhp_bp_states[];
  static struct cpuhp_step cpuhp_ap_states[];
  
+ static bool cpuhp_is_ap_state(enum cpuhp_state state)
+ {
+       /*
+        * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
+        * purposes as that state is handled explicitly in cpu_down.
+        */
+       return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
+ }
+ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
+ {
+       struct cpuhp_step *sp;
+       sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
+       return sp + state;
+ }
  /**
   * cpuhp_invoke_callback _ Invoke the callbacks for a given state
   * @cpu:      The cpu for which the callback should be invoked
   * @step:     The step in the state machine
-  * @cb:               The callback function to invoke
+  * @bringup:  True if the bringup callback should be invoked
   *
-  * Called from cpu hotplug and from the state register machinery
+  * Called from cpu hotplug and from the state register machinery.
   */
- static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state step,
-                                int (*cb)(unsigned int))
+ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
+                                bool bringup, struct hlist_node *node)
  {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-       int ret = 0;
-       if (cb) {
-               trace_cpuhp_enter(cpu, st->target, step, cb);
+       struct cpuhp_step *step = cpuhp_get_step(state);
+       int (*cbm)(unsigned int cpu, struct hlist_node *node);
+       int (*cb)(unsigned int cpu);
+       int ret, cnt;
+       if (!step->multi_instance) {
+               cb = bringup ? step->startup.single : step->teardown.single;
+               if (!cb)
+                       return 0;
+               trace_cpuhp_enter(cpu, st->target, state, cb);
                ret = cb(cpu);
-               trace_cpuhp_exit(cpu, st->state, step, ret);
+               trace_cpuhp_exit(cpu, st->state, state, ret);
+               return ret;
+       }
+       cbm = bringup ? step->startup.multi : step->teardown.multi;
+       if (!cbm)
+               return 0;
+       /* Single invocation for instance add/remove */
+       if (node) {
+               trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+               ret = cbm(cpu, node);
+               trace_cpuhp_exit(cpu, st->state, state, ret);
+               return ret;
+       }
+       /* State transition. Invoke on all instances */
+       cnt = 0;
+       hlist_for_each(node, &step->list) {
+               trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+               ret = cbm(cpu, node);
+               trace_cpuhp_exit(cpu, st->state, state, ret);
+               if (ret)
+                       goto err;
+               cnt++;
+       }
+       return 0;
+ err:
+       /* Rollback the instances if one failed */
+       cbm = !bringup ? step->startup.multi : step->teardown.multi;
+       if (!cbm)
+               return ret;
+       hlist_for_each(node, &step->list) {
+               if (!cnt--)
+                       break;
+               cbm(cpu, node);
        }
        return ret;
  }
@@@ -260,10 -333,17 +333,17 @@@ void cpu_hotplug_disable(void
  }
  EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
  
+ static void __cpu_hotplug_enable(void)
+ {
+       if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
+               return;
+       cpu_hotplug_disabled--;
+ }
  void cpu_hotplug_enable(void)
  {
        cpu_maps_update_begin();
-       WARN_ON(--cpu_hotplug_disabled < 0);
+       __cpu_hotplug_enable();
        cpu_maps_update_done();
  }
  EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
@@@ -330,12 -410,6 +410,6 @@@ static int notify_online(unsigned int c
        return 0;
  }
  
- static int notify_starting(unsigned int cpu)
- {
-       cpu_notify(CPU_STARTING, cpu);
-       return 0;
- }
  static int bringup_wait_for_ap(unsigned int cpu)
  {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@@ -349,8 -423,16 +423,16 @@@ static int bringup_cpu(unsigned int cpu
        struct task_struct *idle = idle_thread_get(cpu);
        int ret;
  
+       /*
+        * Some architectures have to walk the irq descriptors to
+        * setup the vector space for the cpu which comes online.
+        * Prevent irq alloc/free across the bringup.
+        */
+       irq_lock_sparse();
        /* Arch-specific enabling code. */
        ret = __cpu_up(cpu, idle);
+       irq_unlock_sparse();
        if (ret) {
                cpu_notify(CPU_UP_CANCELED, cpu);
                return ret;
  /*
   * Hotplug state machine related functions
   */
- static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st,
-                         struct cpuhp_step *steps)
+ static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
  {
        for (st->state++; st->state < st->target; st->state++) {
-               struct cpuhp_step *step = steps + st->state;
+               struct cpuhp_step *step = cpuhp_get_step(st->state);
  
                if (!step->skip_onerr)
-                       cpuhp_invoke_callback(cpu, st->state, step->startup);
+                       cpuhp_invoke_callback(cpu, st->state, true, NULL);
        }
  }
  
  static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
-                               struct cpuhp_step *steps, enum cpuhp_state target)
+                               enum cpuhp_state target)
  {
        enum cpuhp_state prev_state = st->state;
        int ret = 0;
  
        for (; st->state > target; st->state--) {
-               struct cpuhp_step *step = steps + st->state;
-               ret = cpuhp_invoke_callback(cpu, st->state, step->teardown);
+               ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
                if (ret) {
                        st->target = prev_state;
-                       undo_cpu_down(cpu, st, steps);
+                       undo_cpu_down(cpu, st);
                        break;
                }
        }
        return ret;
  }
  
- static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st,
-                       struct cpuhp_step *steps)
+ static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
  {
        for (st->state--; st->state > st->target; st->state--) {
-               struct cpuhp_step *step = steps + st->state;
+               struct cpuhp_step *step = cpuhp_get_step(st->state);
  
                if (!step->skip_onerr)
-                       cpuhp_invoke_callback(cpu, st->state, step->teardown);
+                       cpuhp_invoke_callback(cpu, st->state, false, NULL);
        }
  }
  
  static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
-                             struct cpuhp_step *steps, enum cpuhp_state target)
+                             enum cpuhp_state target)
  {
        enum cpuhp_state prev_state = st->state;
        int ret = 0;
  
        while (st->state < target) {
-               struct cpuhp_step *step;
                st->state++;
-               step = steps + st->state;
-               ret = cpuhp_invoke_callback(cpu, st->state, step->startup);
+               ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
                if (ret) {
                        st->target = prev_state;
-                       undo_cpu_up(cpu, st, steps);
+                       undo_cpu_up(cpu, st);
                        break;
                }
        }
@@@ -447,13 -522,13 +522,13 @@@ static int cpuhp_ap_offline(unsigned in
  {
        enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
  
-       return cpuhp_down_callbacks(cpu, st, cpuhp_ap_states, target);
+       return cpuhp_down_callbacks(cpu, st, target);
  }
  
  /* Execute the online startup callbacks. Used to be CPU_ONLINE */
  static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
  {
-       return cpuhp_up_callbacks(cpu, st, cpuhp_ap_states, st->target);
+       return cpuhp_up_callbacks(cpu, st, st->target);
  }
  
  /*
@@@ -476,18 -551,20 +551,20 @@@ static void cpuhp_thread_fun(unsigned i
        st->should_run = false;
  
        /* Single callback invocation for [un]install ? */
-       if (st->cb) {
+       if (st->single) {
                if (st->cb_state < CPUHP_AP_ONLINE) {
                        local_irq_disable();
-                       ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+                       ret = cpuhp_invoke_callback(cpu, st->cb_state,
+                                                   st->bringup, st->node);
                        local_irq_enable();
                } else {
-                       ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+                       ret = cpuhp_invoke_callback(cpu, st->cb_state,
+                                                   st->bringup, st->node);
                }
        } else if (st->rollback) {
                BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
  
-               undo_cpu_down(cpu, st, cpuhp_ap_states);
+               undo_cpu_down(cpu, st);
                /*
                 * This is a momentary workaround to keep the notifier users
                 * happy. Will go away once we got rid of the notifiers.
  }
  
  /* Invoke a single callback on a remote cpu */
- static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state,
-                                   int (*cb)(unsigned int))
+ static int
+ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
+                        struct hlist_node *node)
  {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
  
         * we invoke the thread function directly.
         */
        if (!st->thread)
-               return cpuhp_invoke_callback(cpu, state, cb);
+               return cpuhp_invoke_callback(cpu, state, bringup, node);
  
        st->cb_state = state;
-       st->cb = cb;
+       st->single = true;
+       st->bringup = bringup;
+       st->node = node;
        /*
         * Make sure the above stores are visible before should_run becomes
         * true. Paired with the mb() above in cpuhp_thread_fun()
  static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
  {
        st->result = 0;
-       st->cb = NULL;
+       st->single = false;
        /*
         * Make sure the above stores are visible before should_run becomes
         * true. Paired with the mb() above in cpuhp_thread_fun()
@@@ -674,12 -755,6 +755,6 @@@ static int notify_down_prepare(unsigne
        return err;
  }
  
- static int notify_dying(unsigned int cpu)
- {
-       cpu_notify(CPU_DYING, cpu);
-       return 0;
- }
  /* Take this CPU down. */
  static int take_cpu_down(void *_param)
  {
        if (err < 0)
                return err;
  
+       /*
+        * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
+        * do this step again.
+        */
+       WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
+       st->state--;
        /* Invoke the former CPU_DYING callbacks */
-       for (; st->state > target; st->state--) {
-               struct cpuhp_step *step = cpuhp_ap_states + st->state;
+       for (; st->state > target; st->state--)
+               cpuhp_invoke_callback(cpu, st->state, false, NULL);
  
-               cpuhp_invoke_callback(cpu, st->state, step->teardown);
-       }
        /* Give up timekeeping duties */
        tick_handover_do_timer();
        /* Park the stopper thread */
@@@ -734,7 -813,7 +813,7 @@@ static int takedown_cpu(unsigned int cp
        BUG_ON(cpu_online(cpu));
  
        /*
-        * The migration_call() CPU_DYING callback will have removed all
+        * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
         * runnable tasks from the cpu, there's only the idle task left now
         * that the migration thread is done doing the stop_machine thing.
         *
@@@ -787,7 -866,6 +866,6 @@@ void cpuhp_report_idle_dead(void
  #define notify_down_prepare   NULL
  #define takedown_cpu          NULL
  #define notify_dead           NULL
- #define notify_dying          NULL
  #endif
  
  #ifdef CONFIG_HOTPLUG_CPU
@@@ -836,7 -914,7 +914,7 @@@ static int __ref _cpu_down(unsigned in
         * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
         * to do the further cleanups.
         */
-       ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+       ret = cpuhp_down_callbacks(cpu, st, target);
        if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
                st->target = prev_state;
                st->rollback = true;
@@@ -877,10 -955,9 +955,9 @@@ EXPORT_SYMBOL(cpu_down)
  #endif /*CONFIG_HOTPLUG_CPU*/
  
  /**
-  * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+  * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
   * @cpu: cpu that just started
   *
-  * This function calls the cpu_chain notifiers with CPU_STARTING.
   * It must be called by the arch code on the new cpu, before the new cpu
   * enables interrupts and before the "boot" cpu returns from __cpu_up().
   */
@@@ -890,11 -967,8 +967,8 @@@ void notify_cpu_starting(unsigned int c
        enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
  
        while (st->state < target) {
-               struct cpuhp_step *step;
                st->state++;
-               step = cpuhp_ap_states + st->state;
-               cpuhp_invoke_callback(cpu, st->state, step->startup);
+               cpuhp_invoke_callback(cpu, st->state, true, NULL);
        }
  }
  
@@@ -979,7 -1053,7 +1053,7 @@@ static int _cpu_up(unsigned int cpu, in
         * responsible for bringing it up to the target state.
         */
        target = min((int)target, CPUHP_BRINGUP_CPU);
-       ret = cpuhp_up_callbacks(cpu, st, cpuhp_bp_states, target);
+       ret = cpuhp_up_callbacks(cpu, st, target);
  out:
        cpu_hotplug_done();
        return ret;
@@@ -1024,13 -1098,12 +1098,13 @@@ EXPORT_SYMBOL_GPL(cpu_up)
  #ifdef CONFIG_PM_SLEEP_SMP
  static cpumask_var_t frozen_cpus;
  
 -int disable_nonboot_cpus(void)
 +int freeze_secondary_cpus(int primary)
  {
 -      int cpu, first_cpu, error = 0;
 +      int cpu, error = 0;
  
        cpu_maps_update_begin();
 -      first_cpu = cpumask_first(cpu_online_mask);
 +      if (!cpu_online(primary))
 +              primary = cpumask_first(cpu_online_mask);
        /*
         * We take down all of the non-boot CPUs in one shot to avoid races
         * with the userspace trying to use the CPU hotplug at the same time
  
        pr_info("Disabling non-boot CPUs ...\n");
        for_each_online_cpu(cpu) {
 -              if (cpu == first_cpu)
 +              if (cpu == primary)
                        continue;
                trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
                error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
@@@ -1082,7 -1155,7 +1156,7 @@@ void enable_nonboot_cpus(void
  
        /* Allow everyone to use the CPU hotplug again */
        cpu_maps_update_begin();
-       WARN_ON(--cpu_hotplug_disabled < 0);
+       __cpu_hotplug_enable();
        if (cpumask_empty(frozen_cpus))
                goto out;
  
@@@ -1171,40 -1244,50 +1245,50 @@@ core_initcall(cpu_hotplug_pm_sync_init)
  static struct cpuhp_step cpuhp_bp_states[] = {
        [CPUHP_OFFLINE] = {
                .name                   = "offline",
-               .startup                = NULL,
-               .teardown               = NULL,
+               .startup.single         = NULL,
+               .teardown.single        = NULL,
        },
  #ifdef CONFIG_SMP
        [CPUHP_CREATE_THREADS]= {
-               .name                   = "threads:create",
-               .startup                = smpboot_create_threads,
-               .teardown               = NULL,
+               .name                   = "threads:prepare",
+               .startup.single         = smpboot_create_threads,
+               .teardown.single        = NULL,
                .cant_stop              = true,
        },
        [CPUHP_PERF_PREPARE] = {
-               .name = "perf prepare",
-               .startup = perf_event_init_cpu,
-               .teardown = perf_event_exit_cpu,
+               .name                   = "perf:prepare",
+               .startup.single         = perf_event_init_cpu,
+               .teardown.single        = perf_event_exit_cpu,
        },
        [CPUHP_WORKQUEUE_PREP] = {
-               .name = "workqueue prepare",
-               .startup = workqueue_prepare_cpu,
-               .teardown = NULL,
+               .name                   = "workqueue:prepare",
+               .startup.single         = workqueue_prepare_cpu,
+               .teardown.single        = NULL,
        },
        [CPUHP_HRTIMERS_PREPARE] = {
-               .name = "hrtimers prepare",
-               .startup = hrtimers_prepare_cpu,
-               .teardown = hrtimers_dead_cpu,
+               .name                   = "hrtimers:prepare",
+               .startup.single         = hrtimers_prepare_cpu,
+               .teardown.single        = hrtimers_dead_cpu,
        },
        [CPUHP_SMPCFD_PREPARE] = {
-               .name = "SMPCFD prepare",
-               .startup = smpcfd_prepare_cpu,
-               .teardown = smpcfd_dead_cpu,
+               .name                   = "smpcfd:prepare",
+               .startup.single         = smpcfd_prepare_cpu,
+               .teardown.single        = smpcfd_dead_cpu,
+       },
+       [CPUHP_RELAY_PREPARE] = {
+               .name                   = "relay:prepare",
+               .startup.single         = relay_prepare_cpu,
+               .teardown.single        = NULL,
+       },
+       [CPUHP_SLAB_PREPARE] = {
+               .name                   = "slab:prepare",
+               .startup.single         = slab_prepare_cpu,
+               .teardown.single        = slab_dead_cpu,
        },
        [CPUHP_RCUTREE_PREP] = {
-               .name = "RCU-tree prepare",
-               .startup = rcutree_prepare_cpu,
-               .teardown = rcutree_dead_cpu,
+               .name                   = "RCU/tree:prepare",
+               .startup.single         = rcutree_prepare_cpu,
+               .teardown.single        = rcutree_dead_cpu,
        },
        /*
         * Preparatory and dead notifiers. Will be replaced once the notifiers
         */
        [CPUHP_NOTIFY_PREPARE] = {
                .name                   = "notify:prepare",
-               .startup                = notify_prepare,
-               .teardown               = notify_dead,
+               .startup.single         = notify_prepare,
+               .teardown.single        = notify_dead,
                .skip_onerr             = true,
                .cant_stop              = true,
        },
         * otherwise an RCU stall occurs.
         */
        [CPUHP_TIMERS_DEAD] = {
-               .name = "timers dead",
-               .startup = NULL,
-               .teardown = timers_dead_cpu,
+               .name                   = "timers:dead",
+               .startup.single         = NULL,
+               .teardown.single        = timers_dead_cpu,
        },
        /* Kicks the plugged cpu into life */
        [CPUHP_BRINGUP_CPU] = {
                .name                   = "cpu:bringup",
-               .startup                = bringup_cpu,
-               .teardown               = NULL,
+               .startup.single         = bringup_cpu,
+               .teardown.single        = NULL,
                .cant_stop              = true,
        },
        [CPUHP_AP_SMPCFD_DYING] = {
-               .startup = NULL,
-               .teardown = smpcfd_dying_cpu,
+               .name                   = "smpcfd:dying",
+               .startup.single         = NULL,
+               .teardown.single        = smpcfd_dying_cpu,
        },
        /*
         * Handled on control processor until the plugged processor manages
         * this itself.
         */
        [CPUHP_TEARDOWN_CPU] = {
                .name                   = "cpu:teardown",
-               .startup                = NULL,
-               .teardown               = takedown_cpu,
+               .startup.single         = NULL,
+               .teardown.single        = takedown_cpu,
                .cant_stop              = true,
        },
  #else
@@@ -1271,24 -1355,13 +1356,13 @@@ static struct cpuhp_step cpuhp_ap_state
        /* First state is scheduler control. Interrupts are disabled */
        [CPUHP_AP_SCHED_STARTING] = {
                .name                   = "sched:starting",
-               .startup                = sched_cpu_starting,
-               .teardown               = sched_cpu_dying,
+               .startup.single         = sched_cpu_starting,
+               .teardown.single        = sched_cpu_dying,
        },
        [CPUHP_AP_RCUTREE_DYING] = {
-               .startup = NULL,
-               .teardown = rcutree_dying_cpu,
-       },
-       /*
-        * Low level startup/teardown notifiers. Run with interrupts
-        * disabled. Will be removed once the notifiers are converted to
-        * states.
-        */
-       [CPUHP_AP_NOTIFY_STARTING] = {
-               .name                   = "notify:starting",
-               .startup                = notify_starting,
-               .teardown               = notify_dying,
-               .skip_onerr             = true,
-               .cant_stop              = true,
+               .name                   = "RCU/tree:dying",
+               .startup.single         = NULL,
+               .teardown.single        = rcutree_dying_cpu,
        },
        /* Entry state on starting. Interrupts enabled from here on. Transient
         * state for synchronization */
        },
        /* Handle smpboot threads park/unpark */
        [CPUHP_AP_SMPBOOT_THREADS] = {
-               .name                   = "smpboot:threads",
-               .startup                = smpboot_unpark_threads,
-               .teardown               = NULL,
+               .name                   = "smpboot/threads:online",
+               .startup.single         = smpboot_unpark_threads,
+               .teardown.single        = NULL,
        },
        [CPUHP_AP_PERF_ONLINE] = {
-               .name = "perf online",
-               .startup = perf_event_init_cpu,
-               .teardown = perf_event_exit_cpu,
+               .name                   = "perf:online",
+               .startup.single         = perf_event_init_cpu,
+               .teardown.single        = perf_event_exit_cpu,
        },
        [CPUHP_AP_WORKQUEUE_ONLINE] = {
-               .name = "workqueue online",
-               .startup = workqueue_online_cpu,
-               .teardown = workqueue_offline_cpu,
+               .name                   = "workqueue:online",
+               .startup.single         = workqueue_online_cpu,
+               .teardown.single        = workqueue_offline_cpu,
        },
        [CPUHP_AP_RCUTREE_ONLINE] = {
-               .name = "RCU-tree online",
-               .startup = rcutree_online_cpu,
-               .teardown = rcutree_offline_cpu,
+               .name                   = "RCU/tree:online",
+               .startup.single         = rcutree_online_cpu,
+               .teardown.single        = rcutree_offline_cpu,
        },
  
        /*
         */
        [CPUHP_AP_NOTIFY_ONLINE] = {
                .name                   = "notify:online",
-               .startup                = notify_online,
-               .teardown               = notify_down_prepare,
+               .startup.single         = notify_online,
+               .teardown.single        = notify_down_prepare,
                .skip_onerr             = true,
        },
  #endif
        /* Last state is scheduler control setting the cpu active */
        [CPUHP_AP_ACTIVE] = {
                .name                   = "sched:active",
-               .startup                = sched_cpu_activate,
-               .teardown               = sched_cpu_deactivate,
+               .startup.single         = sched_cpu_activate,
+               .teardown.single        = sched_cpu_deactivate,
        },
  #endif
  
        /* CPU is fully up and running. */
        [CPUHP_ONLINE] = {
                .name                   = "online",
-               .startup                = NULL,
-               .teardown               = NULL,
+               .startup.single         = NULL,
+               .teardown.single        = NULL,
        },
  };
  
@@@ -1357,54 -1430,42 +1431,42 @@@ static int cpuhp_cb_check(enum cpuhp_st
        return 0;
  }
  
- static bool cpuhp_is_ap_state(enum cpuhp_state state)
- {
-       /*
-        * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
-        * purposes as that state is handled explicitly in cpu_down.
-        */
-       return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
- }
- static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
- {
-       struct cpuhp_step *sp;
-       sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
-       return sp + state;
- }
  static void cpuhp_store_callbacks(enum cpuhp_state state,
                                  const char *name,
                                  int (*startup)(unsigned int cpu),
-                                 int (*teardown)(unsigned int cpu))
+                                 int (*teardown)(unsigned int cpu),
+                                 bool multi_instance)
  {
        /* (Un)Install the callbacks for further cpu hotplug operations */
        struct cpuhp_step *sp;
  
        mutex_lock(&cpuhp_state_mutex);
        sp = cpuhp_get_step(state);
-       sp->startup = startup;
-       sp->teardown = teardown;
+       sp->startup.single = startup;
+       sp->teardown.single = teardown;
        sp->name = name;
+       sp->multi_instance = multi_instance;
+       INIT_HLIST_HEAD(&sp->list);
        mutex_unlock(&cpuhp_state_mutex);
  }
  
  static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
  {
-       return cpuhp_get_step(state)->teardown;
+       return cpuhp_get_step(state)->teardown.single;
  }
  
  /*
   * Call the startup/teardown function for a step either on the AP or
   * on the current CPU.
   */
- static int cpuhp_issue_call(int cpu, enum cpuhp_state state,
-                           int (*cb)(unsigned int), bool bringup)
+ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
+                           struct hlist_node *node)
  {
+       struct cpuhp_step *sp = cpuhp_get_step(state);
        int ret;
  
-       if (!cb)
+       if ((bringup && !sp->startup.single) ||
+           (!bringup && !sp->teardown.single))
                return 0;
        /*
         * The non AP bound callbacks can fail on bringup. On teardown
         */
  #ifdef CONFIG_SMP
        if (cpuhp_is_ap_state(state))
-               ret = cpuhp_invoke_ap_callback(cpu, state, cb);
+               ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
        else
-               ret = cpuhp_invoke_callback(cpu, state, cb);
+               ret = cpuhp_invoke_callback(cpu, state, bringup, node);
  #else
-       ret = cpuhp_invoke_callback(cpu, state, cb);
+       ret = cpuhp_invoke_callback(cpu, state, bringup, node);
  #endif
        BUG_ON(ret && !bringup);
        return ret;
   * Note: The teardown callbacks for rollback are not allowed to fail!
   */
  static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
-                                  int (*teardown)(unsigned int cpu))
+                                  struct hlist_node *node)
  {
        int cpu;
  
-       if (!teardown)
-               return;
        /* Roll back the already executed steps on the other cpus */
        for_each_present_cpu(cpu) {
                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
  
                /* Did we invoke the startup call on that cpu ? */
                if (cpustate >= state)
-                       cpuhp_issue_call(cpu, state, teardown, false);
+                       cpuhp_issue_call(cpu, state, false, node);
        }
  }
  
@@@ -1472,6 -1530,52 +1531,52 @@@ static int cpuhp_reserve_state(enum cpu
        return -ENOSPC;
  }
  
+ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+                              bool invoke)
+ {
+       struct cpuhp_step *sp;
+       int cpu;
+       int ret;
+       sp = cpuhp_get_step(state);
+       if (sp->multi_instance == false)
+               return -EINVAL;
+       get_online_cpus();
+       if (!invoke || !sp->startup.multi)
+               goto add_node;
+       /*
+        * Try to call the startup callback for each present cpu
+        * depending on the hotplug state of the cpu.
+        */
+       for_each_present_cpu(cpu) {
+               struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+               int cpustate = st->state;
+               if (cpustate < state)
+                       continue;
+               ret = cpuhp_issue_call(cpu, state, true, node);
+               if (ret) {
+                       if (sp->teardown.multi)
+                               cpuhp_rollback_install(cpu, state, node);
+                       goto err;
+               }
+       }
+ add_node:
+       ret = 0;
+       mutex_lock(&cpuhp_state_mutex);
+       hlist_add_head(node, &sp->list);
+       mutex_unlock(&cpuhp_state_mutex);
+ err:
+       put_online_cpus();
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
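A hedged driver-side sketch of the multi-instance interface exported above. Only cpuhp_setup_state_multi() and cpuhp_state_add_instance() (the cpuhotplug.h wrappers added in this same series) are real; the "example" driver, its callbacks and its data layout are purely illustrative.

#include <linux/cpuhotplug.h>
#include <linux/list.h>

struct example_dev {
	struct hlist_node node;		/* instance handle for the hotplug core */
	/* ... per-device state ... */
};

static enum cpuhp_state example_online_state;

static int example_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct example_dev *dev = hlist_entry(node, struct example_dev, node);

	/* bring up the per-CPU part of this particular device instance */
	(void)dev;
	return 0;
}

static int example_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct example_dev *dev = hlist_entry(node, struct example_dev, node);

	/* tear down the per-CPU part of this particular device instance */
	(void)dev;
	return 0;
}

static int __init example_init(void)
{
	int ret;

	/* One state for the whole driver, set up once. */
	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "example:online",
				      example_cpu_online, example_cpu_offline);
	if (ret < 0)
		return ret;
	example_online_state = ret;
	return 0;
}

static int example_probe(struct example_dev *dev)
{
	/* One instance per device; the startup callback runs on each online CPU. */
	return cpuhp_state_add_instance(example_online_state, &dev->node);
}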
  /**
   * __cpuhp_setup_state - Set up the callbacks for a hotplug machine state
   * @state:    The state to set up
  int __cpuhp_setup_state(enum cpuhp_state state,
                        const char *name, bool invoke,
                        int (*startup)(unsigned int cpu),
-                       int (*teardown)(unsigned int cpu))
+                       int (*teardown)(unsigned int cpu),
+                       bool multi_instance)
  {
        int cpu, ret = 0;
        int dyn_state = 0;
                state = ret;
        }
  
-       cpuhp_store_callbacks(state, name, startup, teardown);
+       cpuhp_store_callbacks(state, name, startup, teardown, multi_instance);
  
        if (!invoke || !startup)
                goto out;
                if (cpustate < state)
                        continue;
  
-               ret = cpuhp_issue_call(cpu, state, startup, true);
+               ret = cpuhp_issue_call(cpu, state, true, NULL);
                if (ret) {
-                       cpuhp_rollback_install(cpu, state, teardown);
-                       cpuhp_store_callbacks(state, NULL, NULL, NULL);
+                       if (teardown)
+                               cpuhp_rollback_install(cpu, state, NULL);
+                       cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
                        goto out;
                }
        }
  }
  EXPORT_SYMBOL(__cpuhp_setup_state);
  
+ int __cpuhp_state_remove_instance(enum cpuhp_state state,
+                                 struct hlist_node *node, bool invoke)
+ {
+       struct cpuhp_step *sp = cpuhp_get_step(state);
+       int cpu;
+       BUG_ON(cpuhp_cb_check(state));
+       if (!sp->multi_instance)
+               return -EINVAL;
+       get_online_cpus();
+       if (!invoke || !cpuhp_get_teardown_cb(state))
+               goto remove;
+       /*
+        * Call the teardown callback for each present cpu depending
+        * on the hotplug state of the cpu. This function is not
+        * allowed to fail currently!
+        */
+       for_each_present_cpu(cpu) {
+               struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+               int cpustate = st->state;
+               if (cpustate >= state)
+                       cpuhp_issue_call(cpu, state, false, node);
+       }
+ remove:
+       mutex_lock(&cpuhp_state_mutex);
+       hlist_del(node);
+       mutex_unlock(&cpuhp_state_mutex);
+       put_online_cpus();
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
  /**
   * __cpuhp_remove_state - Remove the callbacks for a hotplug machine state
   * @state:    The state to remove
   */
  void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
  {
-       int (*teardown)(unsigned int cpu) = cpuhp_get_teardown_cb(state);
+       struct cpuhp_step *sp = cpuhp_get_step(state);
        int cpu;
  
        BUG_ON(cpuhp_cb_check(state));
  
        get_online_cpus();
  
-       if (!invoke || !teardown)
+       if (sp->multi_instance) {
+               WARN(!hlist_empty(&sp->list),
+                    "Error: Removing state %d which has instances left.\n",
+                    state);
+               goto remove;
+       }
+       if (!invoke || !cpuhp_get_teardown_cb(state))
                goto remove;
  
        /*
                int cpustate = st->state;
  
                if (cpustate >= state)
-                       cpuhp_issue_call(cpu, state, teardown, false);
+                       cpuhp_issue_call(cpu, state, false, NULL);
        }
  remove:
-       cpuhp_store_callbacks(state, NULL, NULL, NULL);
+       cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
        put_online_cpus();
  }
  EXPORT_SYMBOL(__cpuhp_remove_state);
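For comparison with the multi-instance variant above, a minimal sketch of the single-callback interface that __cpuhp_setup_state()/__cpuhp_remove_state() back, via the cpuhp_setup_state()/cpuhp_remove_state() wrappers from cpuhotplug.h. The "subsys" callbacks are illustrative.

static int subsys_cpu_online(unsigned int cpu)
{
	/* per-CPU bringup; may fail, which rolls back the already-done CPUs */
	return 0;
}

static int subsys_cpu_offline(unsigned int cpu)
{
	/* per-CPU teardown; not allowed to fail */
	return 0;
}

static int __init subsys_init(void)
{
	int ret;

	/*
	 * Dynamic state in the online section; because 'invoke' is true in
	 * the wrapper, the online callback also runs on every CPU that is
	 * already up at registration time.
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys:online",
				subsys_cpu_online, subsys_cpu_offline);
	return ret < 0 ? ret : 0;
}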
diff --combined kernel/events/core.c
index a7b8c1c75fa71c057b504d249bfeff0161f0cd1f,fedba316cca0ec02e63c63f02743b45d1536d4fc..4a4a94774dfa3ed5e402b13d83d506596251ff41
@@@ -1475,8 -1475,7 +1475,7 @@@ list_add_event(struct perf_event *event
        if (event->group_leader == event) {
                struct list_head *list;
  
-               if (is_software_event(event))
-                       event->group_flags |= PERF_GROUP_SOFTWARE;
+               event->group_caps = event->event_caps;
  
                list = ctx_group_list(event, ctx);
                list_add_tail(&event->group_entry, list);
@@@ -1630,9 -1629,7 +1629,7 @@@ static void perf_group_attach(struct pe
  
        WARN_ON_ONCE(group_leader->ctx != event->ctx);
  
-       if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
-                       !is_software_event(event))
-               group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+       group_leader->group_caps &= event->event_caps;
  
        list_add_tail(&event->group_entry, &group_leader->sibling_list);
        group_leader->nr_siblings++;
@@@ -1723,7 -1720,7 +1720,7 @@@ static void perf_group_detach(struct pe
                sibling->group_leader = sibling;
  
                /* Inherit group flags from the previous leader */
-               sibling->group_flags = event->group_flags;
+               sibling->group_caps = event->group_caps;
  
                WARN_ON_ONCE(sibling->ctx != event->ctx);
        }
@@@ -1832,6 -1829,8 +1829,8 @@@ group_sched_out(struct perf_event *grou
        struct perf_event *event;
        int state = group_event->state;
  
+       perf_pmu_disable(ctx->pmu);
        event_sched_out(group_event, cpuctx, ctx);
  
        /*
        list_for_each_entry(event, &group_event->sibling_list, group_entry)
                event_sched_out(event, cpuctx, ctx);
  
+       perf_pmu_enable(ctx->pmu);
        if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
                cpuctx->exclusive = 0;
  }
@@@ -2145,7 -2146,7 +2146,7 @@@ static int group_can_go_on(struct perf_
        /*
         * Groups consisting entirely of software events can always go on.
         */
-       if (event->group_flags & PERF_GROUP_SOFTWARE)
+       if (event->group_caps & PERF_EV_CAP_SOFTWARE)
                return 1;
        /*
         * If an exclusive group is already on, no other hardware
@@@ -2491,16 -2492,16 +2492,16 @@@ static int __perf_event_stop(void *info
         * while restarting.
         */
        if (sd->restart)
-               event->pmu->start(event, PERF_EF_START);
+               event->pmu->start(event, 0);
  
        return 0;
  }
  
- static int perf_event_restart(struct perf_event *event)
+ static int perf_event_stop(struct perf_event *event, int restart)
  {
        struct stop_event_data sd = {
                .event          = event,
-               .restart        = 1,
+               .restart        = restart,
        };
        int ret = 0;
  
@@@ -2837,19 -2838,36 +2838,36 @@@ unlock
        }
  }
  
+ static DEFINE_PER_CPU(struct list_head, sched_cb_list);
  void perf_sched_cb_dec(struct pmu *pmu)
  {
+       struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
        this_cpu_dec(perf_sched_cb_usages);
+       if (!--cpuctx->sched_cb_usage)
+               list_del(&cpuctx->sched_cb_entry);
  }
  
  void perf_sched_cb_inc(struct pmu *pmu)
  {
+       struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+       if (!cpuctx->sched_cb_usage++)
+               list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
        this_cpu_inc(perf_sched_cb_usages);
  }
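These counters are meant to be driven from a PMU's own add/del path: a driver bumps the per-PMU count only while it has an event scheduled in that actually needs the context-switch hook (x86 PEBS is the in-tree user). A hedged sketch with illustrative names; only perf_sched_cb_inc()/perf_sched_cb_dec() and the ->sched_task() signature are real interfaces here.

/* Hypothetical predicate: does this event need a buffer flush at switch? */
static bool example_event_needs_ctxsw_flush(struct perf_event *event)
{
	return event->attr.precise_ip != 0;	/* illustrative condition */
}

static void example_pmu_event_added(struct perf_event *event)
{
	if (example_event_needs_ctxsw_flush(event))
		perf_sched_cb_inc(event->ctx->pmu);
}

static void example_pmu_event_deleted(struct perf_event *event)
{
	if (example_event_needs_ctxsw_flush(event))
		perf_sched_cb_dec(event->ctx->pmu);
}

/* Invoked through cpuctx->unique_pmu->sched_task() in perf_pmu_sched_task(). */
static void example_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	if (!sched_in) {
		/* drain hardware buffers so records stay attributed to 'prev' */
	}
}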
  
  /*
   * This function provides the context switch callback to the lower code
   * layer. It is invoked ONLY when the context switch callback is enabled.
+  *
 +  * This callback is relevant even to per-cpu events; for example multi-event
 +  * PEBS requires it to provide PID/TID information, which means all queued
 +  * PEBS records must be flushed before we context-switch to a new task.
   */
  static void perf_pmu_sched_task(struct task_struct *prev,
                                struct task_struct *next,
                                bool sched_in)
  {
        struct perf_cpu_context *cpuctx;
        struct pmu *pmu;
-       unsigned long flags;
  
        if (prev == next)
                return;
  
-       local_irq_save(flags);
-       rcu_read_lock();
-       list_for_each_entry_rcu(pmu, &pmus, entry) {
-               if (pmu->sched_task) {
-                       cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-                       perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+       list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
+               pmu = cpuctx->unique_pmu; /* software PMUs will not have sched_task */
  
-                       perf_pmu_disable(pmu);
+               if (WARN_ON_ONCE(!pmu->sched_task))
+                       continue;
  
-                       pmu->sched_task(cpuctx->task_ctx, sched_in);
+               perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+               perf_pmu_disable(pmu);
  
-                       perf_pmu_enable(pmu);
+               pmu->sched_task(cpuctx->task_ctx, sched_in);
  
-                       perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-               }
+               perf_pmu_enable(pmu);
+               perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
        }
-       rcu_read_unlock();
-       local_irq_restore(flags);
  }
  
  static void perf_event_switch(struct task_struct *task,
@@@ -3416,6 -3424,22 +3424,22 @@@ struct perf_read_data 
        int ret;
  };
  
+ static int find_cpu_to_read(struct perf_event *event, int local_cpu)
+ {
+       int event_cpu = event->oncpu;
+       u16 local_pkg, event_pkg;
+       if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
+               event_pkg =  topology_physical_package_id(event_cpu);
+               local_pkg =  topology_physical_package_id(local_cpu);
+               if (event_pkg == local_pkg)
+                       return local_cpu;
+       }
+       return event_cpu;
+ }
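The capability checked here is intended to be set at event-init time by package-scoped PMUs (uncore-style counters whose value can be read from any CPU in the package), so perf_event_read() can stay on the local package and avoid a cross-package IPI. A hedged sketch; the PMU name and init function are illustrative, only the event_caps flag comes from this series.

static struct pmu example_uncore_pmu;	/* hypothetical package-scoped PMU */

static int example_uncore_event_init(struct perf_event *event)
{
	if (event->attr.type != example_uncore_pmu.type)
		return -ENOENT;

	/* Counter state is per package; any CPU in the package may read it. */
	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
	return 0;
}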
  /*
   * Cross CPU call to read the hardware event
   */
@@@ -3537,7 -3561,7 +3561,7 @@@ u64 perf_event_read_local(struct perf_e
  
  static int perf_event_read(struct perf_event *event, bool group)
  {
-       int ret = 0;
+       int ret = 0, cpu_to_read, local_cpu;
  
        /*
         * If event is enabled and currently active on a CPU, update the
                        .group = group,
                        .ret = 0,
                };
-               ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
-               /* The event must have been read from an online CPU: */
-               WARN_ON_ONCE(ret);
-               ret = ret ? : data.ret;
+               local_cpu = get_cpu();
+               cpu_to_read = find_cpu_to_read(event, local_cpu);
+               put_cpu();
+               /*
+                * Purposely ignore the smp_call_function_single() return
+                * value.
+                *
+                * If event->oncpu isn't a valid CPU it means the event got
+                * scheduled out and that will have updated the event count.
+                *
+                * Therefore, either way, we'll have an up-to-date event count
+                * after this.
+                */
+               (void)smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1);
+               ret = data.ret;
        } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
                struct perf_event_context *ctx = event->ctx;
                unsigned long flags;
@@@ -4837,6 -4874,19 +4874,19 @@@ static void ring_buffer_attach(struct p
                spin_unlock_irqrestore(&rb->event_lock, flags);
        }
  
+       /*
+        * Avoid racing with perf_mmap_close(AUX): stop the event
+        * before swizzling the event::rb pointer; if it's getting
+        * unmapped, its aux_mmap_count will be 0 and it won't
+        * restart. See the comment in __perf_pmu_output_stop().
+        *
+        * Data will inevitably be lost when set_output is done in
+        * mid-air, but then again, whoever does it like this is
+        * not in for the data anyway.
+        */
+       if (has_aux(event))
+               perf_event_stop(event, 0);
        rcu_assign_pointer(event->rb, rb);
  
        if (old_rb) {
@@@ -5329,9 -5379,10 +5379,10 @@@ perf_output_sample_regs(struct perf_out
                        struct pt_regs *regs, u64 mask)
  {
        int bit;
+       DECLARE_BITMAP(_mask, 64);
  
-       for_each_set_bit(bit, (const unsigned long *) &mask,
-                        sizeof(mask) * BITS_PER_BYTE) {
+       bitmap_from_u64(_mask, mask);
+       for_each_set_bit(bit, _mask, sizeof(mask) * BITS_PER_BYTE) {
                u64 val;
  
                val = perf_reg_value(regs, bit);
@@@ -6112,7 -6163,7 +6163,7 @@@ static void perf_event_addr_filters_exe
        raw_spin_unlock_irqrestore(&ifh->lock, flags);
  
        if (restart)
-               perf_event_restart(event);
+               perf_event_stop(event, 1);
  }
  
  void perf_event_exec(void)
@@@ -6156,7 -6207,13 +6207,13 @@@ static void __perf_event_output_stop(st
  
        /*
         * In case of inheritance, it will be the parent that links to the
-        * ring-buffer, but it will be the child that's actually using it:
+        * ring-buffer, but it will be the child that's actually using it.
+        *
+        * We are using event::rb to determine if the event should be stopped,
+        * however this may race with ring_buffer_attach() (through set_output),
+        * which will make us skip the event that actually needs to be stopped.
+        * So ring_buffer_attach() has to stop an aux event before re-assigning
+        * its rb pointer.
         */
        if (rcu_dereference(parent->rb) == rb)
                ro->err = __perf_event_stop(&sd);
@@@ -6670,7 -6727,7 +6727,7 @@@ static void __perf_addr_filters_adjust(
        raw_spin_unlock_irqrestore(&ifh->lock, flags);
  
        if (restart)
-               perf_event_restart(event);
+               perf_event_stop(event, 1);
  }
  
  /*
@@@ -7022,7 -7079,7 +7079,7 @@@ static int __perf_event_overflow(struc
                irq_work_queue(&event->pending);
        }
  
 -      event->overflow_handler(event, data, regs);
 +      READ_ONCE(event->overflow_handler)(event, data, regs);
  
        if (*perf_event_fasync(event) && event->pending_kill) {
                event->pending_wakeup = 1;
@@@ -7637,83 -7694,11 +7694,83 @@@ static void perf_event_free_filter(stru
        ftrace_profile_free_filter(event);
  }
  
 +#ifdef CONFIG_BPF_SYSCALL
 +static void bpf_overflow_handler(struct perf_event *event,
 +                               struct perf_sample_data *data,
 +                               struct pt_regs *regs)
 +{
 +      struct bpf_perf_event_data_kern ctx = {
 +              .data = data,
 +              .regs = regs,
 +      };
 +      int ret = 0;
 +
 +      preempt_disable();
 +      if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
 +              goto out;
 +      rcu_read_lock();
 +      ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
 +      rcu_read_unlock();
 +out:
 +      __this_cpu_dec(bpf_prog_active);
 +      preempt_enable();
 +      if (!ret)
 +              return;
 +
 +      event->orig_overflow_handler(event, data, regs);
 +}
 +
 +static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
 +{
 +      struct bpf_prog *prog;
 +
 +      if (event->overflow_handler_context)
 +              /* hw breakpoint or kernel counter */
 +              return -EINVAL;
 +
 +      if (event->prog)
 +              return -EEXIST;
 +
 +      prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
 +      if (IS_ERR(prog))
 +              return PTR_ERR(prog);
 +
 +      event->prog = prog;
 +      event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
 +      WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
 +      return 0;
 +}
 +
 +static void perf_event_free_bpf_handler(struct perf_event *event)
 +{
 +      struct bpf_prog *prog = event->prog;
 +
 +      if (!prog)
 +              return;
 +
 +      WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
 +      event->prog = NULL;
 +      bpf_prog_put(prog);
 +}
 +#else
 +static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
 +{
 +      return -EOPNOTSUPP;
 +}
 +static void perf_event_free_bpf_handler(struct perf_event *event)
 +{
 +}
 +#endif
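What the block above enables from userspace: a BPF_PROG_TYPE_PERF_EVENT program can now be attached to a hardware or software event, not only to a tracepoint, and bpf_overflow_handler() runs it on every overflow, dropping the sample whenever the program returns 0. A hedged userspace sketch; error handling is omitted and prog_fd is assumed to come from a prior bpf(BPF_PROG_LOAD, ...).

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int attach_bpf_sample_filter(int prog_fd)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.size		= sizeof(attr),
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.sample_period	= 100000,
	};
	int fd;

	fd = syscall(__NR_perf_event_open, &attr, 0 /* pid */, -1 /* cpu */,
		     -1 /* group_fd */, 0 /* flags */);
	if (fd < 0)
		return -1;

	/* Route overflows through the BPF program before a sample is written. */
	ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	return fd;
}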
 +
  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
  {
        bool is_kprobe, is_tracepoint;
        struct bpf_prog *prog;
  
 +      if (event->attr.type == PERF_TYPE_HARDWARE ||
 +          event->attr.type == PERF_TYPE_SOFTWARE)
 +              return perf_event_set_bpf_handler(event, prog_fd);
 +
        if (event->attr.type != PERF_TYPE_TRACEPOINT)
                return -EINVAL;
  
@@@ -7754,8 -7739,6 +7811,8 @@@ static void perf_event_free_bpf_prog(st
  {
        struct bpf_prog *prog;
  
 +      perf_event_free_bpf_handler(event);
 +
        if (!event->tp_event)
                return;
  
@@@ -7933,7 -7916,7 +7990,7 @@@ static void perf_event_addr_filters_app
        mmput(mm);
  
  restart:
-       perf_event_restart(event);
+       perf_event_stop(event, 1);
  }
  
  /*
@@@ -9072,19 -9055,6 +9129,19 @@@ perf_event_alloc(struct perf_event_att
        if (!overflow_handler && parent_event) {
                overflow_handler = parent_event->overflow_handler;
                context = parent_event->overflow_handler_context;
 +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
 +              if (overflow_handler == bpf_overflow_handler) {
 +                      struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
 +
 +                      if (IS_ERR(prog)) {
 +                              err = PTR_ERR(prog);
 +                              goto err_ns;
 +                      }
 +                      event->prog = prog;
 +                      event->orig_overflow_handler =
 +                              parent_event->orig_overflow_handler;
 +              }
 +#endif
        }
  
        if (overflow_handler) {
@@@ -9565,6 -9535,9 +9622,9 @@@ SYSCALL_DEFINE5(perf_event_open
                        goto err_alloc;
        }
  
+       if (pmu->task_ctx_nr == perf_sw_context)
+               event->event_caps |= PERF_EV_CAP_SOFTWARE;
        if (group_leader &&
            (is_software_event(event) != is_software_event(group_leader))) {
                if (is_software_event(event)) {
                         */
                        pmu = group_leader->pmu;
                } else if (is_software_event(group_leader) &&
-                          (group_leader->group_flags & PERF_GROUP_SOFTWARE)) {
+                          (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
                        /*
                         * In case the group is a pure software group, and we
                         * try to add a hardware event, move the whole group to
@@@ -10513,6 -10486,8 +10573,8 @@@ static void __init perf_event_init_all_
  
                INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
                raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
+               INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
        }
  }
  
diff --combined kernel/fork.c
index 05393881ef399ac365b6fa522864875a1e55b6f7,0c240fd5beba2c7eefc0df23375ea99291ce76e6..c2ecca44406b130e0ec7abc4d2648beff6738814
@@@ -158,19 -158,39 +158,39 @@@ void __weak arch_release_thread_stack(u
   * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
   * kmemcache based allocator.
   */
- # if THREAD_SIZE >= PAGE_SIZE
- static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
-                                                 int node)
+ # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
+ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
  {
+ #ifdef CONFIG_VMAP_STACK
+       void *stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
+                                          VMALLOC_START, VMALLOC_END,
+                                          THREADINFO_GFP | __GFP_HIGHMEM,
+                                          PAGE_KERNEL,
+                                          0, node,
+                                          __builtin_return_address(0));
+       /*
+        * We can't call find_vm_area() in interrupt context, and
+        * free_thread_stack() can be called in interrupt context,
+        * so cache the vm_struct.
+        */
+       if (stack)
+               tsk->stack_vm_area = find_vm_area(stack);
+       return stack;
+ #else
        struct page *page = alloc_pages_node(node, THREADINFO_GFP,
                                             THREAD_SIZE_ORDER);
  
        return page ? page_address(page) : NULL;
+ #endif
  }
  
- static inline void free_thread_stack(unsigned long *stack)
+ static inline void free_thread_stack(struct task_struct *tsk)
  {
-       __free_pages(virt_to_page(stack), THREAD_SIZE_ORDER);
+       if (task_stack_vm_area(tsk))
+               vfree(tsk->stack);
+       else
+               __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
  }
  # else
  static struct kmem_cache *thread_stack_cache;
@@@ -181,9 -201,9 +201,9 @@@ static unsigned long *alloc_thread_stac
        return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
  }
  
- static void free_thread_stack(unsigned long *stack)
+ static void free_thread_stack(struct task_struct *tsk)
  {
-       kmem_cache_free(thread_stack_cache, stack);
+       kmem_cache_free(thread_stack_cache, tsk->stack);
  }
  
  void thread_stack_cache_init(void)
@@@ -213,24 -233,47 +233,47 @@@ struct kmem_cache *vm_area_cachep
  /* SLAB cache for mm_struct structures (tsk->mm) */
  static struct kmem_cache *mm_cachep;
  
- static void account_kernel_stack(unsigned long *stack, int account)
+ static void account_kernel_stack(struct task_struct *tsk, int account)
  {
-       /* All stack pages are in the same zone and belong to the same memcg. */
-       struct page *first_page = virt_to_page(stack);
+       void *stack = task_stack_page(tsk);
+       struct vm_struct *vm = task_stack_vm_area(tsk);
+       BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+       if (vm) {
+               int i;
+               BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+               for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+                       mod_zone_page_state(page_zone(vm->pages[i]),
+                                           NR_KERNEL_STACK_KB,
+                                           PAGE_SIZE / 1024 * account);
+               }
  
-       mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
-                           THREAD_SIZE / 1024 * account);
+               /* All stack pages belong to the same memcg. */
+               memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB,
+                                           account * (THREAD_SIZE / 1024));
+       } else {
+               /*
+                * All stack pages are in the same zone and belong to the
+                * same memcg.
+                */
+               struct page *first_page = virt_to_page(stack);
  
-       memcg_kmem_update_page_stat(
-               first_page, MEMCG_KERNEL_STACK_KB,
-               account * (THREAD_SIZE / 1024));
+               mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
+                                   THREAD_SIZE / 1024 * account);
+               memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB,
+                                           account * (THREAD_SIZE / 1024));
+       }
  }
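A quick sanity check on the accounting above (the concrete numbers assume the common x86-64 configuration and are not stated in this hunk): both branches charge the same total per stack, the vmapped branch just does it page by page because vmalloc'd pages may sit in different zones.

/*
 * Assumed x86-64 defaults: THREAD_SIZE = 16384, PAGE_SIZE = 4096.
 *   vmapped branch:    4 pages * (4096 / 1024) KB = 16 KB per stack
 *   contiguous branch:            16384 / 1024 KB = 16 KB per stack
 */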
  
  void free_task(struct task_struct *tsk)
  {
-       account_kernel_stack(tsk->stack, -1);
+       account_kernel_stack(tsk, -1);
        arch_release_thread_stack(tsk->stack);
-       free_thread_stack(tsk->stack);
+       free_thread_stack(tsk);
        rt_mutex_debug_task_free(tsk);
        ftrace_graph_exit_task(tsk);
        put_seccomp_filter(tsk);
@@@ -342,6 -385,7 +385,7 @@@ static struct task_struct *dup_task_str
  {
        struct task_struct *tsk;
        unsigned long *stack;
+       struct vm_struct *stack_vm_area;
        int err;
  
        if (node == NUMA_NO_NODE)
        if (!stack)
                goto free_tsk;
  
+       stack_vm_area = task_stack_vm_area(tsk);
        err = arch_dup_task_struct(tsk, orig);
+       /*
+        * arch_dup_task_struct() clobbers the stack-related fields.  Make
+        * sure they're properly initialized before using any stack-related
+        * functions again.
+        */
+       tsk->stack = stack;
+ #ifdef CONFIG_VMAP_STACK
+       tsk->stack_vm_area = stack_vm_area;
+ #endif
        if (err)
                goto free_stack;
  
-       tsk->stack = stack;
  #ifdef CONFIG_SECCOMP
        /*
         * We must handle setting up seccomp filters once we're under
        tsk->task_frag.page = NULL;
        tsk->wake_q.next = NULL;
  
-       account_kernel_stack(stack, 1);
+       account_kernel_stack(tsk, 1);
  
        kcov_task_init(tsk);
  
        return tsk;
  
  free_stack:
-       free_thread_stack(stack);
+       free_thread_stack(tsk);
  free_tsk:
        free_task_struct(tsk);
        return NULL;
  }
  
  #ifdef CONFIG_MMU
 -static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 +static __latent_entropy int dup_mmap(struct mm_struct *mm,
 +                                      struct mm_struct *oldmm)
  {
        struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
        struct rb_node **rb_link, *rb_parent;
@@@ -1297,8 -1352,7 +1353,8 @@@ init_task_pid(struct task_struct *task
   * parts of the process environment (as per the clone
   * flags). The actual kick-off is left to the caller.
   */
 -static struct task_struct *copy_process(unsigned long clone_flags,
 +static __latent_entropy struct task_struct *copy_process(
 +                                      unsigned long clone_flags,
                                        unsigned long stack_start,
                                        unsigned long stack_size,
                                        int __user *child_tidptr,
@@@ -1782,7 -1836,6 +1838,7 @@@ long _do_fork(unsigned long clone_flags
  
        p = copy_process(clone_flags, stack_start, stack_size,
                         child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
 +      add_latent_entropy();
        /*
         * Do this prior waking up the new thread - the thread pointer
         * might get invalid after that point, if the thread exits quickly.
diff --combined kernel/sched/deadline.c
index 9747796569997a5b195144a1e1456f934fce25b9,0c75bc656178562a79ad3bad9d54d03edc45fd00..37e2449186c4f5effb60de3fdc0b85c78732c79a
@@@ -243,10 -243,8 +243,8 @@@ static struct rq *find_lock_later_rq(st
  static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
  {
        struct rq *later_rq = NULL;
-       bool fallback = false;
  
        later_rq = find_lock_later_rq(p, rq);
        if (!later_rq) {
                int cpu;
  
                 * If we cannot preempt any rq, fall back to pick any
                 * online cpu.
                 */
-               fallback = true;
                cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p));
                if (cpu >= nr_cpu_ids) {
                        /*
                double_lock_balance(rq, later_rq);
        }
  
-       /*
-        * By now the task is replenished and enqueued; migrate it.
-        */
-       deactivate_task(rq, p, 0);
        set_task_cpu(p, later_rq->cpu);
-       activate_task(later_rq, p, 0);
-       if (!fallback)
-               resched_curr(later_rq);
        double_unlock_balance(later_rq, rq);
  
        return later_rq;
@@@ -346,12 -334,12 +334,12 @@@ static void check_preempt_curr_dl(struc
   * one, and to (try to!) reconcile itself with its own scheduling
   * parameters.
   */
- static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
-                                      struct sched_dl_entity *pi_se)
+ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
  {
        struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
        struct rq *rq = rq_of_dl_rq(dl_rq);
  
+       WARN_ON(dl_se->dl_boosted);
        WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
  
        /*
         * future; in fact, we must consider execution overheads (time
         * spent on hardirq context, etc.).
         */
-       dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-       dl_se->runtime = pi_se->dl_runtime;
+       dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
+       dl_se->runtime = dl_se->dl_runtime;
  }
  
  /*
@@@ -641,29 -629,31 +629,31 @@@ static enum hrtimer_restart dl_task_tim
                goto unlock;
        }
  
-       enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
-       if (dl_task(rq->curr))
-               check_preempt_curr_dl(rq, p, 0);
-       else
-               resched_curr(rq);
  #ifdef CONFIG_SMP
-       /*
-        * Perform balancing operations here; after the replenishments.  We
-        * cannot drop rq->lock before this, otherwise the assertion in
-        * start_dl_timer() about not missing updates is not true.
-        *
-        * If we find that the rq the task was on is no longer available, we
-        * need to select a new rq.
-        *
-        * XXX figure out if select_task_rq_dl() deals with offline cpus.
-        */
        if (unlikely(!rq->online)) {
+               /*
+                * If the runqueue is no longer available, migrate the
+                * task elsewhere. This necessarily changes rq.
+                */
                lockdep_unpin_lock(&rq->lock, rf.cookie);
                rq = dl_task_offline_migration(rq, p);
                rf.cookie = lockdep_pin_lock(&rq->lock);
+               /*
+                * Now that the task has been migrated to the new RQ and we
+                * have that locked, proceed as normal and enqueue the task
+                * there.
+                */
        }
+ #endif
+       enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
+       if (dl_task(rq->curr))
+               check_preempt_curr_dl(rq, p, 0);
+       else
+               resched_curr(rq);
  
+ #ifdef CONFIG_SMP
        /*
         * Queueing this task back might have overloaded rq, check if we need
         * to kick someone away.
@@@ -735,8 -725,9 +725,8 @@@ static void update_curr_dl(struct rq *r
                return;
        }
  
 -      /* kick cpufreq (see the comment in linux/cpufreq.h). */
 -      if (cpu_of(rq) == smp_processor_id())
 -              cpufreq_trigger_update(rq_clock(rq));
 +      /* kick cpufreq (see the comment in kernel/sched/sched.h). */
 +      cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
  
        schedstat_set(curr->se.statistics.exec_max,
                      max(curr->se.statistics.exec_max, delta_exec));
@@@ -797,7 -788,7 +787,7 @@@ static void inc_dl_deadline(struct dl_r
        if (dl_rq->earliest_dl.curr == 0 ||
            dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
                dl_rq->earliest_dl.curr = deadline;
-               cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
+               cpudl_set(&rq->rd->cpudl, rq->cpu, deadline);
        }
  }
  
@@@ -812,14 -803,14 +802,14 @@@ static void dec_dl_deadline(struct dl_r
        if (!dl_rq->dl_nr_running) {
                dl_rq->earliest_dl.curr = 0;
                dl_rq->earliest_dl.next = 0;
-               cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
+               cpudl_clear(&rq->rd->cpudl, rq->cpu);
        } else {
                struct rb_node *leftmost = dl_rq->rb_leftmost;
                struct sched_dl_entity *entry;
  
                entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
                dl_rq->earliest_dl.curr = entry->deadline;
-               cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
+               cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
        }
  }
  
@@@ -1670,7 -1661,7 +1660,7 @@@ static void rq_online_dl(struct rq *rq
  
        cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
        if (rq->dl.dl_nr_running > 0)
-               cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
+               cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
  }
  
  /* Assumes rq->lock is held */
@@@ -1679,7 -1670,7 +1669,7 @@@ static void rq_offline_dl(struct rq *rq
        if (rq->dl.overloaded)
                dl_clear_overload(rq);
  
-       cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
+       cpudl_clear(&rq->rd->cpudl, rq->cpu);
        cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
  }
  
@@@ -1722,10 -1713,20 +1712,20 @@@ static void switched_from_dl(struct rq 
   */
  static void switched_to_dl(struct rq *rq, struct task_struct *p)
  {
+       /* If p is not queued we will update its parameters at next wakeup. */
+       if (!task_on_rq_queued(p))
+               return;
+       /*
+        * If p is boosted we already updated its params in
+        * rt_mutex_setprio()->enqueue_task(..., ENQUEUE_REPLENISH),
+        * p's deadline being now already after rq_clock(rq).
+        */
        if (dl_time_before(p->dl.deadline, rq_clock(rq)))
-               setup_new_dl_entity(&p->dl, &p->dl);
+               setup_new_dl_entity(&p->dl);
  
-       if (task_on_rq_queued(p) && rq->curr != p) {
+       if (rq->curr != p) {
  #ifdef CONFIG_SMP
                if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
                        queue_push_tasks(rq);
diff --combined kernel/sched/fair.c
index e6a9dee0d34f9fb9a61f3e33c80e057a25ef1bcc,986c10c25176ae652a1f3bde0af7117af270c5c6..07aaa7f085974663e7716757e80b520c925d52f7
@@@ -114,6 -114,12 +114,12 @@@ unsigned int __read_mostly sysctl_sched
  unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
  #endif
  
+ /*
+  * The margin used when comparing utilization with CPU capacity:
+  * util * 1024 < capacity * margin
+  */
+ unsigned int capacity_margin = 1280; /* ~20% */
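To make the margin concrete (the arithmetic below is illustrative, not part of the patch): capacity_margin scales one side of the stated comparison by 1280/1024 = 1.25, and 1 - 1024/1280 = 0.2 is where the "~20%" in the comment comes from.

/*
 * Illustrative arithmetic for the comment above:
 *   1280 / 1024     = 1.25   (scale factor applied by the comparison)
 *   1 - 1024 / 1280 = 0.20   (~20% headroom kept in reserve)
 */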
  static inline void update_load_add(struct load_weight *lw, unsigned long inc)
  {
        lw->weight += inc;
@@@ -656,7 -662,7 +662,7 @@@ static u64 sched_vslice(struct cfs_rq *
  }
  
  #ifdef CONFIG_SMP
- static int select_idle_sibling(struct task_struct *p, int cpu);
+ static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
  static unsigned long task_h_load(struct task_struct *p);
  
  /*
@@@ -726,7 -732,6 +732,6 @@@ void post_init_entity_util_avg(struct s
        struct sched_avg *sa = &se->avg;
        long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
        u64 now = cfs_rq_clock_task(cfs_rq);
-       int tg_update;
  
        if (cap > 0) {
                if (cfs_rq->avg.util_avg != 0) {
                }
        }
  
-       tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+       update_cfs_rq_load_avg(now, cfs_rq, false);
        attach_entity_load_avg(cfs_rq, se);
-       if (tg_update)
-               update_tg_load_avg(cfs_rq, false);
+       update_tg_load_avg(cfs_rq, false);
  }
  
  #else /* !CONFIG_SMP */
@@@ -799,7 -803,7 +803,7 @@@ static void update_curr(struct cfs_rq *
                      max(delta_exec, curr->statistics.exec_max));
  
        curr->sum_exec_runtime += delta_exec;
-       schedstat_add(cfs_rq, exec_clock, delta_exec);
+       schedstat_add(cfs_rq->exec_clock, delta_exec);
  
        curr->vruntime += calc_delta_fair(delta_exec, curr);
        update_min_vruntime(cfs_rq);
@@@ -820,26 -824,34 +824,34 @@@ static void update_curr_fair(struct rq 
        update_curr(cfs_rq_of(&rq->curr->se));
  }
  
- #ifdef CONFIG_SCHEDSTATS
  static inline void
  update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
-       u64 wait_start = rq_clock(rq_of(cfs_rq));
+       u64 wait_start, prev_wait_start;
+       if (!schedstat_enabled())
+               return;
+       wait_start = rq_clock(rq_of(cfs_rq));
+       prev_wait_start = schedstat_val(se->statistics.wait_start);
  
        if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
-           likely(wait_start > se->statistics.wait_start))
-               wait_start -= se->statistics.wait_start;
+           likely(wait_start > prev_wait_start))
+               wait_start -= prev_wait_start;
  
-       se->statistics.wait_start = wait_start;
+       schedstat_set(se->statistics.wait_start, wait_start);
  }
  
- static void
+ static inline void
  update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
        struct task_struct *p;
        u64 delta;
  
-       delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+       if (!schedstat_enabled())
+               return;
+       delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);
  
        if (entity_is_task(se)) {
                p = task_of(se);
                         * time stamp can be adjusted to accumulate wait time
                         * prior to migration.
                         */
-                       se->statistics.wait_start = delta;
+                       schedstat_set(se->statistics.wait_start, delta);
                        return;
                }
                trace_sched_stat_wait(p, delta);
        }
  
-       se->statistics.wait_max = max(se->statistics.wait_max, delta);
-       se->statistics.wait_count++;
-       se->statistics.wait_sum += delta;
-       se->statistics.wait_start = 0;
+       schedstat_set(se->statistics.wait_max,
+                     max(schedstat_val(se->statistics.wait_max), delta));
+       schedstat_inc(se->statistics.wait_count);
+       schedstat_add(se->statistics.wait_sum, delta);
+       schedstat_set(se->statistics.wait_start, 0);
+ }
+ static inline void
+ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
+       struct task_struct *tsk = NULL;
+       u64 sleep_start, block_start;
+       if (!schedstat_enabled())
+               return;
+       sleep_start = schedstat_val(se->statistics.sleep_start);
+       block_start = schedstat_val(se->statistics.block_start);
+       if (entity_is_task(se))
+               tsk = task_of(se);
+       if (sleep_start) {
+               u64 delta = rq_clock(rq_of(cfs_rq)) - sleep_start;
+               if ((s64)delta < 0)
+                       delta = 0;
+               if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
+                       schedstat_set(se->statistics.sleep_max, delta);
+               schedstat_set(se->statistics.sleep_start, 0);
+               schedstat_add(se->statistics.sum_sleep_runtime, delta);
+               if (tsk) {
+                       account_scheduler_latency(tsk, delta >> 10, 1);
+                       trace_sched_stat_sleep(tsk, delta);
+               }
+       }
+       if (block_start) {
+               u64 delta = rq_clock(rq_of(cfs_rq)) - block_start;
+               if ((s64)delta < 0)
+                       delta = 0;
+               if (unlikely(delta > schedstat_val(se->statistics.block_max)))
+                       schedstat_set(se->statistics.block_max, delta);
+               schedstat_set(se->statistics.block_start, 0);
+               schedstat_add(se->statistics.sum_sleep_runtime, delta);
+               if (tsk) {
+                       if (tsk->in_iowait) {
+                               schedstat_add(se->statistics.iowait_sum, delta);
+                               schedstat_inc(se->statistics.iowait_count);
+                               trace_sched_stat_iowait(tsk, delta);
+                       }
+                       trace_sched_stat_blocked(tsk, delta);
+                       /*
+                        * Blocking time is in units of nanosecs, so shift by
+                        * 20 to get a milliseconds-range estimation of the
+                        * amount of time that the task spent sleeping:
+                        */
+                       if (unlikely(prof_on == SLEEP_PROFILING)) {
+                               profile_hits(SLEEP_PROFILING,
+                                               (void *)get_wchan(tsk),
+                                               delta >> 20);
+                       }
+                       account_scheduler_latency(tsk, delta >> 10, 0);
+               }
+       }
  }
  
  /*
   * Task is being enqueued - update stats:
   */
  static inline void
- update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
+       if (!schedstat_enabled())
+               return;
        /*
         * Are we enqueueing a waiting task? (for current tasks
         * a dequeue/enqueue event is a NOP)
         */
        if (se != cfs_rq->curr)
                update_stats_wait_start(cfs_rq, se);
+       if (flags & ENQUEUE_WAKEUP)
+               update_stats_enqueue_sleeper(cfs_rq, se);
  }
  
  static inline void
  update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
+       if (!schedstat_enabled())
+               return;
        /*
         * Mark the end of the wait period if dequeueing a
         * waiting task:
        if (se != cfs_rq->curr)
                update_stats_wait_end(cfs_rq, se);
  
-       if (flags & DEQUEUE_SLEEP) {
-               if (entity_is_task(se)) {
-                       struct task_struct *tsk = task_of(se);
+       if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) {
+               struct task_struct *tsk = task_of(se);
  
-                       if (tsk->state & TASK_INTERRUPTIBLE)
-                               se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
-                       if (tsk->state & TASK_UNINTERRUPTIBLE)
-                               se->statistics.block_start = rq_clock(rq_of(cfs_rq));
-               }
+               if (tsk->state & TASK_INTERRUPTIBLE)
+                       schedstat_set(se->statistics.sleep_start,
+                                     rq_clock(rq_of(cfs_rq)));
+               if (tsk->state & TASK_UNINTERRUPTIBLE)
+                       schedstat_set(se->statistics.block_start,
+                                     rq_clock(rq_of(cfs_rq)));
        }
- }
- #else
- static inline void
- update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
- {
  }
  
- static inline void
- update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
- {
- }
- static inline void
- update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
- {
- }
- static inline void
- update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
- {
- }
- #endif
  /*
   * We are picking a new current task - update its stats:
   */
@@@ -1514,7 -1583,8 +1583,8 @@@ balance
         * Call select_idle_sibling to maybe find a better one.
         */
        if (!cur)
-               env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
+               env->dst_cpu = select_idle_sibling(env->p, env->src_cpu,
+                                                  env->dst_cpu);
  
  assign:
        task_numa_assign(env, cur, imp);
@@@ -2803,9 -2873,21 +2873,21 @@@ __update_load_avg(u64 now, int cpu, str
  }
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
- /*
-  * Updating tg's load_avg is necessary before update_cfs_share (which is done)
-  * and effective_load (which is not done because it is too costly).
+ /**
+  * update_tg_load_avg - update the tg's load avg
+  * @cfs_rq: the cfs_rq whose avg changed
+  * @force: update regardless of how small the difference
+  *
+  * This function 'ensures': tg->load_avg := \Sum tg->cfs_rq[]->avg.load.
+  * However, because tg->load_avg is a global value there are performance
+  * considerations.
+  *
+  * In order to avoid having to look at the other cfs_rq's, we use a
+  * differential update where we store the last value we propagated. This in
+  * turn allows skipping updates if the differential is 'small'.
+  *
+  * Updating tg's load_avg is necessary before update_cfs_share() (which is
+  * done) and effective_load() (which is not done because it is too costly).
   */
  static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
  {
@@@ -2875,7 -2957,12 +2957,7 @@@ static inline void update_tg_load_avg(s
  
  static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
  {
 -      struct rq *rq = rq_of(cfs_rq);
 -      int cpu = cpu_of(rq);
 -
 -      if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
 -              unsigned long max = rq->cpu_capacity_orig;
 -
 +      if (&this_rq()->cfs == cfs_rq) {
                /*
                 * There are a few boundary cases this might miss but it should
                 * get called often enough that that should (hopefully) not be
                 *
                 * See cpu_util().
                 */
 -              cpufreq_update_util(rq_clock(rq),
 -                                  min(cfs_rq->avg.util_avg, max), max);
 +              cpufreq_update_util(rq_of(cfs_rq), 0);
        }
  }
  
   *
   * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
   *
-  * Returns true if the load decayed or we removed utilization. It is expected
-  * that one calls update_tg_load_avg() on this condition, but after you've
-  * modified the cfs_rq avg (attach/detach), such that we propagate the new
-  * avg up.
+  * Returns true if the load decayed or we removed load.
+  *
+  * Since both these conditions indicate a changed cfs_rq->avg.load we should
+  * call update_tg_load_avg() when this function returns true.
   */
  static inline int
  update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
@@@ -3153,7 -3241,10 +3235,7 @@@ update_cfs_rq_load_avg(u64 now, struct 
  
  static inline void update_load_avg(struct sched_entity *se, int not_used)
  {
 -      struct cfs_rq *cfs_rq = cfs_rq_of(se);
 -      struct rq *rq = rq_of(cfs_rq);
 -
 -      cpufreq_trigger_update(rq_clock(rq));
 +      cpufreq_update_util(rq_of(cfs_rq_of(se)), 0);
  }
  
  static inline void
@@@ -3174,68 -3265,6 +3256,6 @@@ static inline int idle_balance(struct r
  
  #endif /* CONFIG_SMP */
  
- static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
- {
- #ifdef CONFIG_SCHEDSTATS
-       struct task_struct *tsk = NULL;
-       if (entity_is_task(se))
-               tsk = task_of(se);
-       if (se->statistics.sleep_start) {
-               u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.sleep_start;
-               if ((s64)delta < 0)
-                       delta = 0;
-               if (unlikely(delta > se->statistics.sleep_max))
-                       se->statistics.sleep_max = delta;
-               se->statistics.sleep_start = 0;
-               se->statistics.sum_sleep_runtime += delta;
-               if (tsk) {
-                       account_scheduler_latency(tsk, delta >> 10, 1);
-                       trace_sched_stat_sleep(tsk, delta);
-               }
-       }
-       if (se->statistics.block_start) {
-               u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.block_start;
-               if ((s64)delta < 0)
-                       delta = 0;
-               if (unlikely(delta > se->statistics.block_max))
-                       se->statistics.block_max = delta;
-               se->statistics.block_start = 0;
-               se->statistics.sum_sleep_runtime += delta;
-               if (tsk) {
-                       if (tsk->in_iowait) {
-                               se->statistics.iowait_sum += delta;
-                               se->statistics.iowait_count++;
-                               trace_sched_stat_iowait(tsk, delta);
-                       }
-                       trace_sched_stat_blocked(tsk, delta);
-                       /*
-                        * Blocking time is in units of nanosecs, so shift by
-                        * 20 to get a milliseconds-range estimation of the
-                        * amount of time that the task spent sleeping:
-                        */
-                       if (unlikely(prof_on == SLEEP_PROFILING)) {
-                               profile_hits(SLEEP_PROFILING,
-                                               (void *)get_wchan(tsk),
-                                               delta >> 20);
-                       }
-                       account_scheduler_latency(tsk, delta >> 10, 0);
-               }
-       }
- #endif
- }
  static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  #ifdef CONFIG_SCHED_DEBUG
                d = -d;
  
        if (d > 3*sysctl_sched_latency)
-               schedstat_inc(cfs_rq, nr_spread_over);
+               schedstat_inc(cfs_rq->nr_spread_over);
  #endif
  }
  
@@@ -3362,17 -3391,12 +3382,12 @@@ enqueue_entity(struct cfs_rq *cfs_rq, s
        account_entity_enqueue(cfs_rq, se);
        update_cfs_shares(cfs_rq);
  
-       if (flags & ENQUEUE_WAKEUP) {
+       if (flags & ENQUEUE_WAKEUP)
                place_entity(cfs_rq, se, 0);
-               if (schedstat_enabled())
-                       enqueue_sleeper(cfs_rq, se);
-       }
  
        check_schedstat_required();
-       if (schedstat_enabled()) {
-               update_stats_enqueue(cfs_rq, se);
-               check_spread(cfs_rq, se);
-       }
+       update_stats_enqueue(cfs_rq, se, flags);
+       check_spread(cfs_rq, se);
        if (!curr)
                __enqueue_entity(cfs_rq, se);
        se->on_rq = 1;
@@@ -3439,8 -3463,7 +3454,7 @@@ dequeue_entity(struct cfs_rq *cfs_rq, s
        update_curr(cfs_rq);
        dequeue_entity_load_avg(cfs_rq, se);
  
-       if (schedstat_enabled())
-               update_stats_dequeue(cfs_rq, se, flags);
+       update_stats_dequeue(cfs_rq, se, flags);
  
        clear_buddies(cfs_rq, se);
  
@@@ -3514,25 -3537,25 +3528,25 @@@ set_next_entity(struct cfs_rq *cfs_rq, 
                 * a CPU. So account for the time it spent waiting on the
                 * runqueue.
                 */
-               if (schedstat_enabled())
-                       update_stats_wait_end(cfs_rq, se);
+               update_stats_wait_end(cfs_rq, se);
                __dequeue_entity(cfs_rq, se);
                update_load_avg(se, 1);
        }
  
        update_stats_curr_start(cfs_rq, se);
        cfs_rq->curr = se;
- #ifdef CONFIG_SCHEDSTATS
        /*
         * Track our maximum slice length, if the CPU's load is at
         * least twice that of our own weight (i.e. don't track it
         * when there are only lesser-weight tasks around):
         */
        if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
-               se->statistics.slice_max = max(se->statistics.slice_max,
-                       se->sum_exec_runtime - se->prev_sum_exec_runtime);
+               schedstat_set(se->statistics.slice_max,
+                       max((u64)schedstat_val(se->statistics.slice_max),
+                           se->sum_exec_runtime - se->prev_sum_exec_runtime));
        }
- #endif
        se->prev_sum_exec_runtime = se->sum_exec_runtime;
  }
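The hunk above folds an open-coded #ifdef CONFIG_SCHEDSTATS block into the schedstat_set()/schedstat_val() wrappers so the statistics code compiles away when unused. A generic sketch of that conditional-statistics idiom, using invented mystat_* macros rather than the kernel's real ones:

#include <stdio.h>

#ifndef MY_STATS
#define MY_STATS 1		/* build with -DMY_STATS=0 to strip the stats */
#endif

#if MY_STATS
#define mystat_set(var, val)	do { (var) = (val); } while (0)
#define mystat_val(var)		(var)
#else
#define mystat_set(var, val)	do { } while (0)
#define mystat_val(var)		0
#endif

struct stats { unsigned long slice_max; };

int main(void)
{
	struct stats s = { 0 };
	unsigned long slice = 42;

	/* Mirrors the slice_max update above: keep the maximum seen. */
	mystat_set(s.slice_max,
		   slice > mystat_val(s.slice_max) ? slice
						   : mystat_val(s.slice_max));
	printf("slice_max=%lu\n", (unsigned long)mystat_val(s.slice_max));
	return 0;
}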
  
@@@ -3611,13 -3634,10 +3625,10 @@@ static void put_prev_entity(struct cfs_
        /* throttle cfs_rqs exceeding runtime */
        check_cfs_rq_runtime(cfs_rq);
  
-       if (schedstat_enabled()) {
-               check_spread(cfs_rq, prev);
-               if (prev->on_rq)
-                       update_stats_wait_start(cfs_rq, prev);
-       }
+       check_spread(cfs_rq, prev);
  
        if (prev->on_rq) {
+               update_stats_wait_start(cfs_rq, prev);
                /* Put 'current' back into the tree. */
                __enqueue_entity(cfs_rq, prev);
                /* in !on_rq case, update occurred at dequeue */
@@@ -5082,18 -5102,18 +5093,18 @@@ static int wake_wide(struct task_struc
        return 1;
  }
  
- static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
+                      int prev_cpu, int sync)
  {
        s64 this_load, load;
        s64 this_eff_load, prev_eff_load;
-       int idx, this_cpu, prev_cpu;
+       int idx, this_cpu;
        struct task_group *tg;
        unsigned long weight;
        int balanced;
  
        idx       = sd->wake_idx;
        this_cpu  = smp_processor_id();
-       prev_cpu  = task_cpu(p);
        load      = source_load(prev_cpu, idx);
        this_load = target_load(this_cpu, idx);
  
  
        balanced = this_eff_load <= prev_eff_load;
  
-       schedstat_inc(p, se.statistics.nr_wakeups_affine_attempts);
+       schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
  
        if (!balanced)
                return 0;
  
-       schedstat_inc(sd, ttwu_move_affine);
-       schedstat_inc(p, se.statistics.nr_wakeups_affine);
+       schedstat_inc(sd->ttwu_move_affine);
+       schedstat_inc(p->se.statistics.nr_wakeups_affine);
  
        return 1;
  }
@@@ -5219,6 -5239,10 +5230,10 @@@ find_idlest_cpu(struct sched_group *gro
        int shallowest_idle_cpu = -1;
        int i;
  
+       /* Check if we have any choice: */
+       if (group->group_weight == 1)
+               return cpumask_first(sched_group_cpus(group));
        /* Traverse only the allowed CPUs */
        for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
                if (idle_cpu(i)) {
  /*
   * Try and locate an idle CPU in the sched_domain.
   */
- static int select_idle_sibling(struct task_struct *p, int target)
+ static int select_idle_sibling(struct task_struct *p, int prev, int target)
  {
        struct sched_domain *sd;
        struct sched_group *sg;
-       int i = task_cpu(p);
  
        if (idle_cpu(target))
                return target;
        /*
         * If the previous cpu is cache affine and idle, don't be stupid.
         */
-       if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
-               return i;
+       if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+               return prev;
  
        /*
         * Otherwise, iterate the domains and find an eligible idle cpu.
        for_each_lower_domain(sd) {
                sg = sd->groups;
                do {
+                       int i;
                        if (!cpumask_intersects(sched_group_cpus(sg),
                                                tsk_cpus_allowed(p)))
                                goto next;
@@@ -5351,6 -5376,32 +5367,32 @@@ static int cpu_util(int cpu
        return (util >= capacity) ? capacity : util;
  }
  
+ static inline int task_util(struct task_struct *p)
+ {
+       return p->se.avg.util_avg;
+ }
+ /*
+  * Disable WAKE_AFFINE in the case where task @p doesn't fit in the
+  * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu.
+  *
+  * In that case WAKE_AFFINE doesn't make sense and we'll let
+  * BALANCE_WAKE sort things out.
+  */
+ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
+ {
+       long min_cap, max_cap;
+       min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
+       max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
+       /* Minimum capacity is close to max, no need to abort wake_affine */
+       if (max_cap - min_cap < max_cap >> 3)
+               return 0;
+       return min_cap * 1024 < task_util(p) * capacity_margin;
+ }
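A rough worked example of the wake_cap() check just added, assuming an asymmetric system with CPU capacities 430 and 1024 and a capacity_margin of 1280 (roughly 25% headroom); every number here is illustrative, not taken from the patch.

#include <stdio.h>

int main(void)
{
	long min_cap = 430;		/* e.g. a LITTLE CPU's capacity_orig */
	long max_cap = 1024;		/* e.g. a big CPU's capacity_orig    */
	long task_util = 400;
	long capacity_margin = 1280;	/* assumed ~25% headroom             */

	/* Same early-out as wake_cap(): capacities within 1/8 of each other. */
	if (max_cap - min_cap < max_cap >> 3) {
		puts("capacities are close: keep wake_affine");
		return 0;
	}

	if (min_cap * 1024 < task_util * capacity_margin)
		puts("task does not fit: let BALANCE_WAKE pick the CPU");
	else
		puts("task fits: wake_affine stays enabled");
	return 0;
}

With these numbers 430 * 1024 = 440320 is below 400 * 1280 = 512000, so wake_cap() would return 1 and the affine fast path is skipped.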
  /*
   * select_task_rq_fair: Select target runqueue for the waking task in domains
   * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@@ -5374,7 -5425,8 +5416,8 @@@ select_task_rq_fair(struct task_struct 
  
        if (sd_flag & SD_BALANCE_WAKE) {
                record_wakee(p);
-               want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
+               want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
+                             && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
        }
  
        rcu_read_lock();
  
        if (affine_sd) {
                sd = NULL; /* Prefer wake_affine over balance flags */
-               if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
+               if (cpu != prev_cpu && wake_affine(affine_sd, p, prev_cpu, sync))
                        new_cpu = cpu;
        }
  
        if (!sd) {
                if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
-                       new_cpu = select_idle_sibling(p, new_cpu);
+                       new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
  
        } else while (sd) {
                struct sched_group *group;
@@@ -5930,7 -5982,7 +5973,7 @@@ static bool yield_to_task_fair(struct r
   *
   * The adjacency matrix of the resulting graph is given by:
   *
-  *             log_2 n     
+  *             log_2 n
   *   A_i,j = \Union     (i % 2^k == 0) && i / 2^(k+1) == j / 2^(k+1)  (6)
   *             k = 0
   *
   *
   * [XXX write more on how we solve this.. _after_ merging pjt's patches that
   *      rewrite all of this once again.]
-  */ 
+  */
  
  static unsigned long __read_mostly max_load_balance_interval = HZ/10;
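For formula (6) above, a throwaway enumeration that simply transcribes the predicate makes the resulting graph visible for a small machine; n = 8 is an arbitrary choice for illustration, not from the patch.

#include <stdio.h>

int main(void)
{
	const int n = 8, logn = 3;	/* log_2 n */

	for (int i = 0; i < n; i++) {
		for (int j = 0; j < n; j++) {
			int a = 0;

			/* A_i,j per (6): union over k of the two conditions. */
			for (int k = 0; k <= logn; k++)
				if (i % (1 << k) == 0 &&
				    i >> (k + 1) == j >> (k + 1))
					a = 1;
			printf("%d ", a);
		}
		printf("\n");
	}
	return 0;
}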
  
@@@ -6124,7 -6176,7 +6167,7 @@@ int can_migrate_task(struct task_struc
        if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
                int cpu;
  
-               schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
+               schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
  
                env->flags |= LBF_SOME_PINNED;
  
        env->flags &= ~LBF_ALL_PINNED;
  
        if (task_running(env->src_rq, p)) {
-               schedstat_inc(p, se.statistics.nr_failed_migrations_running);
+               schedstat_inc(p->se.statistics.nr_failed_migrations_running);
                return 0;
        }
  
        if (tsk_cache_hot <= 0 ||
            env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
                if (tsk_cache_hot == 1) {
-                       schedstat_inc(env->sd, lb_hot_gained[env->idle]);
-                       schedstat_inc(p, se.statistics.nr_forced_migrations);
+                       schedstat_inc(env->sd->lb_hot_gained[env->idle]);
+                       schedstat_inc(p->se.statistics.nr_forced_migrations);
                }
                return 1;
        }
  
-       schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
+       schedstat_inc(p->se.statistics.nr_failed_migrations_hot);
        return 0;
  }
  
@@@ -6218,7 -6270,7 +6261,7 @@@ static struct task_struct *detach_one_t
                 * so we can safely collect stats here rather than
                 * inside detach_tasks().
                 */
-               schedstat_inc(env->sd, lb_gained[env->idle]);
+               schedstat_inc(env->sd->lb_gained[env->idle]);
                return p;
        }
        return NULL;
@@@ -6310,7 -6362,7 +6353,7 @@@ next
         * so we can safely collect detach_one_task() stats here rather
         * than inside detach_one_task().
         */
-       schedstat_add(env->sd, lb_gained[env->idle], detached);
+       schedstat_add(env->sd->lb_gained[env->idle], detached);
  
        return detached;
  }
@@@ -6638,7 -6690,7 +6681,7 @@@ void update_group_capacity(struct sched
                /*
                 * !SD_OVERLAP domains can assume that child groups
                 * span the current group.
-                */ 
+                */
  
                group = child->groups;
                do {
@@@ -7138,7 -7190,7 +7181,7 @@@ static inline void calculate_imbalance(
                load_above_capacity = busiest->sum_nr_running * SCHED_CAPACITY_SCALE;
                if (load_above_capacity > busiest->group_capacity) {
                        load_above_capacity -= busiest->group_capacity;
-                       load_above_capacity *= NICE_0_LOAD;
+                       load_above_capacity *= scale_load_down(NICE_0_LOAD);
                        load_above_capacity /= busiest->group_capacity;
                } else
                        load_above_capacity = ~0UL;
@@@ -7451,7 -7503,7 +7494,7 @@@ static int load_balance(int this_cpu, s
  
        cpumask_copy(cpus, cpu_active_mask);
  
-       schedstat_inc(sd, lb_count[idle]);
+       schedstat_inc(sd->lb_count[idle]);
  
  redo:
        if (!should_we_balance(&env)) {
  
        group = find_busiest_group(&env);
        if (!group) {
-               schedstat_inc(sd, lb_nobusyg[idle]);
+               schedstat_inc(sd->lb_nobusyg[idle]);
                goto out_balanced;
        }
  
        busiest = find_busiest_queue(&env, group);
        if (!busiest) {
-               schedstat_inc(sd, lb_nobusyq[idle]);
+               schedstat_inc(sd->lb_nobusyq[idle]);
                goto out_balanced;
        }
  
        BUG_ON(busiest == env.dst_rq);
  
-       schedstat_add(sd, lb_imbalance[idle], env.imbalance);
+       schedstat_add(sd->lb_imbalance[idle], env.imbalance);
  
        env.src_cpu = busiest->cpu;
        env.src_rq = busiest;
@@@ -7580,7 -7632,7 +7623,7 @@@ more_balance
        }
  
        if (!ld_moved) {
-               schedstat_inc(sd, lb_failed[idle]);
+               schedstat_inc(sd->lb_failed[idle]);
                /*
                 * Increment the failure counter only on periodic balance.
                 * We do not want newidle balance, which can be very
@@@ -7663,7 -7715,7 +7706,7 @@@ out_all_pinned
         * we can't migrate them. Let the imbalance flag set so parent level
         * can try to migrate them.
         */
-       schedstat_inc(sd, lb_balanced[idle]);
+       schedstat_inc(sd->lb_balanced[idle]);
  
        sd->nr_balance_failed = 0;
  
@@@ -7695,11 -7747,12 +7738,12 @@@ get_sd_balance_interval(struct sched_do
  }
  
  static inline void
- update_next_balance(struct sched_domain *sd, int cpu_busy, unsigned long *next_balance)
+ update_next_balance(struct sched_domain *sd, unsigned long *next_balance)
  {
        unsigned long interval, next;
  
-       interval = get_sd_balance_interval(sd, cpu_busy);
+       /* used by idle balance, so cpu_busy = 0 */
+       interval = get_sd_balance_interval(sd, 0);
        next = sd->last_balance + interval;
  
        if (time_after(*next_balance, next))
@@@ -7729,7 -7782,7 +7773,7 @@@ static int idle_balance(struct rq *this
                rcu_read_lock();
                sd = rcu_dereference_check_sched_domain(this_rq->sd);
                if (sd)
-                       update_next_balance(sd, 0, &next_balance);
+                       update_next_balance(sd, &next_balance);
                rcu_read_unlock();
  
                goto out;
                        continue;
  
                if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
-                       update_next_balance(sd, 0, &next_balance);
+                       update_next_balance(sd, &next_balance);
                        break;
                }
  
                        curr_cost += domain_cost;
                }
  
-               update_next_balance(sd, 0, &next_balance);
+               update_next_balance(sd, &next_balance);
  
                /*
                 * Stop searching for tasks to pull if there are
@@@ -7855,15 -7908,15 +7899,15 @@@ static int active_load_balance_cpu_stop
                        .idle           = CPU_IDLE,
                };
  
-               schedstat_inc(sd, alb_count);
+               schedstat_inc(sd->alb_count);
  
                p = detach_one_task(&env);
                if (p) {
-                       schedstat_inc(sd, alb_pushed);
+                       schedstat_inc(sd->alb_pushed);
                        /* Active balancing done, reset the failure counter. */
                        sd->nr_balance_failed = 0;
                } else {
-                       schedstat_inc(sd, alb_failed);
+                       schedstat_inc(sd->alb_failed);
                }
        }
        rcu_read_unlock();
@@@ -8274,7 -8327,7 +8318,7 @@@ static void nohz_idle_balance(struct r
   * run_rebalance_domains is triggered when needed from the scheduler tick.
   * Also triggered for nohz idle balancing (with nohz_balancing_kick set).
   */
 -static void run_rebalance_domains(struct softirq_action *h)
 +static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
  {
        struct rq *this_rq = this_rq();
        enum cpu_idle_type idle = this_rq->idle_balance ?
@@@ -8432,7 -8485,6 +8476,6 @@@ static void detach_task_cfs_rq(struct t
        struct sched_entity *se = &p->se;
        struct cfs_rq *cfs_rq = cfs_rq_of(se);
        u64 now = cfs_rq_clock_task(cfs_rq);
-       int tg_update;
  
        if (!vruntime_normalized(p)) {
                /*
        }
  
        /* Catch up with the cfs_rq and remove our load when we leave */
-       tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+       update_cfs_rq_load_avg(now, cfs_rq, false);
        detach_entity_load_avg(cfs_rq, se);
-       if (tg_update)
-               update_tg_load_avg(cfs_rq, false);
+       update_tg_load_avg(cfs_rq, false);
  }
  
  static void attach_task_cfs_rq(struct task_struct *p)
        struct sched_entity *se = &p->se;
        struct cfs_rq *cfs_rq = cfs_rq_of(se);
        u64 now = cfs_rq_clock_task(cfs_rq);
-       int tg_update;
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
        /*
  #endif
  
        /* Synchronize task with its cfs_rq */
-       tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+       update_cfs_rq_load_avg(now, cfs_rq, false);
        attach_entity_load_avg(cfs_rq, se);
-       if (tg_update)
-               update_tg_load_avg(cfs_rq, false);
+       update_tg_load_avg(cfs_rq, false);
  
        if (!vruntime_normalized(p))
                se->vruntime += cfs_rq->min_vruntime;
diff --combined kernel/sched/sched.h
index b7fc1ced438018c8a63c58d3a120a7a8a6348827,420c05d099c3867dec0083249b1a860d76506feb..8ac71b73a8e880bfff0891633f1d8ed0518d7965
@@@ -565,6 -565,8 +565,8 @@@ struct root_domain 
         */
        cpumask_var_t rto_mask;
        struct cpupri cpupri;
+       unsigned long max_cpu_capacity;
  };
  
  extern struct root_domain def_root_domain;
@@@ -597,7 -599,6 +599,6 @@@ struct rq 
  #ifdef CONFIG_SMP
        unsigned long last_load_update_tick;
  #endif /* CONFIG_SMP */
-       u64 nohz_stamp;
        unsigned long nohz_flags;
  #endif /* CONFIG_NO_HZ_COMMON */
  #ifdef CONFIG_NO_HZ_FULL
@@@ -1763,13 -1764,27 +1764,13 @@@ DECLARE_PER_CPU(struct update_util_dat
  
  /**
   * cpufreq_update_util - Take a note about CPU utilization changes.
 - * @time: Current time.
 - * @util: Current utilization.
 - * @max: Utilization ceiling.
 + * @rq: Runqueue to carry out the update for.
 + * @flags: Update reason flags.
   *
 - * This function is called by the scheduler on every invocation of
 - * update_load_avg() on the CPU whose utilization is being updated.
 + * This function is called by the scheduler on the CPU whose utilization is
 + * being updated.
   *
   * It can only be called from RCU-sched read-side critical sections.
 - */
 -static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
 -{
 -       struct update_util_data *data;
 -
 -       data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
 -       if (data)
 -               data->func(data, time, util, max);
 -}
 -
 -/**
 - * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
 - * @time: Current time.
   *
   * The way cpufreq is currently arranged requires it to evaluate the CPU
   * performance state (frequency/voltage) on a regular basis to prevent it from
   * but that really is a band-aid.  Going forward it should be replaced with
   * solutions targeted more specifically at RT and DL tasks.
   */
 -static inline void cpufreq_trigger_update(u64 time)
 +static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
 +{
 +      struct update_util_data *data;
 +
 +      data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
 +      if (data)
 +              data->func(data, rq_clock(rq), flags);
 +}
 +
 +static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
  {
 -      cpufreq_update_util(time, ULONG_MAX, 0);
 +      if (cpu_of(rq) == smp_processor_id())
 +              cpufreq_update_util(rq, flags);
  }
  #else
 -static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
 -static inline void cpufreq_trigger_update(u64 time) {}
 +static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 +static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
  #endif /* CONFIG_CPU_FREQ */
  
  #ifdef arch_scale_freq_capacity
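The sched.h hunk above shows only the call side of the utilization hook: a per-CPU pointer is read and, if a callback was published, it is invoked with the runqueue clock and the update flags. A self-contained sketch of the same publish/consume pattern, with RCU replaced by plain C11 atomics and all names (hooks, add_hook, my_governor_cb) invented for illustration:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct update_util_data {
	void (*func)(struct update_util_data *data, uint64_t time,
		     unsigned int flags);
};

#define NR_CPUS 4
static _Atomic(struct update_util_data *) hooks[NR_CPUS];

/* Governor side: publish a callback for one CPU. */
static void add_hook(int cpu, struct update_util_data *data,
		     void (*func)(struct update_util_data *, uint64_t,
				  unsigned int))
{
	data->func = func;
	atomic_store(&hooks[cpu], data);
}

/* Scheduler side: mirrors the shape of cpufreq_update_util() above. */
static void update_util(int cpu, uint64_t now, unsigned int flags)
{
	struct update_util_data *data = atomic_load(&hooks[cpu]);

	if (data)
		data->func(data, now, flags);
}

static void my_governor_cb(struct update_util_data *data, uint64_t time,
			   unsigned int flags)
{
	printf("hook fired at %llu, flags=%u\n",
	       (unsigned long long)time, flags);
}

int main(void)
{
	static struct update_util_data d;

	add_hook(0, &d, my_governor_cb);
	update_util(0, 123456, 0);
	return 0;
}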
diff --combined kernel/softirq.c
index 34033fd09c8c66ffe1249db174a3db780e28df94,c372114494f5f879f80defc9c613fdc31dc1c6f4..ad039266a7b382c4804e238f4f9897c3a04bf7a0
@@@ -482,7 -482,7 +482,7 @@@ void __tasklet_hi_schedule_first(struc
  }
  EXPORT_SYMBOL(__tasklet_hi_schedule_first);
  
 -static void tasklet_action(struct softirq_action *a)
 +static __latent_entropy void tasklet_action(struct softirq_action *a)
  {
        struct tasklet_struct *list;
  
        }
  }
  
 -static void tasklet_hi_action(struct softirq_action *a)
 +static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
  {
        struct tasklet_struct *list;
  
@@@ -700,7 -700,7 +700,7 @@@ void tasklet_kill_immediate(struct task
        BUG();
  }
  
- static void takeover_tasklets(unsigned int cpu)
+ static int takeover_tasklets(unsigned int cpu)
  {
        /* CPU is dead, so no lock needed. */
        local_irq_disable();
        raise_softirq_irqoff(HI_SOFTIRQ);
  
        local_irq_enable();
+       return 0;
  }
+ #else
+ #define takeover_tasklets     NULL
  #endif /* CONFIG_HOTPLUG_CPU */
  
- static int cpu_callback(struct notifier_block *nfb, unsigned long action,
-                       void *hcpu)
- {
-       switch (action) {
- #ifdef CONFIG_HOTPLUG_CPU
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               takeover_tasklets((unsigned long)hcpu);
-               break;
- #endif /* CONFIG_HOTPLUG_CPU */
-       }
-       return NOTIFY_OK;
- }
- static struct notifier_block cpu_nfb = {
-       .notifier_call = cpu_callback
- };
  static struct smp_hotplug_thread softirq_threads = {
        .store                  = &ksoftirqd,
        .thread_should_run      = ksoftirqd_should_run,
  
  static __init int spawn_ksoftirqd(void)
  {
-       register_cpu_notifier(&cpu_nfb);
+       cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
+                                 takeover_tasklets);
        BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
  
        return 0;
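The softirq conversion above registers CPUHP_SOFTIRQ_DEAD with a NULL startup callback and takeover_tasklets() as the teardown, replacing the old notifier block. Below is a sketch of how a hypothetical out-of-tree module might use the same state-machine API with a dynamically allocated state; my_online/my_offline and the "example:online" name are invented, not part of this patch.

#include <linux/cpuhotplug.h>
#include <linux/module.h>

static int hp_state;

static int my_online(unsigned int cpu)
{
	pr_info("example: cpu %u coming online\n", cpu);
	return 0;
}

static int my_offline(unsigned int cpu)
{
	pr_info("example: cpu %u going down\n", cpu);
	return 0;
}

static int __init example_init(void)
{
	/* Dynamic AP state: callbacks run on the CPU being plugged. */
	hp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example:online",
				     my_online, my_offline);
	return hp_state < 0 ? hp_state : 0;
}

static void __exit example_exit(void)
{
	cpuhp_remove_state(hp_state);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");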