Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 12 Aug 2016 20:55:06 +0000 (13:55 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 12 Aug 2016 20:55:06 +0000 (13:55 -0700)
Pull timer fixes from Ingo Molnar:
 "Misc fixes: a /dev/rtc regression fix, two APIC timer period
  calibration fixes, an ARM clocksource driver fix and a NOHZ
  power use regression fix"

* 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/hpet: Fix /dev/rtc breakage caused by RTC cleanup
  x86/timers/apic: Inform TSC deadline clockevent device about recalibration
  x86/timers/apic: Fix imprecise timer interrupts by eliminating TSC clockevents frequency roundoff error
  timers: Fix get_next_timer_interrupt() computation
  clocksource/arm_arch_timer: Force per-CPU interrupt to be level-triggered

143 files changed:
MAINTAINERS
arch/arm/Makefile
arch/arm/boot/dts/arm-realview-pbx-a9.dts
arch/arm/boot/dts/integratorap.dts
arch/arm/boot/dts/integratorcp.dts
arch/arm/boot/dts/keystone.dtsi
arch/arm/boot/dts/tegra124-jetson-tk1.dts
arch/arm/configs/aspeed_g4_defconfig
arch/arm/configs/aspeed_g5_defconfig
arch/arm/kernel/sys_oabi-compat.c
arch/arm/mach-clps711x/Kconfig
arch/arm/mach-mvebu/Makefile
arch/arm/mach-oxnas/Kconfig
arch/arm/mach-pxa/corgi.c
arch/arm/mach-pxa/spitz.c
arch/arm/mach-realview/Makefile
arch/arm/mach-s5pv210/Makefile
arch/arm/mach-shmobile/platsmp.c
arch/arm64/Kconfig.platforms
arch/arm64/boot/dts/exynos/exynos7-espresso.dts
arch/metag/mm/init.c
arch/powerpc/Makefile
arch/powerpc/crypto/crc32c-vpmsum_glue.c
arch/powerpc/include/asm/cpuidle.h
arch/powerpc/include/asm/feature-fixups.h
arch/powerpc/include/asm/switch_to.h
arch/powerpc/include/asm/xics.h
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/idle_book3s.S
arch/powerpc/kernel/mce.c
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/ptrace.c
arch/powerpc/kernel/setup_32.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/vdso.c
arch/powerpc/kernel/vdso32/Makefile
arch/powerpc/kernel/vdso64/Makefile
arch/powerpc/lib/checksum_32.S
arch/powerpc/lib/feature-fixups.c
arch/powerpc/platforms/cell/spufs/inode.c
arch/powerpc/platforms/pasemi/iommu.c
arch/powerpc/platforms/powernv/opal-irqchip.c
arch/powerpc/platforms/powernv/opal.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/pseries/hotplug-memory.c
arch/powerpc/sysdev/xics/Kconfig
arch/powerpc/sysdev/xics/ics-opal.c
arch/powerpc/sysdev/xics/ics-rtas.c
arch/powerpc/sysdev/xics/xics-common.c
arch/s390/Kconfig
arch/x86/events/intel/uncore_snb.c
arch/x86/events/intel/uncore_snbep.c
arch/x86/kernel/uprobes.c
arch/x86/platform/uv/bios_uv.c
drivers/acpi/nfit/core.c
drivers/block/rbd.c
drivers/block/virtio_blk.c
drivers/firmware/efi/capsule-loader.c
drivers/firmware/efi/capsule.c
drivers/misc/cxl/context.c
drivers/misc/cxl/cxl.h
drivers/misc/cxl/native.c
drivers/misc/cxl/pci.c
drivers/misc/cxl/vphb.c
drivers/nvdimm/btt.c
drivers/nvdimm/btt_devs.c
drivers/nvdimm/nd.h
drivers/pci/msi.c
drivers/rapidio/rio_cm.c
drivers/s390/virtio/Makefile
drivers/s390/virtio/kvm_virtio.c
drivers/vhost/vsock.c
drivers/virtio/virtio_ring.c
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/send.c
fs/btrfs/tree-log.c
fs/ceph/caps.c
fs/ceph/mds_client.c
fs/nfs/nfs42proc.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4proc.c
fs/nfs/nfs4renewd.c
fs/nfs/nfs4state.c
fs/proc/meminfo.c
include/asm-generic/qrwlock.h
include/linux/msi.h
include/linux/perf_event.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/xprt.h
include/uapi/linux/virtio_vsock.h
include/uapi/misc/cxl.h
kernel/events/core.c
kernel/futex.c
kernel/irq/msi.c
kernel/locking/qspinlock_paravirt.h
kernel/locking/qspinlock_stat.h
kernel/sched/core.c
kernel/sched/cpudeadline.c
kernel/sched/cputime.c
kernel/sched/deadline.c
kernel/sched/fair.c
mm/hugetlb.c
mm/kasan/quarantine.c
mm/memcontrol.c
mm/memory_hotplug.c
mm/oom_kill.c
mm/page_alloc.c
mm/rmap.c
mm/shmem.c
mm/slub.c
net/9p/trans_virtio.c
net/ceph/mon_client.c
net/ceph/osd_client.c
net/ceph/string_table.c
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/clnt.c
net/sunrpc/xprt.c
net/sunrpc/xprtsock.c
scripts/get_maintainer.pl
sound/pci/hda/hda_intel.c
sound/usb/quirks.c
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/asm/disabled-features.h
tools/arch/x86/include/asm/required-features.h
tools/arch/x86/include/uapi/asm/vmx.h
tools/include/uapi/linux/bpf.h
tools/perf/Documentation/perf-probe.txt
tools/perf/Documentation/perf-script.txt
tools/perf/arch/powerpc/util/sym-handling.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/util/probe-event.c
tools/perf/util/probe-event.h
tools/perf/util/probe-finder.c
tools/perf/util/sort.c
tools/testing/nvdimm/test/nfit.c
tools/testing/selftests/powerpc/Makefile

index 20bb1d00098c70dacad7a9c778087f9319b0c5c6..a306795a7450637be47bcec981c18776d513deaa 100644 (file)
@@ -1004,6 +1004,7 @@ N:        meson
 ARM/Annapurna Labs ALPINE ARCHITECTURE
 M:     Tsahee Zidenberg <tsahee@annapurnalabs.com>
 M:     Antoine Tenart <antoine.tenart@free-electrons.com>
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-alpine/
 F:     arch/arm/boot/dts/alpine*
index 56ea5c60b31883bdf52ca01c4748061c4ed39871..61f6ccc19cfa94364e777cc68d10ce5a24093c0f 100644 (file)
@@ -260,12 +260,14 @@ machdirs := $(patsubst %,arch/arm/mach-%/,$(machine-y))
 platdirs := $(patsubst %,arch/arm/plat-%/,$(sort $(plat-y)))
 
 ifneq ($(CONFIG_ARCH_MULTIPLATFORM),y)
+ifneq ($(CONFIG_ARM_SINGLE_ARMV7M),y)
 ifeq ($(KBUILD_SRC),)
 KBUILD_CPPFLAGS += $(patsubst %,-I%include,$(machdirs) $(platdirs))
 else
 KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(machdirs) $(platdirs))
 endif
 endif
+endif
 
 export TEXT_OFFSET GZFLAGS MMUEXT
 
index db808f92dd79c975b1f059d824e263080208d20b..90d00b407f851dfce2706d8e85bb436ac39a3921 100644 (file)
                 * associativity as these may be erroneously set
                 * up by boot loader(s).
                 */
-               cache-size = <1048576>; // 1MB
-               cache-sets = <4096>;
+               cache-size = <131072>; // 128KB
+               cache-sets = <512>;
                cache-line-size = <32>;
                arm,parity-disable;
-               arm,tag-latency = <1>;
-               arm,data-latency = <1 1>;
-               arm,dirty-latency = <1>;
+               arm,tag-latency = <1 1 1>;
+               arm,data-latency = <1 1 1>;
        };
 
        scu: scu@1f000000 {
index cf06e32ee108a221c330c8ff521e0a82d8b78e4d..4b34b54e09a193ebd93ef82566d5c012274241d5 100644 (file)
@@ -42,7 +42,7 @@
        };
 
        syscon {
-               compatible = "arm,integrator-ap-syscon";
+               compatible = "arm,integrator-ap-syscon", "syscon";
                reg = <0x11000000 0x100>;
                interrupt-parent = <&pic>;
                /* These are the logical module IRQs */
index d43f15b4f79a242d2437f6ea45626556380c010c..79430fbfec3bd17ec311625e67695aabee3e0726 100644 (file)
@@ -94,7 +94,7 @@
        };
 
        syscon {
-               compatible = "arm,integrator-cp-syscon";
+               compatible = "arm,integrator-cp-syscon", "syscon";
                reg = <0xcb000000 0x100>;
        };
 
index 00cb314d5e4db81fcc035b2a68101a557cf751a4..e23f46d15c806566abc2ec88828bc8d053ffd6e5 100644 (file)
                cpu_on          = <0x84000003>;
        };
 
-       psci {
-               compatible      = "arm,psci";
-               method          = "smc";
-               cpu_suspend     = <0x84000001>;
-               cpu_off         = <0x84000002>;
-               cpu_on          = <0x84000003>;
-       };
-
        soc {
                #address-cells = <1>;
                #size-cells = <1>;
index e52b82449a79528bc362d96fabe7b2d688abd78e..6403e0de540e842b952b3bb02b5673bbbfb3e683 100644 (file)
         *   Pin 41: BR_UART1_TXD
         *   Pin 44: BR_UART1_RXD
         */
-       serial@70006000 {
+       serial@0,70006000 {
                compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
                status = "okay";
        };
         *   Pin 71: UART2_CTS_L
         *   Pin 74: UART2_RTS_L
         */
-       serial@70006040 {
+       serial@0,70006040 {
                compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
                status = "okay";
        };
index b6e54ee9bdbd8e54a4c740d0d2308dd26f682b42..ca39c04fec6b7af28847b78b6b3ff36a75811b31 100644 (file)
@@ -58,7 +58,7 @@ CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_FIRMWARE_MEMMAP=y
 CONFIG_FANOTIFY=y
-CONFIG_PRINTK_TIME=1
+CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_PAGE_POISONING=y
index 89260516735720460b4002e462ae5d3e5811125b..4f366b0370e939a27056f0230b49140ddee85503 100644 (file)
@@ -59,7 +59,7 @@ CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_FIRMWARE_MEMMAP=y
 CONFIG_FANOTIFY=y
-CONFIG_PRINTK_TIME=1
+CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_PAGE_POISONING=y
index 087acb569b63a4bd90982e0c9b15fc2313636c53..5f221acd21aebb3ca1c2ee560fb68241bc1e02c9 100644 (file)
@@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
        mm_segment_t fs;
        long ret, err, i;
 
-       if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
+       if (maxevents <= 0 ||
+                       maxevents > (INT_MAX/sizeof(*kbuf)) ||
+                       maxevents > (INT_MAX/sizeof(*events)))
                return -EINVAL;
+       if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
+               return -EFAULT;
        kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
        if (!kbuf)
                return -ENOMEM;
@@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid,
 
        if (nsops < 1 || nsops > SEMOPM)
                return -EINVAL;
+       if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
+               return -EFAULT;
        sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
        if (!sops)
                return -ENOMEM;
index dc7c6edeab39a89d24f8211d4d11ace5494bafee..61284b9389cf5e41de92215b3fa45ee9c2bc79df 100644 (file)
@@ -1,13 +1,13 @@
 menuconfig ARCH_CLPS711X
        bool "Cirrus Logic EP721x/EP731x-based"
        depends on ARCH_MULTI_V4T
-       select ARCH_REQUIRE_GPIOLIB
        select AUTO_ZRELADDR
        select CLKSRC_OF
        select CLPS711X_TIMER
        select COMMON_CLK
        select CPU_ARM720T
        select GENERIC_CLOCKEVENTS
+       select GPIOLIB
        select MFD_SYSCON
        select OF_IRQ
        select USE_OF
index e53c6cfcab51cd12c798fd11d663686c77761b02..6c6497e80a7b13433d833923d8c5003c52039d9a 100644 (file)
@@ -1,5 +1,4 @@
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
-       -I$(srctree)/arch/arm/plat-orion/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-orion/include
 
 AFLAGS_coherency_ll.o          := -Wa,-march=armv7-a
 CFLAGS_pmsu.o                  := -march=armv7-a
index 567496bd250a2fc5c8037ed203dc71d203521be0..29100beb2e7f201403ce9bc84c78dab1aea818eb 100644 (file)
@@ -11,11 +11,13 @@ if ARCH_OXNAS
 
 config MACH_OX810SE
        bool "Support OX810SE Based Products"
+       select ARCH_HAS_RESET_CONTROLLER
        select COMMON_CLK_OXNAS
        select CPU_ARM926T
        select MFD_SYSCON
        select OXNAS_RPS_TIMER
        select PINCTRL_OXNAS
+       select RESET_CONTROLLER
        select RESET_OXNAS
        select VERSATILE_FPGA_IRQ
        help
index dc109dc3a622834bcca135322672e754cc1db488..10bfdb169366b0a7b4e6403cd3fd5242a940f3e8 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>      /* symbol_get ; symbol_put */
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/major.h>
index 1080580b1343d1bdf164230bff82d6fed2e8b0d5..2c150bfc0cd5128dcc252e63ca485076e9469ba9 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>      /* symbol_get ; symbol_put */
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/gpio_keys.h>
index dae8d86ef4ccc75c4bb3dfbb98aba9e7e12eec9f..4048821309566281d61e5dd478f2db7c921b9491 100644 (file)
@@ -1,8 +1,7 @@
 #
 # Makefile for the linux kernel.
 #
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
-       -I$(srctree)/arch/arm/plat-versatile/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-versatile/include
 
 obj-y                                  := core.o
 obj-$(CONFIG_REALVIEW_DT)              += realview-dt.o
index 72b9e96715070f2c47b13b7f7aade101abb9586e..fa7fb716e388a7ef4f4da1b89f58090e60805245 100644 (file)
@@ -5,7 +5,7 @@
 #
 # Licensed under GPLv2
 
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/$(src)/include -I$(srctree)/arch/arm/plat-samsung/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/arch/arm/plat-samsung/include
 
 # Core
 
index f3dba6f356e29446c0960af2d37e51d2eacc8302..02e21bceb0856bc5ac5c769af3362afab2927c99 100644 (file)
@@ -40,5 +40,8 @@ bool shmobile_smp_cpu_can_disable(unsigned int cpu)
 bool __init shmobile_smp_init_fallback_ops(void)
 {
        /* fallback on PSCI/smp_ops if no other DT based method is detected */
+       if (!IS_ENABLED(CONFIG_SMP))
+               return false;
+
        return platform_can_secondary_boot() ? true : false;
 }
index bb2616b161576b4a664536ccae5c761544e93167..be5d824ebdba2dab24840bb7808abcc40da2053e 100644 (file)
@@ -8,7 +8,7 @@ config ARCH_SUNXI
 
 config ARCH_ALPINE
        bool "Annapurna Labs Alpine platform"
-       select ALPINE_MSI
+       select ALPINE_MSI if PCI
        help
          This enables support for the Annapurna Labs Alpine
          Soc family.
@@ -66,7 +66,7 @@ config ARCH_LG1K
 config ARCH_HISI
        bool "Hisilicon SoC Family"
        select ARM_TIMER_SP804
-       select HISILICON_IRQ_MBIGEN
+       select HISILICON_IRQ_MBIGEN if PCI
        help
          This enables support for Hisilicon ARMv8 SoC family
 
index 299f3ce969ab8517a602ff7addda417ecd5aa5f3..c528dd52ba2d39b30547ab964eda219b1068a043 100644 (file)
@@ -12,6 +12,7 @@
 /dts-v1/;
 #include "exynos7.dtsi"
 #include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/clock/samsung,s2mps11.h>
 
 / {
        model = "Samsung Exynos7 Espresso board based on EXYNOS7";
@@ -43,6 +44,8 @@
 
 &rtc {
        status = "okay";
+       clocks = <&clock_ccore PCLK_RTC>, <&s2mps15_osc S2MPS11_CLK_AP>;
+       clock-names = "rtc", "rtc_src";
 };
 
 &watchdog {
index 11fa51c89617deb1a303c6bfdbf82b2a32a1e4db..c0ec116b3993a3a61b852c9daf14a34ab0962e15 100644 (file)
@@ -390,7 +390,6 @@ void __init mem_init(void)
 
        free_all_bootmem();
        mem_init_print_info(NULL);
-       show_mem(0);
 }
 
 void free_initmem(void)
index ca254546cd05a1a4fe09e87e63945d7b38b8873f..1934707bf321ecf47a835bc7a0c4cd88f092ed07 100644 (file)
@@ -66,29 +66,28 @@ endif
 UTS_MACHINE := $(OLDARCH)
 
 ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override CC    += -mlittle-endian
-ifneq ($(cc-name),clang)
-override CC    += -mno-strict-align
-endif
-override AS    += -mlittle-endian
 override LD    += -EL
-override CROSS32CC += -mlittle-endian
 override CROSS32AS += -mlittle-endian
 LDEMULATION    := lppc
 GNUTARGET      := powerpcle
 MULTIPLEWORD   := -mno-multiple
 KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
 else
-ifeq ($(call cc-option-yn,-mbig-endian),y)
-override CC    += -mbig-endian
-override AS    += -mbig-endian
-endif
 override LD    += -EB
 LDEMULATION    := ppc
 GNUTARGET      := powerpc
 MULTIPLEWORD   := -mmultiple
 endif
 
+cflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mbig-endian)
+cflags-$(CONFIG_CPU_LITTLE_ENDIAN)     += -mlittle-endian
+ifneq ($(cc-name),clang)
+  cflags-$(CONFIG_CPU_LITTLE_ENDIAN)   += -mno-strict-align
+endif
+
+aflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mbig-endian)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN)     += -mlittle-endian
+
 ifeq ($(HAS_BIARCH),y)
 override AS    += -a$(CONFIG_WORD_SIZE)
 override LD    += -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION)
@@ -232,6 +231,9 @@ cpu-as-$(CONFIG_E200)               += -Wa,-me200
 KBUILD_AFLAGS += $(cpu-as-y)
 KBUILD_CFLAGS += $(cpu-as-y)
 
+KBUILD_AFLAGS += $(aflags-y)
+KBUILD_CFLAGS += $(cflags-y)
+
 head-y                         := arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o
 head-$(CONFIG_8xx)             := arch/powerpc/kernel/head_8xx.o
 head-$(CONFIG_40x)             := arch/powerpc/kernel/head_40x.o
index bfe3d37a24ef3a24c07e5a9f7ed8df3a13b4bf59..9fa046d56ebadd6ad25e62b5a29a853b123cd30a 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
+#include <linux/cpufeature.h>
 #include <asm/switch_to.h>
 
 #define CHKSUM_BLOCK_SIZE      1
@@ -157,7 +158,7 @@ static void __exit crc32c_vpmsum_mod_fini(void)
        crypto_unregister_shash(&alg);
 }
 
-module_init(crc32c_vpmsum_mod_init);
+module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init);
 module_exit(crc32c_vpmsum_mod_fini);
 
 MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");
index 3d7fc06532a16a7b620a58342d70e41fd69a33da..01b8a13f022467be64ccd46f248344bdf96e9a41 100644 (file)
@@ -19,4 +19,17 @@ extern u64 pnv_first_deep_stop_state;
 
 #endif
 
+/* Idle state entry routines */
+#ifdef CONFIG_PPC_P7_NAP
+#define        IDLE_STATE_ENTER_SEQ(IDLE_INST)                         \
+       /* Magic NAP/SLEEP/WINKLE mode enter sequence */        \
+       std     r0,0(r1);                                       \
+       ptesync;                                                \
+       ld      r0,0(r1);                                       \
+1:     cmp     cr0,r0,r0;                                      \
+       bne     1b;                                             \
+       IDLE_INST;                                              \
+       b       .
+#endif /* CONFIG_PPC_P7_NAP */
+
 #endif
index 57fec8ac7b924cdeabb4a21b65d46785e9fbf745..ddf54f5bbdd1c05efbd286ec305beac3c459d8d6 100644 (file)
@@ -186,6 +186,7 @@ label##3:                                           \
 
 #ifndef __ASSEMBLY__
 void apply_feature_fixups(void);
+void setup_feature_keys(void);
 #endif
 
 #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
index 0a74ebe934e1cbcb61105b63d54a959614bca33d..17c8380673a60637c61fec5772162bf0ae5523cb 100644 (file)
@@ -75,14 +75,6 @@ static inline void disable_kernel_spe(void)
 static inline void __giveup_spe(struct task_struct *t) { }
 #endif
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-extern void flush_tmregs_to_thread(struct task_struct *);
-#else
-static inline void flush_tmregs_to_thread(struct task_struct *t)
-{
-}
-#endif
-
 static inline void clear_task_ebb(struct task_struct *t)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
index f5f729c115781a8d6c9c1850f89558b06383c761..f0b238516e9b44b5afabc52a8460daaa6e81ca97 100644 (file)
@@ -159,6 +159,8 @@ extern void xics_teardown_cpu(void);
 extern void xics_kexec_teardown_cpu(int secondary);
 extern void xics_migrate_irqs_away(void);
 extern void icp_native_eoi(struct irq_data *d);
+extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type);
+extern int xics_retrigger(struct irq_data *data);
 #ifdef CONFIG_SMP
 extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
                               unsigned int strict_check);
index c9bc78e9c6101b2ae5016efec858c5c38b0f4158..7429556eb8df7e468b447a1b6d4c541253c06295 100644 (file)
@@ -168,10 +168,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
        int n = 0, l = 0;
        char buffer[128];
 
-       n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
+       n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
                       edev->phb->global_number, pdn->busno,
                       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
-       pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
+       pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
                edev->phb->global_number, pdn->busno,
                PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 
index 41091fdf9bd88fbe68d9ef15aeb04c1b59b18e77..df6d45eb41150185e7cc17baa82feead23fa27f3 100644 (file)
@@ -144,29 +144,14 @@ machine_check_pSeries_1:
         * vector
         */
        SET_SCRATCH0(r13)               /* save r13 */
-#ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
-       /* Running native on arch 2.06 or later, check if we are
-        * waking up from nap. We only handle no state loss and
-        * supervisor state loss. We do -not- handle hypervisor
-        * state loss at this time.
+       /*
+        * Running native on arch 2.06 or later, we may wakeup from winkle
+        * inside machine check. If yes, then last bit of HSPGR0 would be set
+        * to 1. Hence clear it unconditionally.
         */
-       mfspr   r13,SPRN_SRR1
-       rlwinm. r13,r13,47-31,30,31
-       OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-       beq     9f
-
-       mfspr   r13,SPRN_SRR1
-       rlwinm. r13,r13,47-31,30,31
-       /* waking up from powersave (nap) state */
-       cmpwi   cr1,r13,2
-       /* Total loss of HV state is fatal. let's just stay stuck here */
-       OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-       bgt     cr1,.
-9:
-       OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
+       GET_PACA(r13)
+       clrrdi  r13,r13,1
+       SET_PACA(r13)
        EXCEPTION_PROLOG_0(PACA_EXMC)
 BEGIN_FTR_SECTION
        b       machine_check_powernv_early
@@ -1273,25 +1258,51 @@ machine_check_handle_early:
         * Check if thread was in power saving mode. We come here when any
         * of the following is true:
         * a. thread wasn't in power saving mode
-        * b. thread was in power saving mode with no state loss or
-        *    supervisor state loss
+        * b. thread was in power saving mode with no state loss,
+        *    supervisor state loss or hypervisor state loss.
         *
-        * Go back to nap again if (b) is true.
+        * Go back to nap/sleep/winkle mode again if (b) is true.
         */
        rlwinm. r11,r12,47-31,30,31     /* Was it in power saving mode? */
        beq     4f                      /* No, it wasn;t */
        /* Thread was in power saving mode. Go back to nap again. */
        cmpwi   r11,2
-       bne     3f
-       /* Supervisor state loss */
+       blt     3f
+       /* Supervisor/Hypervisor state loss */
        li      r0,1
        stb     r0,PACA_NAPSTATELOST(r13)
 3:     bl      machine_check_queue_event
        MACHINE_CHECK_HANDLER_WINDUP
        GET_PACA(r13)
        ld      r1,PACAR1(r13)
-       li      r3,PNV_THREAD_NAP
-       b       pnv_enter_arch207_idle_mode
+       /*
+        * Check what idle state this CPU was in and go back to same mode
+        * again.
+        */
+       lbz     r3,PACA_THREAD_IDLE_STATE(r13)
+       cmpwi   r3,PNV_THREAD_NAP
+       bgt     10f
+       IDLE_STATE_ENTER_SEQ(PPC_NAP)
+       /* No return */
+10:
+       cmpwi   r3,PNV_THREAD_SLEEP
+       bgt     2f
+       IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+       /* No return */
+
+2:
+       /*
+        * Go back to winkle. Please note that this thread was woken up in
+        * machine check from winkle and have not restored the per-subcore
+        * state. Hence before going back to winkle, set last bit of HSPGR0
+        * to 1. This will make sure that if this thread gets woken up
+        * again at reset vector 0x100 then it will get chance to restore
+        * the subcore state.
+        */
+       ori     r13,r13,1
+       SET_PACA(r13)
+       IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
+       /* No return */
 4:
 #endif
        /*
index ba79d15f4ddd7c0d8ce946e15098d977a00338fa..2265c6398a17ec4af7fc5983df9dad7443ac2fce 100644 (file)
                                PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
                                PSSCR_MTL_MASK
 
-/* Idle state entry routines */
-
-#define        IDLE_STATE_ENTER_SEQ(IDLE_INST)                         \
-       /* Magic NAP/SLEEP/WINKLE mode enter sequence */        \
-       std     r0,0(r1);                                       \
-       ptesync;                                                \
-       ld      r0,0(r1);                                       \
-1:     cmp     cr0,r0,r0;                                      \
-       bne     1b;                                             \
-       IDLE_INST;                                              \
-       b       .
-
        .text
 
 /*
@@ -363,8 +351,8 @@ _GLOBAL(power9_idle_stop)
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
  */
 _GLOBAL(pnv_restore_hyp_resource)
-       ld      r2,PACATOC(r13);
 BEGIN_FTR_SECTION
+       ld      r2,PACATOC(r13);
        /*
         * POWER ISA 3. Use PSSCR to determine if we
         * are waking up from deep idle state
@@ -395,6 +383,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
         */
        clrldi  r5,r13,63
        clrrdi  r13,r13,1
+
+       /* Now that we are sure r13 is corrected, load TOC */
+       ld      r2,PACATOC(r13);
        cmpwi   cr4,r5,1
        mtspr   SPRN_HSPRG0,r13
 
index ef267fd9dd225a3c7f3dfbcacc057a0bf28dca8e..5e7ece0fda9f5b802eb561ce51774cfb625032a6 100644 (file)
@@ -92,7 +92,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
        mce->in_use = 1;
 
        mce->initiator = MCE_INITIATOR_CPU;
-       if (handled)
+       /* Mark it recovered if we have handled it and MSR(RI=1). */
+       if (handled && (regs->msr & MSR_RI))
                mce->disposition = MCE_DISPOSITION_RECOVERED;
        else
                mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
index a5c0153ede37f21d6dd8f47beac764587789fbc5..7fdf324d5b51f4ebb8716dfadda4ed40fb34d23d 100644 (file)
@@ -78,6 +78,7 @@ EXPORT_SYMBOL(get_pci_dma_ops);
 static int get_phb_number(struct device_node *dn)
 {
        int ret, phb_id = -1;
+       u32 prop_32;
        u64 prop;
 
        /*
@@ -86,8 +87,10 @@ static int get_phb_number(struct device_node *dn)
         * reading "ibm,opal-phbid", only present in OPAL environment.
         */
        ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
-       if (ret)
-               ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop);
+       if (ret) {
+               ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
+               prop = prop_32;
+       }
 
        if (!ret)
                phb_id = (int)(prop & (MAX_PHBS - 1));
index 58ccf86415b46cd5c2db593424ecde772cd0d959..9ee2623e0f674977ee8e8f07bbfb1297a2f05dc8 100644 (file)
@@ -1074,26 +1074,6 @@ static inline void restore_sprs(struct thread_struct *old_thread,
 #endif
 }
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void flush_tmregs_to_thread(struct task_struct *tsk)
-{
-       /*
-        * Process self tracing is not yet supported through
-        * ptrace interface. Ptrace generic code should have
-        * prevented this from happening in the first place.
-        * Warn once here with the message, if some how it
-        * is attempted.
-        */
-       WARN_ONCE(tsk == current,
-               "Not expecting ptrace on self: TM regs may be incorrect\n");
-
-       /*
-        * If task is not current, it should have been flushed
-        * already to it's thread_struct during __switch_to().
-        */
-}
-#endif
-
 struct task_struct *__switch_to(struct task_struct *prev,
        struct task_struct *new)
 {
index 6ee4b72cda4201840cf2b85f38661831c5e37233..4e74fc588a3f6497177fa82e1d148b32383ffcb9 100644 (file)
@@ -2940,7 +2940,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 
        /* Don't print anything after quiesce under OPAL, it crashes OFW */
        if (of_platform != PLATFORM_OPAL) {
-               prom_printf("Booting Linux via __start() ...\n");
+               prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
                prom_debug("->dt_header_start=0x%x\n", hdr);
        }
 
index 4f3c5756cc09898f984de4cc6cf6fc7c1ba830ac..bf91658a8a406b051e2072e2ce948317abfefe29 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/switch_to.h>
+#include <asm/tm.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -118,6 +119,24 @@ static const struct pt_regs_offset regoffset_table[] = {
        REG_OFFSET_END,
 };
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+       /*
+        * If task is not current, it will have been flushed already to
+        * it's thread_struct during __switch_to().
+        *
+        * A reclaim flushes ALL the state.
+        */
+
+       if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
+               tm_reclaim_current(TM_CAUSE_SIGNAL);
+
+}
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
+#endif
+
 /**
  * regs_query_register_offset() - query register offset from its name
  * @name:      the name of a register
index c3e861df4b203ce5be8e0e9f0fb8f4e236d41cd5..24ec3ea4b3a2eeeeae2e0f713226acd252bab806 100644 (file)
@@ -93,15 +93,16 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
  * and we are running with enough of the MMU enabled to have our
  * proper kernel virtual addresses
  *
- * Find out what kind of machine we're on and save any data we need
- * from the early boot process (devtree is copied on pmac by prom_init()).
- * This is called very early on the boot process, after a minimal
- * MMU environment has been set up but before MMU_init is called.
+ * We do the initial parsing of the flat device-tree and prepares
+ * for the MMU to be fully initialized.
  */
 extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
 
 notrace void __init machine_init(u64 dt_ptr)
 {
+       /* Configure static keys first, now that we're relocated. */
+       setup_feature_keys();
+
        /* Enable early debugging if any specified (see udbg.h) */
        udbg_early_init();
 
index eafb9a79e0116b600624a16212c2a02bfb46e363..7ac8e6eaab5ba24566f1f6fe06829e22727e86ea 100644 (file)
@@ -300,6 +300,7 @@ void __init early_setup(unsigned long dt_ptr)
 
        /* Apply all the dynamic patching */
        apply_feature_fixups();
+       setup_feature_keys();
 
        /* Initialize the hash table or TLB handling */
        early_init_mmu();
index 6767605ea8da2eddb54152dbf92538467fe2dc3e..4111d30badfad30fa1eb7dd29abb6a26d8a338de 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/security.h>
 #include <linux/memblock.h>
 
+#include <asm/cpu_has_feature.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
index cbabd143acae8e9db3b3883be876527b68b83574..78a7449bf489d49a400ae45703ce77bdc7206f4b 100644 (file)
@@ -30,7 +30,7 @@ CPPFLAGS_vdso32.lds += -P -C -Upowerpc
 $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
 
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
        $(call if_changed,vdso32ld)
 
 # strip rule for the .so file
@@ -39,12 +39,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
        $(call if_changed,objcopy)
 
 # assembly rules for the .S files
-$(obj-vdso32): %.o: %.S
+$(obj-vdso32): %.o: %.S FORCE
        $(call if_changed_dep,vdso32as)
 
 # actual build commands
 quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
 quiet_cmd_vdso32as = VDSO32A $@
       cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
 
index c710802b8fb685a7cb5815d86387f7debb356d9e..366ae09b14c1e3a5179987d312ec4d1fedf86313 100644 (file)
@@ -23,7 +23,7 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
 $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
 
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
        $(call if_changed,vdso64ld)
 
 # strip rule for the .so file
@@ -32,12 +32,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
        $(call if_changed,objcopy)
 
 # assembly rules for the .S files
-$(obj-vdso64): %.o: %.S
+$(obj-vdso64): %.o: %.S FORCE
        $(call if_changed_dep,vdso64as)
 
 # actual build commands
 quiet_cmd_vdso64ld = VDSO64L $@
-      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
 quiet_cmd_vdso64as = VDSO64A $@
       cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
 
index d90870a66b60b4b0820a3044466f70716e1ed610..0a57fe6d49ccf43a07224c4f7da1f597e36cc10c 100644 (file)
@@ -127,8 +127,9 @@ _GLOBAL(csum_partial_copy_generic)
        stw     r7,12(r1)
        stw     r8,8(r1)
 
-       andi.   r0,r4,1                 /* is destination address even ? */
-       cmplwi  cr7,r0,0
+       rlwinm  r0,r4,3,0x8
+       rlwnm   r6,r6,r0,0,31   /* odd destination address: rotate one byte */
+       cmplwi  cr7,r0,0        /* is destination address even ? */
        addic   r12,r6,0
        addi    r6,r4,-4
        neg     r0,r4
@@ -237,7 +238,7 @@ _GLOBAL(csum_partial_copy_generic)
 66:    addze   r3,r12
        addi    r1,r1,16
        beqlr+  cr7
-       rlwinm  r3,r3,8,0,31    /* swap bytes for odd destination */
+       rlwinm  r3,r3,8,0,31    /* odd destination address: rotate one byte */
        blr
 
 /* read fault */
index 74145f02ad417b496ceba07b0f114a2bbd77bc75..043415f0bdb1646fa85f7bb26d04f0241c68ff63 100644 (file)
@@ -188,7 +188,10 @@ void __init apply_feature_fixups(void)
                          &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
 #endif
        do_final_fixups();
+}
 
+void __init setup_feature_keys(void)
+{
        /*
         * Initialise jump label. This causes all the cpu/mmu_has_feature()
         * checks to take on their correct polarity based on the current set of
index 5be15cff758df193bd935baa50867a5dc9e3b063..2975754c65ea9514e4d7d10284a2beb42d6dd3cb 100644 (file)
@@ -496,8 +496,10 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
        gang = alloc_spu_gang();
        SPUFS_I(inode)->i_ctx = NULL;
        SPUFS_I(inode)->i_gang = gang;
-       if (!gang)
+       if (!gang) {
+               ret = -ENOMEM;
                goto out_iput;
+       }
 
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
index 309d9ccccd509c83097386dca03621f4db3b7ca3..c61667e8bb06c51385fbe21c18b288aeb889f2da 100644 (file)
@@ -187,6 +187,11 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
        if (dev->vendor == 0x1959 && dev->device == 0xa007 &&
            !firmware_has_feature(FW_FEATURE_LPAR)) {
                dev->dev.archdata.dma_ops = &dma_direct_ops;
+               /*
+                * Set the coherent DMA mask to prevent the iommu
+                * being used unnecessarily
+                */
+               dev->dev.coherent_dma_mask = DMA_BIT_MASK(44);
                return;
        }
 #endif
index e505223b4ec5ed2d82bcb08b2e718a6b64601f18..ed8bba68a162120d282bfc2286a674b049e44d5c 100644 (file)
@@ -228,7 +228,8 @@ int __init opal_event_init(void)
                }
 
                /* Install interrupt handler */
-               rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
+               rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW,
+                                "opal", NULL);
                if (rc) {
                        irq_dispose_mapping(virq);
                        pr_warn("Error %d requesting irq %d (0x%x)\n",
index 8b4fc68cebcb2f4c02a716074cb1b1c1045d0d1f..6c9a65b52e63b589edbe809a4b65851ca2ea2f79 100644 (file)
@@ -399,6 +399,7 @@ static int opal_recover_mce(struct pt_regs *regs,
 
        if (!(regs->msr & MSR_RI)) {
                /* If MSR_RI isn't set, we cannot recover */
+               pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
                recovered = 0;
        } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
                /* Platform corrected itself */
index 6b9528307f620e639be196d3b691e6ac8d159d28..fd9444f9fb0c24e0ed02dcb722c0e824ddc218e2 100644 (file)
@@ -111,10 +111,17 @@ static int __init iommu_setup(char *str)
 }
 early_param("iommu", iommu_setup);
 
-static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
+static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
 {
-       return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
-               (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+       /*
+        * WARNING: We cannot rely on the resource flags. The Linux PCI
+        * allocation code sometimes decides to put a 64-bit prefetchable
+        * BAR in the 32-bit window, so we have to compare the addresses.
+        *
+        * For simplicity we only test resource start.
+        */
+       return (r->start >= phb->ioda.m64_base &&
+               r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
 }
 
 static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
@@ -229,7 +236,7 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
        sgsz = phb->ioda.m64_segsize;
        for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
                r = &pdev->resource[i];
-               if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags))
+               if (!r->parent || !pnv_pci_is_m64(phb, r))
                        continue;
 
                start = _ALIGN_DOWN(r->start - base, sgsz);
@@ -1877,7 +1884,7 @@ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
                                        unsigned shift, unsigned long index,
                                        unsigned long npages)
 {
-       __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false);
+       __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
        unsigned long start, end, inc;
 
        /* We'll invalidate DMA address in PE scope */
@@ -2863,7 +2870,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
                res = &pdev->resource[i + PCI_IOV_RESOURCES];
                if (!res->flags || res->parent)
                        continue;
-               if (!pnv_pci_is_mem_pref_64(res->flags)) {
+               if (!pnv_pci_is_m64(phb, res)) {
                        dev_warn(&pdev->dev, "Don't support SR-IOV with"
                                        " non M64 VF BAR%d: %pR. \n",
                                 i, res);
@@ -2958,7 +2965,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
                        index++;
                }
        } else if ((res->flags & IORESOURCE_MEM) &&
-                  !pnv_pci_is_mem_pref_64(res->flags)) {
+                  !pnv_pci_is_m64(phb, res)) {
                region.start = res->start -
                               phb->hose->mem_offset[0] -
                               phb->ioda.m32_pci_base;
@@ -3083,9 +3090,12 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
                bridge = bridge->bus->self;
        }
 
-       /* We fail back to M32 if M64 isn't supported */
-       if (phb->ioda.m64_segsize &&
-           pnv_pci_is_mem_pref_64(type))
+       /*
+        * We fall back to M32 if M64 isn't supported. We enforce the M64
+        * alignment for any 64-bit resource, PCIe doesn't care and
+        * bridges only do 64-bit prefetchable anyway.
+        */
+       if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64))
                return phb->ioda.m64_segsize;
        if (type & IORESOURCE_MEM)
                return phb->ioda.m32_segsize;
@@ -3125,7 +3135,7 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
                w = NULL;
                if (r->flags & type & IORESOURCE_IO)
                        w = &hose->io_resource;
-               else if (pnv_pci_is_mem_pref_64(r->flags) &&
+               else if (pnv_pci_is_m64(phb, r) &&
                         (type & IORESOURCE_PREFETCH) &&
                         phb->ioda.m64_segsize)
                        w = &hose->mem_resources[1];
index 43f7beb2902d0b5d5b1001cff100fdf67c650fb4..76ec104e88beea0e89e3473d988e23fbdb7312c1 100644 (file)
@@ -320,19 +320,6 @@ static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb)
        return dlpar_update_device_tree_lmb(lmb);
 }
 
-static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
-{
-       unsigned long section_nr;
-       struct mem_section *mem_sect;
-       struct memory_block *mem_block;
-
-       section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
-       mem_sect = __nr_to_section(section_nr);
-
-       mem_block = find_memory_block(mem_sect);
-       return mem_block;
-}
-
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
@@ -420,6 +407,19 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb)
 
 static int dlpar_add_lmb(struct of_drconf_cell *);
 
+static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
+{
+       unsigned long section_nr;
+       struct mem_section *mem_sect;
+       struct memory_block *mem_block;
+
+       section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
+       mem_sect = __nr_to_section(section_nr);
+
+       mem_block = find_memory_block(mem_sect);
+       return mem_block;
+}
+
 static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
 {
        struct memory_block *mem_block;
index 0031eda320c3de7d94e3d9198b0cbc9087a7aec4..385e7aa9e2731cabf13cfccef35a2934719cdbc3 100644 (file)
@@ -1,6 +1,7 @@
 config PPC_XICS
        def_bool n
        select PPC_SMP_MUXED_IPI
+       select HARDIRQS_SW_RESEND
 
 config PPC_ICP_NATIVE
        def_bool n
index 27c936c080a66ffb5523931ac74d6c4c3a01849d..1c6bf4b66f56854d0717a644c75dea70e3fabde7 100644 (file)
@@ -156,7 +156,9 @@ static struct irq_chip ics_opal_irq_chip = {
        .irq_mask = ics_opal_mask_irq,
        .irq_unmask = ics_opal_unmask_irq,
        .irq_eoi = NULL, /* Patched at init time */
-       .irq_set_affinity = ics_opal_set_affinity
+       .irq_set_affinity = ics_opal_set_affinity,
+       .irq_set_type = xics_set_irq_type,
+       .irq_retrigger = xics_retrigger,
 };
 
 static int ics_opal_map(struct ics *ics, unsigned int virq);
index 3854dd41558d2697e73f9d72f9dffb27327952a8..78ee5c778ef8c7650ccea536aff7fb5cefc0bca3 100644 (file)
@@ -163,7 +163,9 @@ static struct irq_chip ics_rtas_irq_chip = {
        .irq_mask = ics_rtas_mask_irq,
        .irq_unmask = ics_rtas_unmask_irq,
        .irq_eoi = NULL, /* Patched at init time */
-       .irq_set_affinity = ics_rtas_set_affinity
+       .irq_set_affinity = ics_rtas_set_affinity,
+       .irq_set_type = xics_set_irq_type,
+       .irq_retrigger = xics_retrigger,
 };
 
 static int ics_rtas_map(struct ics *ics, unsigned int virq)
index a795a5f0301c482ec8edeb3047464ea709f7326b..9d530f47958857621ad19655ac12f975f17e090d 100644 (file)
@@ -328,8 +328,12 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq,
 
        pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
 
-       /* They aren't all level sensitive but we just don't really know */
-       irq_set_status_flags(virq, IRQ_LEVEL);
+       /*
+        * Mark interrupts as edge sensitive by default so that resend
+        * actually works. The device-tree parsing will turn the LSIs
+        * back to level.
+        */
+       irq_clear_status_flags(virq, IRQ_LEVEL);
 
        /* Don't call into ICS for IPIs */
        if (hw == XICS_IPI) {
@@ -351,13 +355,54 @@ static int xics_host_xlate(struct irq_domain *h, struct device_node *ct,
                           irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 
 {
-       /* Current xics implementation translates everything
-        * to level. It is not technically right for MSIs but this
-        * is irrelevant at this point. We might get smarter in the future
-        */
        *out_hwirq = intspec[0];
-       *out_flags = IRQ_TYPE_LEVEL_LOW;
 
+       /*
+        * If intsize is at least 2, we look for the type in the second cell,
+        * we assume the LSB indicates a level interrupt.
+        */
+       if (intsize > 1) {
+               if (intspec[1] & 1)
+                       *out_flags = IRQ_TYPE_LEVEL_LOW;
+               else
+                       *out_flags = IRQ_TYPE_EDGE_RISING;
+       } else
+               *out_flags = IRQ_TYPE_LEVEL_LOW;
+
+       return 0;
+}
+
+int xics_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+       /*
+        * We only support these. This has really no effect other than setting
+        * the corresponding descriptor bits mind you but those will in turn
+        * affect the resend function when re-enabling an edge interrupt.
+        *
+        * Set the default to edge as explained in map().
+        */
+       if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
+               flow_type = IRQ_TYPE_EDGE_RISING;
+
+       if (flow_type != IRQ_TYPE_EDGE_RISING &&
+           flow_type != IRQ_TYPE_LEVEL_LOW)
+               return -EINVAL;
+
+       irqd_set_trigger_type(d, flow_type);
+
+       return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+int xics_retrigger(struct irq_data *data)
+{
+       /*
+        * We need to push a dummy CPPR when retriggering, since the subsequent
+        * EOI will try to pop it. Passing 0 works, as the function hard codes
+        * the priority value anyway.
+        */
+       xics_push_cppr(0);
+
+       /* Tell the core to do a soft retrigger */
        return 0;
 }
 
index 0e348781327b2dcc9667b38344fb98e8c4936dcc..e751fe25d6ab670428f5c97b8464d9102baddbe6 100644 (file)
@@ -872,4 +872,17 @@ config S390_GUEST
          Select this option if you want to run the kernel as a guest under
          the KVM hypervisor.
 
+config S390_GUEST_OLD_TRANSPORT
+       def_bool y
+       prompt "Guest support for old s390 virtio transport (DEPRECATED)"
+       depends on S390_GUEST
+       help
+         Enable this option to add support for the old s390-virtio
+         transport (i.e. virtio devices NOT based on virtio-ccw). This
+         type of virtio devices is only available on the experimental
+         kuli userspace or with old (< 2.6) qemu. If you are running
+         with a modern version of qemu (which supports virtio-ccw since
+         1.4 and uses it by default since version 2.4), you probably won't
+         need this.
+
 endmenu
index 97a69dbba649b6bb7c6dd936440f572c5d02ed6f..9d35ec0cb8fc916ba3b4b63f5bdb1b6ebda5de55 100644 (file)
@@ -100,6 +100,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
        }
 }
 
+static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+       wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+               SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
 static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
 {
        if (box->pmu->pmu_idx == 0)
@@ -127,6 +133,7 @@ static struct attribute_group snb_uncore_format_group = {
 
 static struct intel_uncore_ops snb_uncore_msr_ops = {
        .init_box       = snb_uncore_msr_init_box,
+       .enable_box     = snb_uncore_msr_enable_box,
        .exit_box       = snb_uncore_msr_exit_box,
        .disable_event  = snb_uncore_msr_disable_event,
        .enable_event   = snb_uncore_msr_enable_event,
@@ -192,6 +199,12 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
        }
 }
 
+static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+       wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+               SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
 static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
 {
        if (box->pmu->pmu_idx == 0)
@@ -200,6 +213,7 @@ static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
 
 static struct intel_uncore_ops skl_uncore_msr_ops = {
        .init_box       = skl_uncore_msr_init_box,
+       .enable_box     = skl_uncore_msr_enable_box,
        .exit_box       = skl_uncore_msr_exit_box,
        .disable_event  = snb_uncore_msr_disable_event,
        .enable_event   = snb_uncore_msr_enable_event,
index 824e54086e071456b170380c52e561dbbac62cf7..8aee83bcf71f2dc5a380009957d0858a7b4a2507 100644 (file)
@@ -2626,7 +2626,7 @@ void hswep_uncore_cpu_init(void)
 
 static struct intel_uncore_type hswep_uncore_ha = {
        .name           = "ha",
-       .num_counters   = 5,
+       .num_counters   = 4,
        .num_boxes      = 2,
        .perf_ctr_bits  = 48,
        SNBEP_UNCORE_PCI_COMMON_INIT(),
@@ -2645,7 +2645,7 @@ static struct uncore_event_desc hswep_uncore_imc_events[] = {
 
 static struct intel_uncore_type hswep_uncore_imc = {
        .name           = "imc",
-       .num_counters   = 5,
+       .num_counters   = 4,
        .num_boxes      = 8,
        .perf_ctr_bits  = 48,
        .fixed_ctr_bits = 48,
@@ -2691,7 +2691,7 @@ static struct intel_uncore_type hswep_uncore_irp = {
 
 static struct intel_uncore_type hswep_uncore_qpi = {
        .name                   = "qpi",
-       .num_counters           = 5,
+       .num_counters           = 4,
        .num_boxes              = 3,
        .perf_ctr_bits          = 48,
        .perf_ctr               = SNBEP_PCI_PMON_CTR0,
@@ -2773,7 +2773,7 @@ static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
 
 static struct intel_uncore_type hswep_uncore_r3qpi = {
        .name           = "r3qpi",
-       .num_counters   = 4,
+       .num_counters   = 3,
        .num_boxes      = 3,
        .perf_ctr_bits  = 44,
        .constraints    = hswep_uncore_r3qpi_constraints,
@@ -2972,7 +2972,7 @@ static struct intel_uncore_type bdx_uncore_ha = {
 
 static struct intel_uncore_type bdx_uncore_imc = {
        .name           = "imc",
-       .num_counters   = 5,
+       .num_counters   = 4,
        .num_boxes      = 8,
        .perf_ctr_bits  = 48,
        .fixed_ctr_bits = 48,
index 6c1ff31d99ffeb0d0a28c5ee472bb1865ff23df3..495c776de4b470f8eb53236a0ddeb2ca8f043b6b 100644 (file)
@@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
                *cursor &= 0xfe;
        }
        /*
-        * Similar treatment for VEX3 prefix.
-        * TODO: add XOP/EVEX treatment when insn decoder supports them
+        * Similar treatment for VEX3/EVEX prefix.
+        * TODO: add XOP treatment when insn decoder supports them
         */
-       if (insn->vex_prefix.nbytes == 3) {
+       if (insn->vex_prefix.nbytes >= 3) {
                /*
                 * vex2:     c5    rvvvvLpp   (has no b bit)
                 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
                 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
-                *   (evex will need setting of both b and x since
-                *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
-                * Setting VEX3.b (setting because it has inverted meaning):
+                * Setting VEX3.b (setting because it has inverted meaning).
+                * Setting EVEX.x since (in non-SIB encoding) EVEX.x
+                * is the 4th bit of MODRM.rm, and needs the same treatment.
+                * For VEX3-encoded insns, VEX3.x value has no effect in
+                * non-SIB encoding, the change is superfluous but harmless.
                 */
                cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
-               *cursor |= 0x20;
+               *cursor |= 0x60;
        }
 
        /*
@@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 
        reg = MODRM_REG(insn);  /* Fetch modrm.reg */
        reg2 = 0xff;            /* Fetch vex.vvvv */
-       if (insn->vex_prefix.nbytes == 2)
-               reg2 = insn->vex_prefix.bytes[1];
-       else if (insn->vex_prefix.nbytes == 3)
+       if (insn->vex_prefix.nbytes)
                reg2 = insn->vex_prefix.bytes[2];
        /*
-        * TODO: add XOP, EXEV vvvv reading.
+        * TODO: add XOP vvvv reading.
         *
         * vex.vvvv field is in bits 6-3, bits are inverted.
         * But in 32-bit mode, high-order bit may be ignored.
index 66b2166ea4a1c715a0362ed99cbeb3692a031476..0df8a0370d328e9d8a31f266dd09336e65e9f013 100644 (file)
@@ -187,7 +187,8 @@ EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
 void uv_bios_init(void)
 {
        uv_systab = NULL;
-       if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) {
+       if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+           !efi.uv_systab || efi_runtime_disabled()) {
                pr_crit("UV: UVsystab: missing\n");
                return;
        }
index 8c234dd9b8bc595c21d9d5abaa7327e02b9cc572..80cc7c089a15e908ae070d95ad4879e5b6309555 100644 (file)
@@ -1527,11 +1527,12 @@ static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
 {
        struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
        u64 offset = nfit_blk->stat_offset + mmio->size * bw;
+       const u32 STATUS_MASK = 0x80000037;
 
        if (mmio->num_lines)
                offset = to_interleave_offset(offset, mmio);
 
-       return readl(mmio->addr.base + offset);
+       return readl(mmio->addr.base + offset) & STATUS_MASK;
 }
 
 static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
index 1a04af6d24212cdd16e5c8091d01f16e2b409384..6c6519f6492a4198c78cae1eaad5e33e03efd2d9 100644 (file)
@@ -3950,6 +3950,7 @@ static void rbd_dev_release(struct device *dev)
        bool need_put = !!rbd_dev->opts;
 
        ceph_oid_destroy(&rbd_dev->header_oid);
+       ceph_oloc_destroy(&rbd_dev->header_oloc);
 
        rbd_put_client(rbd_dev->rbd_client);
        rbd_spec_put(rbd_dev->spec);
@@ -5336,15 +5337,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
        }
        spec->pool_id = (u64)rc;
 
-       /* The ceph file layout needs to fit pool id in 32 bits */
-
-       if (spec->pool_id > (u64)U32_MAX) {
-               rbd_warn(NULL, "pool id too large (%llu > %u)",
-                               (unsigned long long)spec->pool_id, U32_MAX);
-               rc = -EIO;
-               goto err_out_client;
-       }
-
        rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
        if (!rbd_dev) {
                rc = -ENOMEM;
index 1523e05c46fc95b29c47af3b51ebdc9f93af9029..93b1aaa5ba3be26d5de4d0a7b461ecc2fe7beb61 100644 (file)
@@ -391,22 +391,16 @@ static int init_vq(struct virtio_blk *vblk)
                num_vqs = 1;
 
        vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
-       if (!vblk->vqs) {
-               err = -ENOMEM;
-               goto out;
-       }
+       if (!vblk->vqs)
+               return -ENOMEM;
 
        names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
-       if (!names)
-               goto err_names;
-
        callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
-       if (!callbacks)
-               goto err_callbacks;
-
        vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
-       if (!vqs)
-               goto err_vqs;
+       if (!names || !callbacks || !vqs) {
+               err = -ENOMEM;
+               goto out;
+       }
 
        for (i = 0; i < num_vqs; i++) {
                callbacks[i] = virtblk_done;
@@ -417,7 +411,7 @@ static int init_vq(struct virtio_blk *vblk)
        /* Discover virtqueues and write information to configuration.  */
        err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
        if (err)
-               goto err_find_vqs;
+               goto out;
 
        for (i = 0; i < num_vqs; i++) {
                spin_lock_init(&vblk->vqs[i].lock);
@@ -425,16 +419,12 @@ static int init_vq(struct virtio_blk *vblk)
        }
        vblk->num_vqs = num_vqs;
 
- err_find_vqs:
+out:
        kfree(vqs);
- err_vqs:
        kfree(callbacks);
- err_callbacks:
        kfree(names);
- err_names:
        if (err)
                kfree(vblk->vqs);
- out:
        return err;
 }
 
index c99c24bc79b02262298ea64b5ed3e6e625429f4d..9ae6c116c4746286770052fb6f241baddc08d5b0 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/efi.h>
+#include <linux/vmalloc.h>
 
 #define NO_FURTHER_WRITE_ACTION -1
 
@@ -108,14 +109,15 @@ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
        int ret;
        void *cap_hdr_temp;
 
-       cap_hdr_temp = kmap(cap_info->pages[0]);
+       cap_hdr_temp = vmap(cap_info->pages, cap_info->index,
+                       VM_MAP, PAGE_KERNEL);
        if (!cap_hdr_temp) {
-               pr_debug("%s: kmap() failed\n", __func__);
+               pr_debug("%s: vmap() failed\n", __func__);
                return -EFAULT;
        }
 
        ret = efi_capsule_update(cap_hdr_temp, cap_info->pages);
-       kunmap(cap_info->pages[0]);
+       vunmap(cap_hdr_temp);
        if (ret) {
                pr_err("%s: efi_capsule_update() failed\n", __func__);
                return ret;
index 53b9fd2293ee8f5af6f7f6a38de61730c8c99ec9..6eedff45e6d77811a5c4922e6be6fa5cabb3b307 100644 (file)
@@ -190,9 +190,9 @@ efi_capsule_update_locked(efi_capsule_header_t *capsule,
  * map the capsule described by @capsule with its data in @pages and
  * send it to the firmware via the UpdateCapsule() runtime service.
  *
- * @capsule must be a virtual mapping of the first page in @pages
- * (@pages[0]) in the kernel address space. That is, a
- * capsule_header_t that describes the entire contents of the capsule
+ * @capsule must be a virtual mapping of the complete capsule update in the
+ * kernel address space, as the capsule can be consumed immediately.
+ * A capsule_header_t that describes the entire contents of the capsule
  * must be at the start of the first data page.
  *
  * Even though this function will validate that the firmware supports
index bdee9a01ef35ad6fa34f1238ea7268454e0e56c0..c466ee2b0c973a7c77cb16566770dec3b426db33 100644 (file)
@@ -90,8 +90,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
         */
        mutex_lock(&afu->contexts_lock);
        idr_preload(GFP_KERNEL);
-       i = idr_alloc(&ctx->afu->contexts_idr, ctx,
-                     ctx->afu->adapter->native->sl_ops->min_pe,
+       i = idr_alloc(&ctx->afu->contexts_idr, ctx, ctx->afu->adapter->min_pe,
                      ctx->afu->num_procs, GFP_NOWAIT);
        idr_preload_end();
        mutex_unlock(&afu->contexts_lock);
index de090533f18cb8eb5b2c8f0a0b3bfbf6f11e5e00..344a0ff8f8c7df97e5328f89e46cd3f55b791d68 100644 (file)
@@ -561,7 +561,6 @@ struct cxl_service_layer_ops {
        u64 (*timebase_read)(struct cxl *adapter);
        int capi_mode;
        bool needs_reset_before_disable;
-       int min_pe;
 };
 
 struct cxl_native {
@@ -603,6 +602,7 @@ struct cxl {
        struct bin_attribute cxl_attr;
        int adapter_num;
        int user_irqs;
+       int min_pe;
        u64 ps_size;
        u16 psl_rev;
        u16 base_image;
index 3bcdaee11ba159aa13580bdd48151e0ef0755dbd..e606fdc4bc9cc3ec0ff7d3f29d4e691c30a26bd4 100644 (file)
@@ -924,7 +924,7 @@ static irqreturn_t native_irq_multiplexed(int irq, void *data)
        return fail_psl_irq(afu, &irq_info);
 }
 
-void native_irq_wait(struct cxl_context *ctx)
+static void native_irq_wait(struct cxl_context *ctx)
 {
        u64 dsisr;
        int timeout = 1000;
index d152e2de8c9375e2760b03cd93857d542f9a36d4..6f0c4ac4b6498991913647b0d70b2d4805870f0b 100644 (file)
@@ -379,7 +379,7 @@ static int calc_capp_routing(struct pci_dev *dev, u64 *chipid, u64 *capp_unit_id
 
 static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_dev *dev)
 {
-       u64 psl_dsnctl;
+       u64 psl_dsnctl, psl_fircntl;
        u64 chipid;
        u64 capp_unit_id;
        int rc;
@@ -398,8 +398,11 @@ static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_
        cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL);
        /* snoop write mask */
        cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL);
-       /* set fir_accum */
-       cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL);
+       /* set fir_cntl to recommended value for production env */
+       psl_fircntl = (0x2ULL << (63-3)); /* ce_report */
+       psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */
+       psl_fircntl |= 0x1ULL; /* ce_thresh */
+       cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl);
        /* for debugging with trace arrays */
        cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL);
 
@@ -1521,14 +1524,15 @@ static const struct cxl_service_layer_ops xsl_ops = {
        .write_timebase_ctrl = write_timebase_ctrl_xsl,
        .timebase_read = timebase_read_xsl,
        .capi_mode = OPAL_PHB_CAPI_MODE_DMA,
-       .min_pe = 1, /* Workaround for Mellanox CX4 HW bug */
 };
 
 static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev)
 {
        if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) {
+               /* Mellanox CX-4 */
                dev_info(&adapter->dev, "Device uses an XSL\n");
                adapter->native->sl_ops = &xsl_ops;
+               adapter->min_pe = 1; /* Workaround for CX-4 hardware bug */
        } else {
                dev_info(&adapter->dev, "Device uses a PSL\n");
                adapter->native->sl_ops = &psl_ops;
index dee8def1c1936c36fc82d894e269c1d8825d5c29..7ada5f1b7bb67b808ec824561b0ceb6def317562 100644 (file)
@@ -221,7 +221,7 @@ int cxl_pci_vphb_add(struct cxl_afu *afu)
        /* Setup the PHB using arch provided callback */
        phb->ops = &cxl_pcie_pci_ops;
        phb->cfg_addr = NULL;
-       phb->cfg_data = 0;
+       phb->cfg_data = NULL;
        phb->private_data = afu;
        phb->controller_ops = cxl_pci_controller_ops;
 
index 88e91666f145f0e7b0e95bf560ecc5e78f2ada22..368795aad5c974dbb59b1a43825b76c3e443acd2 100644 (file)
@@ -1269,6 +1269,7 @@ static int btt_blk_init(struct btt *btt)
                }
        }
        set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
+       btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
        revalidate_disk(btt->btt_disk);
 
        return 0;
index 3fa7919f94a8785860afd3487d803f5b3010acd9..97dd2925ed6e95f1f06ffa6f4a0b5643acd4c07a 100644 (file)
@@ -140,10 +140,30 @@ static ssize_t namespace_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(namespace);
 
+static ssize_t size_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nd_btt *nd_btt = to_nd_btt(dev);
+       ssize_t rc;
+
+       device_lock(dev);
+       if (dev->driver)
+               rc = sprintf(buf, "%llu\n", nd_btt->size);
+       else {
+               /* no size to convey if the btt instance is disabled */
+               rc = -ENXIO;
+       }
+       device_unlock(dev);
+
+       return rc;
+}
+static DEVICE_ATTR_RO(size);
+
 static struct attribute *nd_btt_attributes[] = {
        &dev_attr_sector_size.attr,
        &dev_attr_namespace.attr,
        &dev_attr_uuid.attr,
+       &dev_attr_size.attr,
        NULL,
 };
 
index 40476399d22793aece0438da0f5a0976cef063ab..8024a0ef86d3af9f0ba5ef169260e2e342023d8e 100644 (file)
@@ -143,6 +143,7 @@ struct nd_btt {
        struct nd_namespace_common *ndns;
        struct btt *btt;
        unsigned long lbasize;
+       u64 size;
        u8 *uuid;
        int id;
 };
index a02981efdad570148e39925cfbe4a8579ca7f7ca..eafa6138a6b81866a3bc09739ab3398513746253 100644 (file)
@@ -1411,6 +1411,8 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
        if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
                pci_msi_domain_update_chip_ops(info);
 
+       info->flags |= MSI_FLAG_ACTIVATE_EARLY;
+
        domain = msi_create_irq_domain(fwnode, info, parent);
        if (!domain)
                return NULL;
index cecc15a880de6928fed5ee0d35588fbae03423dd..3fa17ac8df5492f4d2e4681d1a4f07f2f8defffa 100644 (file)
@@ -1080,8 +1080,8 @@ static int riocm_send_ack(struct rio_channel *ch)
 static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
                                           long timeout)
 {
-       struct rio_channel *ch = NULL;
-       struct rio_channel *new_ch = NULL;
+       struct rio_channel *ch;
+       struct rio_channel *new_ch;
        struct conn_req *req;
        struct cm_peer *peer;
        int found = 0;
@@ -1155,6 +1155,7 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
 
        spin_unlock_bh(&ch->lock);
        riocm_put_channel(ch);
+       ch = NULL;
        kfree(req);
 
        down_read(&rdev_sem);
@@ -1172,7 +1173,7 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
        if (!found) {
                /* If peer device object not found, simply ignore the request */
                err = -ENODEV;
-               goto err_nodev;
+               goto err_put_new_ch;
        }
 
        new_ch->rdev = peer->rdev;
@@ -1184,15 +1185,16 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
 
        *new_ch_id = new_ch->id;
        return new_ch;
+
+err_put_new_ch:
+       spin_lock_bh(&idr_lock);
+       idr_remove(&ch_idr, new_ch->id);
+       spin_unlock_bh(&idr_lock);
+       riocm_put_channel(new_ch);
+
 err_put:
-       riocm_put_channel(ch);
-err_nodev:
-       if (new_ch) {
-               spin_lock_bh(&idr_lock);
-               idr_remove(&ch_idr, new_ch->id);
-               spin_unlock_bh(&idr_lock);
-               riocm_put_channel(new_ch);
-       }
+       if (ch)
+               riocm_put_channel(ch);
        *new_ch_id = 0;
        return ERR_PTR(err);
 }
index 241891a57caf8e97637d3e6c2ce6baecd021589a..df40692a9011ceb2cb2481af2eaa58a9ff92136e 100644 (file)
@@ -6,4 +6,8 @@
 # it under the terms of the GNU General Public License (version 2 only)
 # as published by the Free Software Foundation.
 
-obj-$(CONFIG_S390_GUEST) += kvm_virtio.o virtio_ccw.o
+s390-virtio-objs := virtio_ccw.o
+ifdef CONFIG_S390_GUEST_OLD_TRANSPORT
+s390-virtio-objs += kvm_virtio.o
+endif
+obj-$(CONFIG_S390_GUEST) += $(s390-virtio-objs)
index 1d060fd293a3b8e8a4d4095b2ad84241913272d5..5e5c11f37b2420cbb406ff5591ad15fe615f5ed8 100644 (file)
@@ -458,6 +458,8 @@ static int __init kvm_devices_init(void)
        if (test_devices_support(total_memory_size) < 0)
                return -ENODEV;
 
+       pr_warn("The s390-virtio transport is deprecated. Please switch to a modern host providing virtio-ccw.\n");
+
        rc = vmem_add_mapping(total_memory_size, PAGE_SIZE);
        if (rc)
                return rc;
@@ -482,7 +484,7 @@ static int __init kvm_devices_init(void)
 }
 
 /* code for early console output with virtio_console */
-static __init int early_put_chars(u32 vtermno, const char *buf, int count)
+static int early_put_chars(u32 vtermno, const char *buf, int count)
 {
        char scratch[17];
        unsigned int len = count;
index 0ddf3a2dbfc490a58d150039a57136460e9a1e08..e3b30ea9ece5945c935791798ab27ed8f6c3dd11 100644 (file)
@@ -307,6 +307,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
 
        vhost_disable_notify(&vsock->dev, vq);
        for (;;) {
+               u32 len;
+
                if (!vhost_vsock_more_replies(vsock)) {
                        /* Stop tx until the device processes already
                         * pending replies.  Leave tx virtqueue
@@ -334,13 +336,15 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
                        continue;
                }
 
+               len = pkt->len;
+
                /* Only accept correctly addressed packets */
                if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
                        virtio_transport_recv_pkt(pkt);
                else
                        virtio_transport_free_pkt(pkt);
 
-               vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+               vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
                added = true;
        }
 
index 114a0c88afb8bfad71dc7b4612ccf9ed66f59d54..e383ecdaca594ce0786c321af35ee59b1807007e 100644 (file)
@@ -327,6 +327,8 @@ static inline int virtqueue_add(struct virtqueue *_vq,
                 * host should service the ring ASAP. */
                if (out_sgs)
                        vq->notify(&vq->vq);
+               if (indirect)
+                       kfree(desc);
                END_USE(vq);
                return -ENOSPC;
        }
@@ -426,6 +428,7 @@ unmap_release:
        if (indirect)
                kfree(desc);
 
+       END_USE(vq);
        return -EIO;
 }
 
index b6d210e7a993fd67634b3523aa3e61a1121d31bd..d9ddcfc18c91f8acd0ac977171116e3659aa2242 100644 (file)
@@ -862,33 +862,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
-                                    struct btrfs_trans_handle *trans,
-                                    u64 ref_root, u64 bytenr, u64 num_bytes)
-{
-       struct btrfs_delayed_ref_root *delayed_refs;
-       struct btrfs_delayed_ref_head *ref_head;
-       int ret = 0;
-
-       if (!fs_info->quota_enabled || !is_fstree(ref_root))
-               return 0;
-
-       delayed_refs = &trans->transaction->delayed_refs;
-
-       spin_lock(&delayed_refs->lock);
-       ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
-       if (!ref_head) {
-               ret = -ENOENT;
-               goto out;
-       }
-       WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
-       ref_head->qgroup_ref_root = ref_root;
-       ref_head->qgroup_reserved = num_bytes;
-out:
-       spin_unlock(&delayed_refs->lock);
-       return ret;
-}
-
 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
index 5fca9534a2712b0b4dec9e9b15a1e024f272bb2f..43f3629760e90f186730842b0b1c609f799ae256 100644 (file)
@@ -250,9 +250,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                               u64 parent, u64 ref_root,
                               u64 owner, u64 offset, u64 reserved, int action,
                               struct btrfs_delayed_extent_op *extent_op);
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
-                                    struct btrfs_trans_handle *trans,
-                                    u64 ref_root, u64 bytenr, u64 num_bytes);
 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
index 9404121fd5f7b44f165c6f76c856548cf5722aff..5842423f8f47b6a7146240c6b47ea1ead637f984 100644 (file)
@@ -2033,6 +2033,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                 */
                clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                          &BTRFS_I(inode)->runtime_flags);
+               /*
+                * An ordered extent might have started before and completed
+                * already with io errors, in which case the inode was not
+                * updated and we end up here. So check the inode's mapping
+                * flags for any errors that might have happened while doing
+                * writeback of file data.
+                */
+               ret = btrfs_inode_check_errors(inode);
                inode_unlock(inode);
                goto out;
        }
index 2f5975954ccf198737e07b29c8706024114a78ae..08dfc57e22705363f1159def79316263a9f4293a 100644 (file)
@@ -3435,10 +3435,10 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                found_key.offset = 0;
                inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
                ret = PTR_ERR_OR_ZERO(inode);
-               if (ret && ret != -ESTALE)
+               if (ret && ret != -ENOENT)
                        goto out;
 
-               if (ret == -ESTALE && root == root->fs_info->tree_root) {
+               if (ret == -ENOENT && root == root->fs_info->tree_root) {
                        struct btrfs_root *dead_root;
                        struct btrfs_fs_info *fs_info = root->fs_info;
                        int is_dead_root = 0;
@@ -3474,7 +3474,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                 * Inode is already gone but the orphan item is still there,
                 * kill the orphan item.
                 */
-               if (ret == -ESTALE) {
+               if (ret == -ENOENT) {
                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                ret = PTR_ERR(trans);
@@ -3633,7 +3633,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
 /*
  * read an inode from the btree into the in-memory inode
  */
-static void btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode)
 {
        struct btrfs_path *path;
        struct extent_buffer *leaf;
@@ -3652,14 +3652,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
                filled = true;
 
        path = btrfs_alloc_path();
-       if (!path)
+       if (!path) {
+               ret = -ENOMEM;
                goto make_bad;
+       }
 
        memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
        ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
-       if (ret)
+       if (ret) {
+               if (ret > 0)
+                       ret = -ENOENT;
                goto make_bad;
+       }
 
        leaf = path->nodes[0];
 
@@ -3812,11 +3817,12 @@ cache_acl:
        }
 
        btrfs_update_iflags(inode);
-       return;
+       return 0;
 
 make_bad:
        btrfs_free_path(path);
        make_bad_inode(inode);
+       return ret;
 }
 
 /*
@@ -4204,6 +4210,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        int err = 0;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_trans_handle *trans;
+       u64 last_unlink_trans;
 
        if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
                return -ENOTEMPTY;
@@ -4226,11 +4233,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        if (err)
                goto out;
 
+       last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+
        /* now the directory is empty */
        err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
                                 dentry->d_name.name, dentry->d_name.len);
-       if (!err)
+       if (!err) {
                btrfs_i_size_write(inode, 0);
+               /*
+                * Propagate the last_unlink_trans value of the deleted dir to
+                * its parent directory. This is to prevent an unrecoverable
+                * log tree in the case we do something like this:
+                * 1) create dir foo
+                * 2) create snapshot under dir foo
+                * 3) delete the snapshot
+                * 4) rmdir foo
+                * 5) mkdir foo
+                * 6) fsync foo or some file inside foo
+                */
+               if (last_unlink_trans >= trans->transid)
+                       BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
+       }
 out:
        btrfs_end_transaction(trans, root);
        btrfs_btree_balance_dirty(root);
@@ -5606,7 +5629,9 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
                return ERR_PTR(-ENOMEM);
 
        if (inode->i_state & I_NEW) {
-               btrfs_read_locked_inode(inode);
+               int ret;
+
+               ret = btrfs_read_locked_inode(inode);
                if (!is_bad_inode(inode)) {
                        inode_tree_add(inode);
                        unlock_new_inode(inode);
@@ -5615,7 +5640,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
                } else {
                        unlock_new_inode(inode);
                        iput(inode);
-                       inode = ERR_PTR(-ESTALE);
+                       ASSERT(ret < 0);
+                       inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
                }
        }
 
index b71dd298385c1b5cfb3c00761db0c8ee674e01e7..efe129fe26788c1078d737a2bc238373c2efcbac 100644 (file)
@@ -231,7 +231,6 @@ struct pending_dir_move {
        u64 parent_ino;
        u64 ino;
        u64 gen;
-       bool is_orphan;
        struct list_head update_refs;
 };
 
@@ -274,6 +273,39 @@ struct name_cache_entry {
        char name[];
 };
 
+static void inconsistent_snapshot_error(struct send_ctx *sctx,
+                                       enum btrfs_compare_tree_result result,
+                                       const char *what)
+{
+       const char *result_string;
+
+       switch (result) {
+       case BTRFS_COMPARE_TREE_NEW:
+               result_string = "new";
+               break;
+       case BTRFS_COMPARE_TREE_DELETED:
+               result_string = "deleted";
+               break;
+       case BTRFS_COMPARE_TREE_CHANGED:
+               result_string = "updated";
+               break;
+       case BTRFS_COMPARE_TREE_SAME:
+               ASSERT(0);
+               result_string = "unchanged";
+               break;
+       default:
+               ASSERT(0);
+               result_string = "unexpected";
+       }
+
+       btrfs_err(sctx->send_root->fs_info,
+                 "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
+                 result_string, what, sctx->cmp_key->objectid,
+                 sctx->send_root->root_key.objectid,
+                 (sctx->parent_root ?
+                  sctx->parent_root->root_key.objectid : 0));
+}
+
 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
 
 static struct waiting_dir_move *
@@ -1861,7 +1893,8 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
         * was already unlinked/moved, so we can safely assume that we will not
         * overwrite anything at this point in time.
         */
-       if (other_inode > sctx->send_progress) {
+       if (other_inode > sctx->send_progress ||
+           is_waiting_for_move(sctx, other_inode)) {
                ret = get_inode_info(sctx->parent_root, other_inode, NULL,
                                who_gen, NULL, NULL, NULL, NULL);
                if (ret < 0)
@@ -2502,6 +2535,8 @@ verbose_printk("btrfs: send_utimes %llu\n", ino);
        key.type = BTRFS_INODE_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
+       if (ret > 0)
+               ret = -ENOENT;
        if (ret < 0)
                goto out;
 
@@ -2947,6 +2982,10 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
                }
 
                if (loc.objectid > send_progress) {
+                       struct orphan_dir_info *odi;
+
+                       odi = get_orphan_dir_info(sctx, dir);
+                       free_orphan_dir_info(sctx, odi);
                        ret = 0;
                        goto out;
                }
@@ -3047,7 +3086,6 @@ static int add_pending_dir_move(struct send_ctx *sctx,
        pm->parent_ino = parent_ino;
        pm->ino = ino;
        pm->gen = ino_gen;
-       pm->is_orphan = is_orphan;
        INIT_LIST_HEAD(&pm->list);
        INIT_LIST_HEAD(&pm->update_refs);
        RB_CLEAR_NODE(&pm->node);
@@ -3113,6 +3151,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
        return NULL;
 }
 
+static int path_loop(struct send_ctx *sctx, struct fs_path *name,
+                    u64 ino, u64 gen, u64 *ancestor_ino)
+{
+       int ret = 0;
+       u64 parent_inode = 0;
+       u64 parent_gen = 0;
+       u64 start_ino = ino;
+
+       *ancestor_ino = 0;
+       while (ino != BTRFS_FIRST_FREE_OBJECTID) {
+               fs_path_reset(name);
+
+               if (is_waiting_for_rm(sctx, ino))
+                       break;
+               if (is_waiting_for_move(sctx, ino)) {
+                       if (*ancestor_ino == 0)
+                               *ancestor_ino = ino;
+                       ret = get_first_ref(sctx->parent_root, ino,
+                                           &parent_inode, &parent_gen, name);
+               } else {
+                       ret = __get_cur_name_and_parent(sctx, ino, gen,
+                                                       &parent_inode,
+                                                       &parent_gen, name);
+                       if (ret > 0) {
+                               ret = 0;
+                               break;
+                       }
+               }
+               if (ret < 0)
+                       break;
+               if (parent_inode == start_ino) {
+                       ret = 1;
+                       if (*ancestor_ino == 0)
+                               *ancestor_ino = ino;
+                       break;
+               }
+               ino = parent_inode;
+               gen = parent_gen;
+       }
+       return ret;
+}
+
 static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 {
        struct fs_path *from_path = NULL;
@@ -3123,6 +3203,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
        u64 parent_ino, parent_gen;
        struct waiting_dir_move *dm = NULL;
        u64 rmdir_ino = 0;
+       u64 ancestor;
+       bool is_orphan;
        int ret;
 
        name = fs_path_alloc();
@@ -3135,9 +3217,10 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
        dm = get_waiting_dir_move(sctx, pm->ino);
        ASSERT(dm);
        rmdir_ino = dm->rmdir_ino;
+       is_orphan = dm->orphanized;
        free_waiting_dir_move(sctx, dm);
 
-       if (pm->is_orphan) {
+       if (is_orphan) {
                ret = gen_unique_name(sctx, pm->ino,
                                      pm->gen, from_path);
        } else {
@@ -3155,6 +3238,24 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
                goto out;
 
        sctx->send_progress = sctx->cur_ino + 1;
+       ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
+       if (ret < 0)
+               goto out;
+       if (ret) {
+               LIST_HEAD(deleted_refs);
+               ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
+               ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
+                                          &pm->update_refs, &deleted_refs,
+                                          is_orphan);
+               if (ret < 0)
+                       goto out;
+               if (rmdir_ino) {
+                       dm = get_waiting_dir_move(sctx, pm->ino);
+                       ASSERT(dm);
+                       dm->rmdir_ino = rmdir_ino;
+               }
+               goto out;
+       }
        fs_path_reset(name);
        to_path = name;
        name = NULL;
@@ -3174,7 +3275,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
                        /* already deleted */
                        goto finish;
                }
-               ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
+               ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
                if (ret < 0)
                        goto out;
                if (!ret)
@@ -3204,8 +3305,18 @@ finish:
         * and old parent(s).
         */
        list_for_each_entry(cur, &pm->update_refs, list) {
-               if (cur->dir == rmdir_ino)
+               /*
+                * The parent inode might have been deleted in the send snapshot
+                */
+               ret = get_inode_info(sctx->send_root, cur->dir, NULL,
+                                    NULL, NULL, NULL, NULL, NULL);
+               if (ret == -ENOENT) {
+                       ret = 0;
                        continue;
+               }
+               if (ret < 0)
+                       goto out;
+
                ret = send_utimes(sctx, cur->dir, cur->dir_gen);
                if (ret < 0)
                        goto out;
@@ -3325,6 +3436,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
        u64 left_gen;
        u64 right_gen;
        int ret = 0;
+       struct waiting_dir_move *wdm;
 
        if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
                return 0;
@@ -3383,7 +3495,8 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
                goto out;
        }
 
-       if (is_waiting_for_move(sctx, di_key.objectid)) {
+       wdm = get_waiting_dir_move(sctx, di_key.objectid);
+       if (wdm && !wdm->orphanized) {
                ret = add_pending_dir_move(sctx,
                                           sctx->cur_ino,
                                           sctx->cur_inode_gen,
@@ -3470,7 +3583,8 @@ static int wait_for_parent_move(struct send_ctx *sctx,
                        ret = is_ancestor(sctx->parent_root,
                                          sctx->cur_ino, sctx->cur_inode_gen,
                                          ino, path_before);
-                       break;
+                       if (ret)
+                               break;
                }
 
                fs_path_reset(path_before);
@@ -3643,11 +3757,26 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                                goto out;
                        if (ret) {
                                struct name_cache_entry *nce;
+                               struct waiting_dir_move *wdm;
 
                                ret = orphanize_inode(sctx, ow_inode, ow_gen,
                                                cur->full_path);
                                if (ret < 0)
                                        goto out;
+
+                               /*
+                                * If ow_inode has its rename operation delayed
+                                * make sure that its orphanized name is used in
+                                * the source path when performing its rename
+                                * operation.
+                                */
+                               if (is_waiting_for_move(sctx, ow_inode)) {
+                                       wdm = get_waiting_dir_move(sctx,
+                                                                  ow_inode);
+                                       ASSERT(wdm);
+                                       wdm->orphanized = true;
+                               }
+
                                /*
                                 * Make sure we clear our orphanized inode's
                                 * name from the name cache. This is because the
@@ -3663,6 +3792,19 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                                        name_cache_delete(sctx, nce);
                                        kfree(nce);
                                }
+
+                               /*
+                                * ow_inode might currently be an ancestor of
+                                * cur_ino, therefore compute valid_path (the
+                                * current path of cur_ino) again because it
+                                * might contain the pre-orphanization name of
+                                * ow_inode, which is no longer valid.
+                                */
+                               fs_path_reset(valid_path);
+                               ret = get_cur_path(sctx, sctx->cur_ino,
+                                          sctx->cur_inode_gen, valid_path);
+                               if (ret < 0)
+                                       goto out;
                        } else {
                                ret = send_unlink(sctx, cur->full_path);
                                if (ret < 0)
@@ -5602,7 +5744,10 @@ static int changed_ref(struct send_ctx *sctx,
 {
        int ret = 0;
 
-       BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+       if (sctx->cur_ino != sctx->cmp_key->objectid) {
+               inconsistent_snapshot_error(sctx, result, "reference");
+               return -EIO;
+       }
 
        if (!sctx->cur_inode_new_gen &&
            sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
@@ -5627,7 +5772,10 @@ static int changed_xattr(struct send_ctx *sctx,
 {
        int ret = 0;
 
-       BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+       if (sctx->cur_ino != sctx->cmp_key->objectid) {
+               inconsistent_snapshot_error(sctx, result, "xattr");
+               return -EIO;
+       }
 
        if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
                if (result == BTRFS_COMPARE_TREE_NEW)
@@ -5651,7 +5799,10 @@ static int changed_extent(struct send_ctx *sctx,
 {
        int ret = 0;
 
-       BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+       if (sctx->cur_ino != sctx->cmp_key->objectid) {
+               inconsistent_snapshot_error(sctx, result, "extent");
+               return -EIO;
+       }
 
        if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
                if (result != BTRFS_COMPARE_TREE_DELETED)
index d31a0c4f56bed436e0eb933cceb592fdc498eb53..fff3f3efa43602e0c04f9e5e019bf0e85a239d6f 100644 (file)
@@ -4469,7 +4469,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
 static int btrfs_check_ref_name_override(struct extent_buffer *eb,
                                         const int slot,
                                         const struct btrfs_key *key,
-                                        struct inode *inode)
+                                        struct inode *inode,
+                                        u64 *other_ino)
 {
        int ret;
        struct btrfs_path *search_path;
@@ -4528,7 +4529,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
                                           search_path, parent,
                                           name, this_name_len, 0);
                if (di && !IS_ERR(di)) {
-                       ret = 1;
+                       struct btrfs_key di_key;
+
+                       btrfs_dir_item_key_to_cpu(search_path->nodes[0],
+                                                 di, &di_key);
+                       if (di_key.type == BTRFS_INODE_ITEM_KEY) {
+                               ret = 1;
+                               *other_ino = di_key.objectid;
+                       } else {
+                               ret = -EAGAIN;
+                       }
                        goto out;
                } else if (IS_ERR(di)) {
                        ret = PTR_ERR(di);
@@ -4722,16 +4732,71 @@ again:
                if ((min_key.type == BTRFS_INODE_REF_KEY ||
                     min_key.type == BTRFS_INODE_EXTREF_KEY) &&
                    BTRFS_I(inode)->generation == trans->transid) {
+                       u64 other_ino = 0;
+
                        ret = btrfs_check_ref_name_override(path->nodes[0],
                                                            path->slots[0],
-                                                           &min_key, inode);
+                                                           &min_key, inode,
+                                                           &other_ino);
                        if (ret < 0) {
                                err = ret;
                                goto out_unlock;
                        } else if (ret > 0) {
-                               err = 1;
-                               btrfs_set_log_full_commit(root->fs_info, trans);
-                               goto out_unlock;
+                               struct btrfs_key inode_key;
+                               struct inode *other_inode;
+
+                               if (ins_nr > 0) {
+                                       ins_nr++;
+                               } else {
+                                       ins_nr = 1;
+                                       ins_start_slot = path->slots[0];
+                               }
+                               ret = copy_items(trans, inode, dst_path, path,
+                                                &last_extent, ins_start_slot,
+                                                ins_nr, inode_only,
+                                                logged_isize);
+                               if (ret < 0) {
+                                       err = ret;
+                                       goto out_unlock;
+                               }
+                               ins_nr = 0;
+                               btrfs_release_path(path);
+                               inode_key.objectid = other_ino;
+                               inode_key.type = BTRFS_INODE_ITEM_KEY;
+                               inode_key.offset = 0;
+                               other_inode = btrfs_iget(root->fs_info->sb,
+                                                        &inode_key, root,
+                                                        NULL);
+                               /*
+                                * If the other inode that had a conflicting dir
+                                * entry was deleted in the current transaction,
+                                * we don't need to do more work nor fallback to
+                                * a transaction commit.
+                                */
+                               if (IS_ERR(other_inode) &&
+                                   PTR_ERR(other_inode) == -ENOENT) {
+                                       goto next_key;
+                               } else if (IS_ERR(other_inode)) {
+                                       err = PTR_ERR(other_inode);
+                                       goto out_unlock;
+                               }
+                               /*
+                                * We are safe logging the other inode without
+                                * acquiring its i_mutex as long as we log with
+                                * the LOG_INODE_EXISTS mode. We're safe against
+                                * concurrent renames of the other inode as well
+                                * because during a rename we pin the log and
+                                * update the log with the new name before we
+                                * unpin it.
+                                */
+                               err = btrfs_log_inode(trans, root, other_inode,
+                                                     LOG_INODE_EXISTS,
+                                                     0, LLONG_MAX, ctx);
+                               iput(other_inode);
+                               if (err)
+                                       goto out_unlock;
+                               else
+                                       goto next_key;
                        }
                }
 
@@ -4799,7 +4864,7 @@ next_slot:
                        ins_nr = 0;
                }
                btrfs_release_path(path);
-
+next_key:
                if (min_key.offset < (u64)-1) {
                        min_key.offset++;
                } else if (min_key.type < max_key.type) {
@@ -4993,8 +5058,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
                        break;
 
-               if (IS_ROOT(parent))
+               if (IS_ROOT(parent)) {
+                       inode = d_inode(parent);
+                       if (btrfs_must_commit_transaction(trans, inode))
+                               ret = 1;
                        break;
+               }
 
                parent = dget_parent(parent);
                dput(old_parent);
index 99115cae1652ac1661d37a5a286da7180b6d9d94..16e6ded0b7f281bf72e8074b9d2896713fe4aef2 100644 (file)
@@ -1347,9 +1347,12 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
 {
        struct inode *inode = &ci->vfs_inode;
        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
-       struct ceph_mds_session *session = *psession;
+       struct ceph_mds_session *session = NULL;
        int mds;
+
        dout("ceph_flush_snaps %p\n", inode);
+       if (psession)
+               session = *psession;
 retry:
        spin_lock(&ci->i_ceph_lock);
        if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) {
index fa59a85226b262f2fe086ec5dfc1bf6813711986..f72d4ae303b273a98ee2631d8ed3dde21a71e796 100644 (file)
@@ -2759,6 +2759,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
        } else {
                path = NULL;
                pathlen = 0;
+               pathbase = 0;
        }
 
        spin_lock(&ci->i_ceph_lock);
index 33da841a21bb2871f753fb38a72dd76ce2725ded..6f47527348042dc83f5b97f48870eed6d1afcf41 100644 (file)
@@ -338,6 +338,8 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
        case 0:
                break;
        case -NFS4ERR_EXPIRED:
+       case -NFS4ERR_ADMIN_REVOKED:
+       case -NFS4ERR_DELEG_REVOKED:
        case -NFS4ERR_STALE_STATEID:
        case -NFS4ERR_OLD_STATEID:
        case -NFS4ERR_BAD_STATEID:
index 324bfdc212504de591347da77c1ad3f1db595974..9bf64eacba5bd6d47a04ca3c9ac66b74912bdc72 100644 (file)
@@ -396,6 +396,10 @@ extern void nfs4_schedule_state_renewal(struct nfs_client *);
 extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
 extern void nfs4_kill_renewd(struct nfs_client *);
 extern void nfs4_renew_state(struct work_struct *);
+extern void nfs4_set_lease_period(struct nfs_client *clp,
+               unsigned long lease,
+               unsigned long lastrenewed);
+
 
 /* nfs4state.c */
 struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp);
index a036e93bdf9656813abec3a3565e26fca3b28838..1949bbd806ebd4381ec54ea06ea97fa2f5619bed 100644 (file)
@@ -4237,12 +4237,9 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str
                err = _nfs4_do_fsinfo(server, fhandle, fsinfo);
                trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err);
                if (err == 0) {
-                       struct nfs_client *clp = server->nfs_client;
-
-                       spin_lock(&clp->cl_lock);
-                       clp->cl_lease_time = fsinfo->lease_time * HZ;
-                       clp->cl_last_renewal = now;
-                       spin_unlock(&clp->cl_lock);
+                       nfs4_set_lease_period(server->nfs_client,
+                                       fsinfo->lease_time * HZ,
+                                       now);
                        break;
                }
                err = nfs4_handle_exception(server, err, &exception);
index e1ba58c3d1ad305ab28d932a5b90ac269092f98b..82e77198d17efdf656315f39c3d50c4d9aa568a9 100644 (file)
@@ -136,6 +136,26 @@ nfs4_kill_renewd(struct nfs_client *clp)
        cancel_delayed_work_sync(&clp->cl_renewd);
 }
 
+/**
+ * nfs4_set_lease_period - Sets the lease period on an nfs_client
+ *
+ * @clp: pointer to nfs_client
+ * @lease: new value for lease period
+ * @lastrenewed: time at which lease was last renewed
+ */
+void nfs4_set_lease_period(struct nfs_client *clp,
+               unsigned long lease,
+               unsigned long lastrenewed)
+{
+       spin_lock(&clp->cl_lock);
+       clp->cl_lease_time = lease;
+       clp->cl_last_renewal = lastrenewed;
+       spin_unlock(&clp->cl_lock);
+
+       /* Cap maximum reconnect timeout at 1/2 lease period */
+       rpc_cap_max_reconnect_timeout(clp->cl_rpcclient, lease >> 1);
+}
+
 /*
  * Local variables:
  *   c-basic-offset: 8
index 834b875900d62addf6db7f6eb590ce5c2d1b09bc..cada00aa5096d7dbd57a0b8717d1a6583abbbfa7 100644 (file)
@@ -277,20 +277,17 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
 {
        int status;
        struct nfs_fsinfo fsinfo;
+       unsigned long now;
 
        if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
                nfs4_schedule_state_renewal(clp);
                return 0;
        }
 
+       now = jiffies;
        status = nfs4_proc_get_lease_time(clp, &fsinfo);
        if (status == 0) {
-               /* Update lease time and schedule renewal */
-               spin_lock(&clp->cl_lock);
-               clp->cl_lease_time = fsinfo.lease_time * HZ;
-               clp->cl_last_renewal = jiffies;
-               spin_unlock(&clp->cl_lock);
-
+               nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now);
                nfs4_schedule_state_renewal(clp);
        }
 
index 09e18fdf61e5b48dd67234b19972a10dd5131662..b9a8c813e5e66b5e751080e1bd7b11b7e8d87634 100644 (file)
@@ -46,7 +46,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                cached = 0;
 
        for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
-               pages[lru] = global_page_state(NR_LRU_BASE + lru);
+               pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
 
        available = si_mem_available();
 
index 54a8e65e18b622edf10df6d07ca0d84fea9e49ae..7d026bf277131f7bc4c79529cc26488e3a8281ee 100644 (file)
 #include <asm-generic/qrwlock_types.h>
 
 /*
- * Writer states & reader shift and bias
+ * Writer states & reader shift and bias.
+ *
+ *       | +0 | +1 | +2 | +3 |
+ *   ----+----+----+----+----+
+ *    LE | 78 | 56 | 34 | 12 | 0x12345678
+ *   ----+----+----+----+----+
+ *       | wr |      rd      |
+ *       +----+----+----+----+
+ *
+ *   ----+----+----+----+----+
+ *    BE | 12 | 34 | 56 | 78 | 0x12345678
+ *   ----+----+----+----+----+
+ *       |      rd      | wr |
+ *       +----+----+----+----+
  */
 #define        _QW_WAITING     1               /* A writer is waiting     */
 #define        _QW_LOCKED      0xff            /* A writer holds the lock */
@@ -133,13 +146,23 @@ static inline void queued_read_unlock(struct qrwlock *lock)
        (void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
 }
 
+/**
+ * __qrwlock_write_byte - retrieve the write byte address of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: the write byte address of a queue rwlock
+ */
+static inline u8 *__qrwlock_write_byte(struct qrwlock *lock)
+{
+       return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
+}
+
 /**
  * queued_write_unlock - release write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
 static inline void queued_write_unlock(struct qrwlock *lock)
 {
-       smp_store_release((u8 *)&lock->cnts, 0);
+       smp_store_release(__qrwlock_write_byte(lock), 0);
 }
 
 /*
index 4f0bfe5912b2f1eb9be7a5c25f4e1ec64c2fafbe..e8c81fbd5f9cd11d4ac01adb929c7d1eee86e69e 100644 (file)
@@ -270,6 +270,8 @@ enum {
        MSI_FLAG_MULTI_PCI_MSI          = (1 << 2),
        /* Support PCI MSIX interrupts */
        MSI_FLAG_PCI_MSIX               = (1 << 3),
+       /* Needs early activation, required for PCI */
+       MSI_FLAG_ACTIVATE_EARLY         = (1 << 4),
 };
 
 int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
index 8ed4326164cc843b41da6fbfe69d85cee2d61232..2b6b43cc0dd5121d8d4f6024f8ec67f862ff3328 100644 (file)
@@ -743,7 +743,9 @@ struct perf_event_context {
        u64                             parent_gen;
        u64                             generation;
        int                             pin_count;
+#ifdef CONFIG_CGROUP_PERF
        int                             nr_cgroups;      /* cgroup evts */
+#endif
        void                            *task_ctx_data; /* pmu specific data */
        struct rcu_head                 rcu_head;
 };
@@ -769,7 +771,9 @@ struct perf_cpu_context {
        unsigned int                    hrtimer_active;
 
        struct pmu                      *unique_pmu;
+#ifdef CONFIG_CGROUP_PERF
        struct perf_cgroup              *cgrp;
+#endif
 };
 
 struct perf_output_handle {
index b6810c92b8bb14d9ef8c9d1036a0b25d40530ac3..5c02b0691587797e303758eb74a4cad7d5e577ff 100644 (file)
@@ -195,6 +195,8 @@ int         rpc_clnt_add_xprt(struct rpc_clnt *, struct xprt_create *,
                                struct rpc_xprt *,
                                void *),
                        void *data);
+void           rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+                       unsigned long timeo);
 
 const char *rpc_proc_name(const struct rpc_task *task);
 #endif /* __KERNEL__ */
index 5e3e1b63dbb3c97f0145bd15a23dba7d4e916528..a16070dd03eefe9281476183ff4b5a0692523a09 100644 (file)
@@ -218,7 +218,8 @@ struct rpc_xprt {
        struct work_struct      task_cleanup;
        struct timer_list       timer;
        unsigned long           last_used,
-                               idle_timeout;
+                               idle_timeout,
+                               max_reconnect_timeout;
 
        /*
         * Send stuff
index 6b011c19b50f969d66d3ec0b26d7036d6bac7525..1d57ed3d84d2c3d10d02dce56d728b9cf0ece17b 100644 (file)
@@ -32,7 +32,7 @@
  */
 
 #ifndef _UAPI_LINUX_VIRTIO_VSOCK_H
-#define _UAPI_LINUX_VIRTIO_VOSCK_H
+#define _UAPI_LINUX_VIRTIO_VSOCK_H
 
 #include <linux/types.h>
 #include <linux/virtio_ids.h>
index cbae529b7ce0999684c42d7906df3de3febdc209..180d526a55c3ab5b54befbb264279e61d7e5a519 100644 (file)
@@ -136,8 +136,8 @@ struct cxl_event_afu_driver_reserved {
         *
         * Of course the contents will be ABI, but that's up the AFU driver.
         */
-       size_t data_size;
-       u8 data[];
+       __u32 data_size;
+       __u8 data[];
 };
 
 struct cxl_event {
index a19550d80ab1724d03ac1b0799aad54a6f1cf823..1903b8f3a7057b5687f67bbe191758951d209968 100644 (file)
@@ -843,6 +843,32 @@ perf_cgroup_mark_enabled(struct perf_event *event,
                }
        }
 }
+
+/*
+ * Update cpuctx->cgrp so that it is set when the first cgroup event is added
+ * and cleared when the last cgroup event is removed.
+ */
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+                        struct perf_event_context *ctx, bool add)
+{
+       struct perf_cpu_context *cpuctx;
+
+       if (!is_cgroup_event(event))
+               return;
+
+       if (add && ctx->nr_cgroups++)
+               return;
+       else if (!add && --ctx->nr_cgroups)
+               return;
+       /*
+        * Because cgroup events are always per-cpu events,
+        * this will always be called from the right CPU.
+        */
+       cpuctx = __get_cpu_context(ctx);
+       cpuctx->cgrp = add ? event->cgrp : NULL;
+}
+
 #else /* !CONFIG_CGROUP_PERF */
 
 static inline bool
@@ -920,6 +946,13 @@ perf_cgroup_mark_enabled(struct perf_event *event,
                         struct perf_event_context *ctx)
 {
 }
+
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+                        struct perf_event_context *ctx, bool add)
+{
+}
+
 #endif
 
 /*
@@ -1392,6 +1425,7 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
+
        lockdep_assert_held(&ctx->lock);
 
        WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1412,8 +1446,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
                list_add_tail(&event->group_entry, list);
        }
 
-       if (is_cgroup_event(event))
-               ctx->nr_cgroups++;
+       list_update_cgroup_event(event, ctx, true);
 
        list_add_rcu(&event->event_entry, &ctx->event_list);
        ctx->nr_events++;
@@ -1581,8 +1614,6 @@ static void perf_group_attach(struct perf_event *event)
 static void
 list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-       struct perf_cpu_context *cpuctx;
-
        WARN_ON_ONCE(event->ctx != ctx);
        lockdep_assert_held(&ctx->lock);
 
@@ -1594,20 +1625,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 
        event->attach_state &= ~PERF_ATTACH_CONTEXT;
 
-       if (is_cgroup_event(event)) {
-               ctx->nr_cgroups--;
-               /*
-                * Because cgroup events are always per-cpu events, this will
-                * always be called from the right CPU.
-                */
-               cpuctx = __get_cpu_context(ctx);
-               /*
-                * If there are no more cgroup events then clear cgrp to avoid
-                * stale pointer in update_cgrp_time_from_cpuctx().
-                */
-               if (!ctx->nr_cgroups)
-                       cpuctx->cgrp = NULL;
-       }
+       list_update_cgroup_event(event, ctx, false);
 
        ctx->nr_events--;
        if (event->attr.inherit_stat)
@@ -1716,8 +1734,8 @@ static inline int pmu_filter_match(struct perf_event *event)
 static inline int
 event_filter_match(struct perf_event *event)
 {
-       return (event->cpu == -1 || event->cpu == smp_processor_id())
-           && perf_cgroup_match(event) && pmu_filter_match(event);
+       return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
+              perf_cgroup_match(event) && pmu_filter_match(event);
 }
 
 static void
@@ -1737,8 +1755,8 @@ event_sched_out(struct perf_event *event,
         * maintained, otherwise bogus information is return
         * via read() for time_enabled, time_running:
         */
-       if (event->state == PERF_EVENT_STATE_INACTIVE
-           && !event_filter_match(event)) {
+       if (event->state == PERF_EVENT_STATE_INACTIVE &&
+           !event_filter_match(event)) {
                delta = tstamp - event->tstamp_stopped;
                event->tstamp_running += delta;
                event->tstamp_stopped = tstamp;
@@ -2236,10 +2254,15 @@ perf_install_in_context(struct perf_event_context *ctx,
 
        lockdep_assert_held(&ctx->mutex);
 
-       event->ctx = ctx;
        if (event->cpu != -1)
                event->cpu = cpu;
 
+       /*
+        * Ensures that if we can observe event->ctx, both the event and ctx
+        * will be 'complete'. See perf_iterate_sb_cpu().
+        */
+       smp_store_release(&event->ctx, ctx);
+
        if (!task) {
                cpu_function_call(cpu, __perf_install_in_context, event);
                return;
@@ -5969,6 +5992,14 @@ static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
        struct perf_event *event;
 
        list_for_each_entry_rcu(event, &pel->list, sb_list) {
+               /*
+                * Skip events that are not fully formed yet; ensure that
+                * if we observe event->ctx, both event and ctx will be
+                * complete enough. See perf_install_in_context().
+                */
+               if (!smp_load_acquire(&event->ctx))
+                       continue;
+
                if (event->state < PERF_EVENT_STATE_INACTIVE)
                        continue;
                if (!event_filter_match(event))
index 33664f70e2d25e880efdbc8695fcc12989871150..46cb3a301bc1555a84607bab7b2aea8a2bcaf7e6 100644 (file)
@@ -179,7 +179,15 @@ int __read_mostly futex_cmpxchg_enabled;
  * Futex flags used to encode options to functions and preserve them across
  * restarts.
  */
-#define FLAGS_SHARED           0x01
+#ifdef CONFIG_MMU
+# define FLAGS_SHARED          0x01
+#else
+/*
+ * NOMMU does not have per process address space. Let the compiler optimize
+ * code away.
+ */
+# define FLAGS_SHARED          0x00
+#endif
 #define FLAGS_CLOCKRT          0x02
 #define FLAGS_HAS_TIMEOUT      0x04
 
@@ -405,6 +413,16 @@ static void get_futex_key_refs(union futex_key *key)
        if (!key->both.ptr)
                return;
 
+       /*
+        * On MMU less systems futexes are always "private" as there is no per
+        * process address space. We need the smp_mb() nevertheless - yes,
+        * arch/blackfin has MMU less SMP ...
+        */
+       if (!IS_ENABLED(CONFIG_MMU)) {
+               smp_mb(); /* explicit smp_mb(); (B) */
+               return;
+       }
+
        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
        case FUT_OFF_INODE:
                ihold(key->shared.inode); /* implies smp_mb(); (B) */
@@ -436,6 +454,9 @@ static void drop_futex_key_refs(union futex_key *key)
                return;
        }
 
+       if (!IS_ENABLED(CONFIG_MMU))
+               return;
+
        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
        case FUT_OFF_INODE:
                iput(key->shared.inode);
index 54999350162cbc89326a67a3728814e17871d784..19e9dfbe97fa53f732edd375cdd50341327750ac 100644 (file)
@@ -359,6 +359,17 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
                else
                        dev_dbg(dev, "irq [%d-%d] for MSI\n",
                                virq, virq + desc->nvec_used - 1);
+               /*
+                * This flag is set by the PCI layer as we need to activate
+                * the MSI entries before the PCI layer enables MSI in the
+                * card. Otherwise the card latches a random msi message.
+                */
+               if (info->flags & MSI_FLAG_ACTIVATE_EARLY) {
+                       struct irq_data *irq_data;
+
+                       irq_data = irq_domain_get_irq_data(domain, desc->irq);
+                       irq_domain_activate_irq(irq_data);
+               }
        }
 
        return 0;
index 37649e69056cf974e27d0137260f8ff46ad688df..8a99abf58080be21fbb954777b48aca24d4342b5 100644 (file)
@@ -450,7 +450,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
                                goto gotlock;
                        }
                }
-               WRITE_ONCE(pn->state, vcpu_halted);
+               WRITE_ONCE(pn->state, vcpu_hashed);
                qstat_inc(qstat_pv_wait_head, true);
                qstat_inc(qstat_pv_wait_again, waitcnt);
                pv_wait(&l->locked, _Q_SLOW_VAL);
index 22e02530984574a6fee718aad26720516c34d497..b9d0315162540d1236e5e1268f184531d8259114 100644 (file)
@@ -153,7 +153,6 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf,
                 */
                if ((counter == qstat_pv_latency_kick) ||
                    (counter == qstat_pv_latency_wake)) {
-                       stat = 0;
                        if (kicks)
                                stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
                }
index 5c883fe8e44016df1109e8f66dd73377dfecb5e9..2a906f20fba7c4ba63d89fe87ac3527607be453a 100644 (file)
@@ -74,6 +74,7 @@
 #include <linux/context_tracking.h>
 #include <linux/compiler.h>
 #include <linux/frame.h>
+#include <linux/prefetch.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
@@ -2971,6 +2972,23 @@ DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
 EXPORT_PER_CPU_SYMBOL(kstat);
 EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
 
+/*
+ * The function fair_sched_class.update_curr accesses the struct curr
+ * and its field curr->exec_start; when called from task_sched_runtime(),
+ * we observe a high rate of cache misses in practice.
+ * Prefetching this data results in improved performance.
+ */
+static inline void prefetch_curr_exec_start(struct task_struct *p)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+       struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+#else
+       struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
+#endif
+       prefetch(curr);
+       prefetch(&curr->exec_start);
+}
+
 /*
  * Return accounted runtime for the task.
  * In case the task is currently running, return the runtime plus current's
@@ -3005,6 +3023,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
         * thread, breaking clock_gettime().
         */
        if (task_current(rq, p) && task_on_rq_queued(p)) {
+               prefetch_curr_exec_start(p);
                update_rq_clock(rq);
                p->sched_class->update_curr(rq);
        }
index 5be58820465cced6c0d1dc06c9de146bddcf664f..d4184498c9f5e3c8674015f97fe04da2417dafbd 100644 (file)
@@ -168,7 +168,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
 
        if (old_idx == IDX_INVALID) {
                cp->size++;
-               cp->elements[cp->size - 1].dl = 0;
+               cp->elements[cp->size - 1].dl = dl;
                cp->elements[cp->size - 1].cpu = cpu;
                cp->elements[cpu].idx = cp->size - 1;
                cpudl_change_key(cp, cp->size - 1, dl);
index 1934f658c03604272e5809f32fee1a6a3c928990..9858266fb0b32b07158b2b9e8f66e7fa2f482eb7 100644 (file)
@@ -508,13 +508,21 @@ void account_process_tick(struct task_struct *p, int user_tick)
  */
 void account_idle_ticks(unsigned long ticks)
 {
+       cputime_t cputime, steal;
 
        if (sched_clock_irqtime) {
                irqtime_account_idle_ticks(ticks);
                return;
        }
 
-       account_idle_time(jiffies_to_cputime(ticks));
+       cputime = jiffies_to_cputime(ticks);
+       steal = steal_account_process_time(cputime);
+
+       if (steal >= cputime)
+               return;
+
+       cputime -= steal;
+       account_idle_time(cputime);
 }
 
 /*
index fcb7f0217ff48610cca9bd5bd078f2f05df79164..1ce8867283dcde6e35ef74a72a1bca968decb918 100644 (file)
@@ -658,8 +658,11 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
         *
         * XXX figure out if select_task_rq_dl() deals with offline cpus.
         */
-       if (unlikely(!rq->online))
+       if (unlikely(!rq->online)) {
+               lockdep_unpin_lock(&rq->lock, rf.cookie);
                rq = dl_task_offline_migration(rq, p);
+               rf.cookie = lockdep_pin_lock(&rq->lock);
+       }
 
        /*
         * Queueing this task back might have overloaded rq, check if we need
index 4088eedea7637859844c777dfa56dfb23136c142..039de34f15216d19f61386b6d6c66744660516c9 100644 (file)
@@ -4269,7 +4269,7 @@ static void sync_throttle(struct task_group *tg, int cpu)
        pcfs_rq = tg->parent->cfs_rq[cpu];
 
        cfs_rq->throttle_count = pcfs_rq->throttle_count;
-       pcfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
+       cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
 }
 
 /* conditionally throttle active cfs_rq's from put_prev_entity() */
index b9aa1b0b38b0ecdb769cd33a8d77c0ffda2621bf..87e11d8ad536b8c360740ca9ce96461b0daeaaee 100644 (file)
@@ -1448,6 +1448,7 @@ static void dissolve_free_huge_page(struct page *page)
                list_del(&page->lru);
                h->free_huge_pages--;
                h->free_huge_pages_node[nid]--;
+               h->max_huge_pages--;
                update_and_free_page(h, page);
        }
        spin_unlock(&hugetlb_lock);
index b6728a33a4aca104fde8022b90fdf2df5630af31..baabaad4a4aaa89bb13fc691cf5df58af46c8b3b 100644 (file)
@@ -217,11 +217,8 @@ void quarantine_reduce(void)
        new_quarantine_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
                QUARANTINE_FRACTION;
        percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus();
-       if (WARN_ONCE(new_quarantine_size < percpu_quarantines,
-               "Too little memory, disabling global KASAN quarantine.\n"))
-               new_quarantine_size = 0;
-       else
-               new_quarantine_size -= percpu_quarantines;
+       new_quarantine_size = (new_quarantine_size < percpu_quarantines) ?
+               0 : new_quarantine_size - percpu_quarantines;
        WRITE_ONCE(quarantine_size, new_quarantine_size);
 
        last = global_quarantine.head;
index e74d7080ec9e63681ce3145cda26d2fce6eb8ed3..2ff0289ad061322298b472edba9eebb9abb302a7 100644 (file)
@@ -4077,14 +4077,32 @@ static struct cftype mem_cgroup_legacy_files[] = {
 
 static DEFINE_IDR(mem_cgroup_idr);
 
-static void mem_cgroup_id_get(struct mem_cgroup *memcg)
+static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
 {
-       atomic_inc(&memcg->id.ref);
+       atomic_add(n, &memcg->id.ref);
 }
 
-static void mem_cgroup_id_put(struct mem_cgroup *memcg)
+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
 {
-       if (atomic_dec_and_test(&memcg->id.ref)) {
+       while (!atomic_inc_not_zero(&memcg->id.ref)) {
+               /*
+                * The root cgroup cannot be destroyed, so its refcount must
+                * always be >= 1.
+                */
+               if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+                       VM_BUG_ON(1);
+                       break;
+               }
+               memcg = parent_mem_cgroup(memcg);
+               if (!memcg)
+                       memcg = root_mem_cgroup;
+       }
+       return memcg;
+}
+
+static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
+{
+       if (atomic_sub_and_test(n, &memcg->id.ref)) {
                idr_remove(&mem_cgroup_idr, memcg->id.id);
                memcg->id.id = 0;
 
@@ -4093,6 +4111,16 @@ static void mem_cgroup_id_put(struct mem_cgroup *memcg)
        }
 }
 
+static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
+{
+       mem_cgroup_id_get_many(memcg, 1);
+}
+
+static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
+{
+       mem_cgroup_id_put_many(memcg, 1);
+}
+
 /**
  * mem_cgroup_from_id - look up a memcg from a memcg id
  * @id: the memcg id to look up
@@ -4727,6 +4755,8 @@ static void __mem_cgroup_clear_mc(void)
                if (!mem_cgroup_is_root(mc.from))
                        page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
 
+               mem_cgroup_id_put_many(mc.from, mc.moved_swap);
+
                /*
                 * we charged both to->memory and to->memsw, so we
                 * should uncharge to->memory.
@@ -4734,9 +4764,9 @@ static void __mem_cgroup_clear_mc(void)
                if (!mem_cgroup_is_root(mc.to))
                        page_counter_uncharge(&mc.to->memory, mc.moved_swap);
 
-               css_put_many(&mc.from->css, mc.moved_swap);
+               mem_cgroup_id_get_many(mc.to, mc.moved_swap);
+               css_put_many(&mc.to->css, mc.moved_swap);
 
-               /* we've already done css_get(mc.to) */
                mc.moved_swap = 0;
        }
        memcg_oom_recover(from);
@@ -5800,7 +5830,7 @@ subsys_initcall(mem_cgroup_init);
  */
 void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 {
-       struct mem_cgroup *memcg;
+       struct mem_cgroup *memcg, *swap_memcg;
        unsigned short oldid;
 
        VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -5815,16 +5845,27 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
        if (!memcg)
                return;
 
-       mem_cgroup_id_get(memcg);
-       oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+       /*
+        * In case the memcg owning these pages has been offlined and doesn't
+        * have an ID allocated to it anymore, charge the closest online
+        * ancestor for the swap instead and transfer the memory+swap charge.
+        */
+       swap_memcg = mem_cgroup_id_get_online(memcg);
+       oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
        VM_BUG_ON_PAGE(oldid, page);
-       mem_cgroup_swap_statistics(memcg, true);
+       mem_cgroup_swap_statistics(swap_memcg, true);
 
        page->mem_cgroup = NULL;
 
        if (!mem_cgroup_is_root(memcg))
                page_counter_uncharge(&memcg->memory, 1);
 
+       if (memcg != swap_memcg) {
+               if (!mem_cgroup_is_root(swap_memcg))
+                       page_counter_charge(&swap_memcg->memsw, 1);
+               page_counter_uncharge(&memcg->memsw, 1);
+       }
+
        /*
         * Interrupts should be disabled here because the caller holds the
         * mapping->tree_lock lock which is taken with interrupts-off. It is
@@ -5863,11 +5904,14 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
        if (!memcg)
                return 0;
 
+       memcg = mem_cgroup_id_get_online(memcg);
+
        if (!mem_cgroup_is_root(memcg) &&
-           !page_counter_try_charge(&memcg->swap, 1, &counter))
+           !page_counter_try_charge(&memcg->swap, 1, &counter)) {
+               mem_cgroup_id_put(memcg);
                return -ENOMEM;
+       }
 
-       mem_cgroup_id_get(memcg);
        oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
        VM_BUG_ON_PAGE(oldid, page);
        mem_cgroup_swap_statistics(memcg, true);
index 3894b65b155555f11076f0cae90f71e2475b6929..41266dc29f33fb1278d7e4e9d6fd2efab69380a1 100644 (file)
@@ -1219,6 +1219,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 
        /* init node's zones as empty zones, we don't have any present pages.*/
        free_area_init_node(nid, zones_size, start_pfn, zholes_size);
+       pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
 
        /*
         * The node we allocated has no zone fallback lists. For avoiding
@@ -1249,6 +1250,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
 {
        arch_refresh_nodedata(nid, NULL);
+       free_percpu(pgdat->per_cpu_nodestats);
        arch_free_nodedata(pgdat);
        return;
 }
index 7d0a275df822e9e14c55e5d472cfc473ac3ae173..d53a9aa00977cbd0f81970e9e8a30b011cc73f31 100644 (file)
@@ -764,7 +764,7 @@ bool task_will_free_mem(struct task_struct *task)
 {
        struct mm_struct *mm = task->mm;
        struct task_struct *p;
-       bool ret;
+       bool ret = true;
 
        /*
         * Skip tasks without mm because it might have passed its exit_mm and
index ee744fa3b93d50a9215daf43966cc97f957c77a4..3fbe73a6fe4b6869dcd44a45de43928d4c08fe0c 100644 (file)
@@ -4060,7 +4060,7 @@ long si_mem_available(void)
        int lru;
 
        for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
-               pages[lru] = global_page_state(NR_LRU_BASE + lru);
+               pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
 
        for_each_zone(zone)
                wmark_low += zone->watermark[WMARK_LOW];
@@ -4757,6 +4757,8 @@ int local_memory_node(int node)
 }
 #endif
 
+static void setup_min_unmapped_ratio(void);
+static void setup_min_slab_ratio(void);
 #else  /* CONFIG_NUMA */
 
 static void set_zonelist_order(void)
@@ -5878,9 +5880,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
 #ifdef CONFIG_NUMA
                zone->node = nid;
-               pgdat->min_unmapped_pages += (freesize*sysctl_min_unmapped_ratio)
-                                               / 100;
-               pgdat->min_slab_pages += (freesize * sysctl_min_slab_ratio) / 100;
 #endif
                zone->name = zone_names[j];
                zone->zone_pgdat = pgdat;
@@ -6801,6 +6800,12 @@ int __meminit init_per_zone_wmark_min(void)
        setup_per_zone_wmarks();
        refresh_zone_stat_thresholds();
        setup_per_zone_lowmem_reserve();
+
+#ifdef CONFIG_NUMA
+       setup_min_unmapped_ratio();
+       setup_min_slab_ratio();
+#endif
+
        return 0;
 }
 core_initcall(init_per_zone_wmark_min)
@@ -6842,43 +6847,58 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
 }
 
 #ifdef CONFIG_NUMA
+static void setup_min_unmapped_ratio(void)
+{
+       pg_data_t *pgdat;
+       struct zone *zone;
+
+       for_each_online_pgdat(pgdat)
+               pgdat->min_unmapped_pages = 0;
+
+       for_each_zone(zone)
+               zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
+                               sysctl_min_unmapped_ratio) / 100;
+}
+
+
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
 {
-       struct pglist_data *pgdat;
-       struct zone *zone;
        int rc;
 
        rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (rc)
                return rc;
 
+       setup_min_unmapped_ratio();
+
+       return 0;
+}
+
+static void setup_min_slab_ratio(void)
+{
+       pg_data_t *pgdat;
+       struct zone *zone;
+
        for_each_online_pgdat(pgdat)
                pgdat->min_slab_pages = 0;
 
        for_each_zone(zone)
-               zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
-                               sysctl_min_unmapped_ratio) / 100;
-       return 0;
+               zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
+                               sysctl_min_slab_ratio) / 100;
 }
 
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
 {
-       struct pglist_data *pgdat;
-       struct zone *zone;
        int rc;
 
        rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (rc)
                return rc;
 
-       for_each_online_pgdat(pgdat)
-               pgdat->min_slab_pages = 0;
+       setup_min_slab_ratio();
 
-       for_each_zone(zone)
-               zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
-                               sysctl_min_slab_ratio) / 100;
        return 0;
 }
 #endif
index 709bc83703b1bfef419fa674ef5b5e28f5d70f05..1ef36404e7b2d7daeef2061ff8f79524d7750bb9 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1284,8 +1284,9 @@ void page_add_file_rmap(struct page *page, bool compound)
                VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
                __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
        } else {
-               if (PageTransCompound(page)) {
-                       VM_BUG_ON_PAGE(!PageLocked(page), page);
+               if (PageTransCompound(page) && page_mapping(page)) {
+                       VM_WARN_ON_ONCE(!PageLocked(page));
+
                        SetPageDoubleMap(compound_head(page));
                        if (PageMlocked(page))
                                clear_page_mlock(compound_head(page));
@@ -1303,7 +1304,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
 {
        int i, nr = 1;
 
-       VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
+       VM_BUG_ON_PAGE(compound && !PageHead(page), page);
        lock_page_memcg(page);
 
        /* Hugepages are not counted in NR_FILE_MAPPED for now. */
index 7f7748a0f9e1f738fd1ffcaceccdee2ae54d8d35..fd8b2b5741b141a7bc4457d93929c100d988a639 100644 (file)
@@ -3975,7 +3975,9 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
 
 struct kobj_attribute shmem_enabled_attr =
        __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
 
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
 bool shmem_huge_enabled(struct vm_area_struct *vma)
 {
        struct inode *inode = file_inode(vma->vm_file);
@@ -4006,7 +4008,7 @@ bool shmem_huge_enabled(struct vm_area_struct *vma)
                        return false;
        }
 }
-#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
 
 #else /* !CONFIG_SHMEM */
 
index cead06394e9e5e96539f11624698b8e797bc3a43..9adae58462f8191b22659b1aa438ec637f6fc765 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3629,6 +3629,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
  */
 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 {
+       LIST_HEAD(discard);
        struct page *page, *h;
 
        BUG_ON(irqs_disabled());
@@ -3636,13 +3637,16 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
        list_for_each_entry_safe(page, h, &n->partial, lru) {
                if (!page->inuse) {
                        remove_partial(n, page);
-                       discard_slab(s, page);
+                       list_add(&page->lru, &discard);
                } else {
                        list_slab_objects(s, page,
                        "Objects remaining in %s on __kmem_cache_shutdown()");
                }
        }
        spin_unlock_irq(&n->list_lock);
+
+       list_for_each_entry_safe(page, h, &discard, lru)
+               discard_slab(s, page);
 }
 
 /*
index 4acb1d5417aaf980bc7797c817eb9a9a350ecbf5..f24b25c25106fb55fb713b7308a8d43413a2143b 100644 (file)
@@ -507,8 +507,8 @@ err_out:
                /* wakeup anybody waiting for slots to pin pages */
                wake_up(&vp_wq);
        }
-       kfree(in_pages);
-       kfree(out_pages);
+       kvfree(in_pages);
+       kvfree(out_pages);
        return err;
 }
 
index c83326c5ba580480b877079d2a465430ff408cf5..ef34a02719d73147f4a4af29f3d861b4dc34391e 100644 (file)
@@ -574,7 +574,7 @@ static void complete_generic_request(struct ceph_mon_generic_request *req)
        put_generic_request(req);
 }
 
-void cancel_generic_request(struct ceph_mon_generic_request *req)
+static void cancel_generic_request(struct ceph_mon_generic_request *req)
 {
        struct ceph_mon_client *monc = req->monc;
        struct ceph_mon_generic_request *lookup_req;
index b5ec09612ff71daeb1b95ed4b6f939a172cc7545..a97e7b506612b4255f4b99de76d74c46a1b3896d 100644 (file)
@@ -4220,7 +4220,7 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
 
                pages = ceph_alloc_page_vector(calc_pages_for(0, data_len),
                                               GFP_NOIO);
-               if (!pages) {
+               if (IS_ERR(pages)) {
                        ceph_msg_put(m);
                        return NULL;
                }
index ca53c8319209469a25011b15d26af951a09d392d..22fb96efcf3467713a9e5430057ead684326e3db 100644 (file)
@@ -84,12 +84,6 @@ retry:
 }
 EXPORT_SYMBOL(ceph_find_or_create_string);
 
-static void ceph_free_string(struct rcu_head *head)
-{
-       struct ceph_string *cs = container_of(head, struct ceph_string, rcu);
-       kfree(cs);
-}
-
 void ceph_release_string(struct kref *ref)
 {
        struct ceph_string *cs = container_of(ref, struct ceph_string, kref);
@@ -101,7 +95,7 @@ void ceph_release_string(struct kref *ref)
        }
        spin_unlock(&string_tree_lock);
 
-       call_rcu(&cs->rcu, ceph_free_string);
+       kfree_rcu(cs, rcu);
 }
 EXPORT_SYMBOL(ceph_release_string);
 
index 23c8e7c3965651ad5ee03ee617ad92d06646802f..976c7812bbd520e51d34eb542b15f0e4730034b9 100644 (file)
@@ -340,12 +340,14 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
 }
 
 static struct gss_upcall_msg *
-__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid)
+__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth)
 {
        struct gss_upcall_msg *pos;
        list_for_each_entry(pos, &pipe->in_downcall, list) {
                if (!uid_eq(pos->uid, uid))
                        continue;
+               if (auth && pos->auth->service != auth->service)
+                       continue;
                atomic_inc(&pos->count);
                dprintk("RPC:       %s found msg %p\n", __func__, pos);
                return pos;
@@ -365,7 +367,7 @@ gss_add_msg(struct gss_upcall_msg *gss_msg)
        struct gss_upcall_msg *old;
 
        spin_lock(&pipe->lock);
-       old = __gss_find_upcall(pipe, gss_msg->uid);
+       old = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth);
        if (old == NULL) {
                atomic_inc(&gss_msg->count);
                list_add(&gss_msg->list, &pipe->in_downcall);
@@ -714,7 +716,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
        err = -ENOENT;
        /* Find a matching upcall */
        spin_lock(&pipe->lock);
-       gss_msg = __gss_find_upcall(pipe, uid);
+       gss_msg = __gss_find_upcall(pipe, uid, NULL);
        if (gss_msg == NULL) {
                spin_unlock(&pipe->lock);
                goto err_put_ctx;
index cb49898a5a58aacfadceda27a07ceb45eb88a8d3..7f79fb7dc6a00d6cc4082815d35fab4c60be1943 100644 (file)
@@ -2638,6 +2638,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
 {
        struct rpc_xprt_switch *xps;
        struct rpc_xprt *xprt;
+       unsigned long reconnect_timeout;
        unsigned char resvport;
        int ret = 0;
 
@@ -2649,6 +2650,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
                return -EAGAIN;
        }
        resvport = xprt->resvport;
+       reconnect_timeout = xprt->max_reconnect_timeout;
        rcu_read_unlock();
 
        xprt = xprt_create_transport(xprtargs);
@@ -2657,6 +2659,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
                goto out_put_switch;
        }
        xprt->resvport = resvport;
+       xprt->max_reconnect_timeout = reconnect_timeout;
 
        rpc_xprt_switch_set_roundrobin(xps);
        if (setup) {
@@ -2673,6 +2676,27 @@ out_put_switch:
 }
 EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
 
+static int
+rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+               struct rpc_xprt *xprt,
+               void *data)
+{
+       unsigned long timeout = *((unsigned long *)data);
+
+       if (timeout < xprt->max_reconnect_timeout)
+               xprt->max_reconnect_timeout = timeout;
+       return 0;
+}
+
+void
+rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo)
+{
+       rpc_clnt_iterate_for_each_xprt(clnt,
+                       rpc_xprt_cap_max_reconnect_timeout,
+                       &timeo);
+}
+EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
+
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 static void rpc_show_header(void)
 {
index 8313960cac524dd36d220f9b55d124435400f25a..ea244b29138b0b86cf7860ce5c1e4605ade86a2a 100644 (file)
@@ -680,6 +680,20 @@ out:
        spin_unlock_bh(&xprt->transport_lock);
 }
 
+static bool
+xprt_has_timer(const struct rpc_xprt *xprt)
+{
+       return xprt->idle_timeout != 0;
+}
+
+static void
+xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
+       __must_hold(&xprt->transport_lock)
+{
+       if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
+               mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
+}
+
 static void
 xprt_init_autodisconnect(unsigned long data)
 {
@@ -688,6 +702,8 @@ xprt_init_autodisconnect(unsigned long data)
        spin_lock(&xprt->transport_lock);
        if (!list_empty(&xprt->recv))
                goto out_abort;
+       /* Reset xprt->last_used to avoid connect/autodisconnect cycling */
+       xprt->last_used = jiffies;
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
                goto out_abort;
        spin_unlock(&xprt->transport_lock);
@@ -725,6 +741,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
                goto out;
        xprt->snd_task =NULL;
        xprt->ops->release_xprt(xprt, NULL);
+       xprt_schedule_autodisconnect(xprt);
 out:
        spin_unlock_bh(&xprt->transport_lock);
        wake_up_bit(&xprt->state, XPRT_LOCKED);
@@ -888,11 +905,6 @@ static void xprt_timer(struct rpc_task *task)
        spin_unlock_bh(&xprt->transport_lock);
 }
 
-static inline int xprt_has_timer(struct rpc_xprt *xprt)
-{
-       return xprt->idle_timeout != 0;
-}
-
 /**
  * xprt_prepare_transmit - reserve the transport before sending a request
  * @task: RPC task about to send a request
@@ -1280,9 +1292,7 @@ void xprt_release(struct rpc_task *task)
        if (!list_empty(&req->rq_list))
                list_del(&req->rq_list);
        xprt->last_used = jiffies;
-       if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
-               mod_timer(&xprt->timer,
-                               xprt->last_used + xprt->idle_timeout);
+       xprt_schedule_autodisconnect(xprt);
        spin_unlock_bh(&xprt->transport_lock);
        if (req->rq_buffer)
                xprt->ops->buf_free(req->rq_buffer);
index 111767ab124aa4037dfe8c7040866d7196343292..8ede3bc52481b73c82834aa684111013c6d40cad 100644 (file)
@@ -177,7 +177,6 @@ static struct ctl_table sunrpc_table[] = {
  * increase over time if the server is down or not responding.
  */
 #define XS_TCP_INIT_REEST_TO   (3U * HZ)
-#define XS_TCP_MAX_REEST_TO    (5U * 60 * HZ)
 
 /*
  * TCP idle timeout; client drops the transport socket if it is idle
@@ -2173,6 +2172,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                write_unlock_bh(&sk->sk_callback_lock);
        }
        xs_udp_do_set_buffer_size(xprt);
+
+       xprt->stat.connect_start = jiffies;
 }
 
 static void xs_udp_setup_socket(struct work_struct *work)
@@ -2236,6 +2237,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                unsigned int keepcnt = xprt->timeout->to_retries + 1;
                unsigned int opt_on = 1;
                unsigned int timeo;
+               unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
 
                /* TCP Keepalive options */
                kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
@@ -2247,6 +2249,16 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
                                (char *)&keepcnt, sizeof(keepcnt));
 
+               /* Avoid temporary address, they are bad for long-lived
+                * connections such as NFS mounts.
+                * RFC4941, section 3.6 suggests that:
+                *    Individual applications, which have specific
+                *    knowledge about the normal duration of connections,
+                *    MAY override this as appropriate.
+                */
+               kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
+                               (char *)&addr_pref, sizeof(addr_pref));
+
                /* TCP user timeout (see RFC5482) */
                timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
                        (xprt->timeout->to_retries + 1);
@@ -2295,6 +2307,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                /* SYN_SENT! */
                if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
                        xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+               break;
+       case -EADDRNOTAVAIL:
+               /* Source port number is unavailable. Try a new one! */
+               transport->srcport = 0;
        }
 out:
        return ret;
@@ -2369,6 +2385,25 @@ out:
        xprt_wake_pending_tasks(xprt, status);
 }
 
+static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt)
+{
+       unsigned long start, now = jiffies;
+
+       start = xprt->stat.connect_start + xprt->reestablish_timeout;
+       if (time_after(start, now))
+               return start - now;
+       return 0;
+}
+
+static void xs_reconnect_backoff(struct rpc_xprt *xprt)
+{
+       xprt->reestablish_timeout <<= 1;
+       if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
+               xprt->reestablish_timeout = xprt->max_reconnect_timeout;
+       if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+               xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+}
+
 /**
  * xs_connect - connect a socket to a remote endpoint
  * @xprt: pointer to transport structure
@@ -2386,6 +2421,7 @@ out:
 static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 {
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       unsigned long delay = 0;
 
        WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
 
@@ -2397,19 +2433,15 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
                /* Start by resetting any existing state */
                xs_reset_transport(transport);
 
-               queue_delayed_work(xprtiod_workqueue,
-                                  &transport->connect_worker,
-                                  xprt->reestablish_timeout);
-               xprt->reestablish_timeout <<= 1;
-               if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
-                       xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
-               if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
-                       xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
-       } else {
+               delay = xs_reconnect_delay(xprt);
+               xs_reconnect_backoff(xprt);
+
+       } else
                dprintk("RPC:       xs_connect scheduled xprt %p\n", xprt);
-               queue_delayed_work(xprtiod_workqueue,
-                                  &transport->connect_worker, 0);
-       }
+
+       queue_delayed_work(xprtiod_workqueue,
+                       &transport->connect_worker,
+                       delay);
 }
 
 /**
@@ -2961,6 +2993,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
        xprt->ops = &xs_tcp_ops;
        xprt->timeout = &xs_tcp_default_timeout;
 
+       xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+
        INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
        INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
 
index 122fcdaf42c86cec7a5fbce08cc3b406f692f6c5..49a00d54b835f156745c27bfb12b1f64bcf9140c 100755 (executable)
@@ -432,7 +432,7 @@ foreach my $file (@ARGV) {
            die "$P: file '${file}' not found\n";
        }
     }
-    if ($from_filename || vcs_file_exists($file)) {
+    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
        $file =~ s/^\Q${cur_path}\E//;  #strip any absolute path
        $file =~ s/^\Q${lk_path}\E//;   #or the path to the lk tree
        push(@files, $file);
index 89dacf9b4e6cbcdd7caaed257ce77a510083c3c4..160c7f71372289034f87953de4650fb08498298a 100644 (file)
@@ -906,20 +906,23 @@ static int azx_resume(struct device *dev)
        struct snd_card *card = dev_get_drvdata(dev);
        struct azx *chip;
        struct hda_intel *hda;
+       struct hdac_bus *bus;
 
        if (!card)
                return 0;
 
        chip = card->private_data;
        hda = container_of(chip, struct hda_intel, chip);
+       bus = azx_bus(chip);
        if (chip->disabled || hda->init_failed || !chip->running)
                return 0;
 
-       if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL
-               && hda->need_i915_power) {
-               snd_hdac_display_power(azx_bus(chip), true);
-               snd_hdac_i915_set_bclk(azx_bus(chip));
+       if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
+               snd_hdac_display_power(bus, true);
+               if (hda->need_i915_power)
+                       snd_hdac_i915_set_bclk(bus);
        }
+
        if (chip->msi)
                if (pci_enable_msi(pci) < 0)
                        chip->msi = 0;
@@ -929,6 +932,11 @@ static int azx_resume(struct device *dev)
 
        hda_intel_init_chip(chip, true);
 
+       /* power down again for link-controlled chips */
+       if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
+           !hda->need_i915_power)
+               snd_hdac_display_power(bus, false);
+
        snd_power_change_state(card, SNDRV_CTL_POWER_D0);
 
        trace_azx_resume(chip);
@@ -1008,6 +1016,7 @@ static int azx_runtime_resume(struct device *dev)
 
        chip = card->private_data;
        hda = container_of(chip, struct hda_intel, chip);
+       bus = azx_bus(chip);
        if (chip->disabled || hda->init_failed)
                return 0;
 
@@ -1015,15 +1024,9 @@ static int azx_runtime_resume(struct device *dev)
                return 0;
 
        if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
-               bus = azx_bus(chip);
-               if (hda->need_i915_power) {
-                       snd_hdac_display_power(bus, true);
+               snd_hdac_display_power(bus, true);
+               if (hda->need_i915_power)
                        snd_hdac_i915_set_bclk(bus);
-               } else {
-                       /* toggle codec wakeup bit for STATESTS read */
-                       snd_hdac_set_codec_wakeup(bus, true);
-                       snd_hdac_set_codec_wakeup(bus, false);
-               }
        }
 
        /* Read STATESTS before controller reset */
@@ -1043,6 +1046,11 @@ static int azx_runtime_resume(struct device *dev)
        azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) &
                        ~STATESTS_INT_MASK);
 
+       /* power down again for link-controlled chips */
+       if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
+           !hda->need_i915_power)
+               snd_hdac_display_power(bus, false);
+
        trace_azx_runtime_resume(chip);
        return 0;
 }
index 6adde457b602e08aedd1806e8b79863d65e006cc..6cf1f35974558053101e00351482574c2be6a988 100644 (file)
@@ -1128,6 +1128,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 {
        /* devices which do not support reading the sample rate. */
        switch (chip->usb_id) {
+       case USB_ID(0x041E, 0x4080): /* Creative Live Cam VF0610 */
        case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema  */
        case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
        case USB_ID(0x045E, 0x076E): /* MS Lifecam HD-5001 */
@@ -1138,6 +1139,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
        case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
        case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
        case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */
+       case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */
        case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
        case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */
        case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
index 4a413485f9eb8ef58ec71c77ff2594f4300c8ea6..92a8308b96f64cb6ce845a8379ca06cb9a6a00d6 100644 (file)
 #define X86_FEATURE_RDSEED     ( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX                ( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP       ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT    ( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB       ( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF   ( 9*32+26) /* AVX-512 Prefetch */
 #define X86_BUG_FXSAVE_LEAK    X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR        X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
 #define X86_BUG_SYSRET_SS_ATTRS        X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
-#define X86_BUG_NULL_SEG       X86_BUG(9) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE   X86_BUG(10) /* SWAPGS without input dep on GS */
-
-
 #ifdef CONFIG_X86_32
 /*
  * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
  */
 #define X86_BUG_ESPFIX         X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
 #endif
-
+#define X86_BUG_NULL_SEG       X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE   X86_BUG(11) /* SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR                X86_BUG(12) /* IPI required to wake up remote CPU */
 #endif /* _ASM_X86_CPUFEATURES_H */
index 911e9358ceb184b6b0b0f38b9c7b853fc4506fbe..85599ad4d0247863cef655d02b9a4b3f83c77fb7 100644 (file)
@@ -56,5 +56,7 @@
 #define DISABLED_MASK14        0
 #define DISABLED_MASK15        0
 #define DISABLED_MASK16        (DISABLE_PKU|DISABLE_OSPKE)
+#define DISABLED_MASK17        0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
index 4916144e3c42668a3e07af33859b4a1af3f2985b..fac9a5c0abe94b233b72b35bca8c7a665847b694 100644 (file)
@@ -99,5 +99,7 @@
 #define REQUIRED_MASK14        0
 #define REQUIRED_MASK15        0
 #define REQUIRED_MASK16        0
+#define REQUIRED_MASK17        0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
index 5b15d94a33f818d04ee7ae2a0f5685125bd89a40..37fee272618f1de348a7d5961f1792debba72991 100644 (file)
@@ -78,7 +78,6 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
-#define EXIT_REASON_PCOMMIT             65
 
 #define VMX_EXIT_REASONS \
        { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
        { EXIT_REASON_INVVPID,               "INVVPID" }, \
        { EXIT_REASON_INVPCID,               "INVPCID" }, \
        { EXIT_REASON_XSAVES,                "XSAVES" }, \
-       { EXIT_REASON_XRSTORS,               "XRSTORS" }, \
-       { EXIT_REASON_PCOMMIT,               "PCOMMIT" }
+       { EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
index 406459b935a27c4f9b518426a4fd60493432221d..da218fec605657ee415f8ad71a95d8851330a9de 100644 (file)
@@ -84,6 +84,7 @@ enum bpf_map_type {
        BPF_MAP_TYPE_PERCPU_HASH,
        BPF_MAP_TYPE_PERCPU_ARRAY,
        BPF_MAP_TYPE_STACK_TRACE,
+       BPF_MAP_TYPE_CGROUP_ARRAY,
 };
 
 enum bpf_prog_type {
@@ -93,6 +94,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_SCHED_CLS,
        BPF_PROG_TYPE_SCHED_ACT,
        BPF_PROG_TYPE_TRACEPOINT,
+       BPF_PROG_TYPE_XDP,
 };
 
 #define BPF_PSEUDO_MAP_FD      1
@@ -313,6 +315,66 @@ enum bpf_func_id {
         */
        BPF_FUNC_skb_get_tunnel_opt,
        BPF_FUNC_skb_set_tunnel_opt,
+
+       /**
+        * bpf_skb_change_proto(skb, proto, flags)
+        * Change protocol of the skb. Currently supported is
+        * v4 -> v6, v6 -> v4 transitions. The helper will also
+        * resize the skb. eBPF program is expected to fill the
+        * new headers via skb_store_bytes and lX_csum_replace.
+        * @skb: pointer to skb
+        * @proto: new skb->protocol type
+        * @flags: reserved
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_skb_change_proto,
+
+       /**
+        * bpf_skb_change_type(skb, type)
+        * Change packet type of skb.
+        * @skb: pointer to skb
+        * @type: new skb->pkt_type type
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_skb_change_type,
+
+       /**
+        * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+        * @skb: pointer to skb
+        * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+        * @index: index of the cgroup in the bpf_map
+        * Return:
+        *   == 0 skb failed the cgroup2 descendant test
+        *   == 1 skb succeeded the cgroup2 descendant test
+        *    < 0 error
+        */
+       BPF_FUNC_skb_in_cgroup,
+
+       /**
+        * bpf_get_hash_recalc(skb)
+        * Retrieve and possibly recalculate skb->hash.
+        * @skb: pointer to skb
+        * Return: hash
+        */
+       BPF_FUNC_get_hash_recalc,
+
+       /**
+        * u64 bpf_get_current_task(void)
+        * Returns current task_struct
+        * Return: current
+        */
+       BPF_FUNC_get_current_task,
+
+       /**
+        * bpf_probe_write_user(void *dst, void *src, int len)
+        * safely attempt to write to a location
+        * @dst: destination address in userspace
+        * @src: source address on stack
+        * @len: number of bytes to copy
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_probe_write_user,
+
        __BPF_FUNC_MAX_ID,
 };
 
@@ -347,9 +409,11 @@ enum bpf_func_id {
 #define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
 #define BPF_F_DONT_FRAGMENT            (1ULL << 2)
 
-/* BPF_FUNC_perf_event_output flags. */
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
 #define BPF_F_INDEX_MASK               0xffffffffULL
 #define BPF_F_CURRENT_CPU              BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK              (0xfffffULL << 32)
 
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
@@ -386,4 +450,24 @@ struct bpf_tunnel_key {
        __u32 tunnel_label;
 };
 
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ */
+enum xdp_action {
+       XDP_ABORTED = 0,
+       XDP_DROP,
+       XDP_PASS,
+       XDP_TX,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+       __u32 data;
+       __u32 data_end;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
index 736da44596e451fa1a14d9a045f7feda269a0779..b303bcdd8ed15fb9d140e0e7369388bc714aaace 100644 (file)
@@ -176,10 +176,18 @@ Each probe argument follows below syntax.
 
 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
 '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
-'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), signedness casting (u/s), "string" and bitfield are supported. (See TYPES for details.)
 
 On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
 
+TYPES
+-----
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. The prefixes 's' and 'u' mean that the type is signed or unsigned, respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' alone to specify only the signedness and leave the size to be auto-detected by perf probe.
+String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+Bitfield is another special type, which takes 3 parameters: bit-width, bit-offset, and container-size (usually 32). The syntax is:
+
+ b<bit-width>@<bit-offset>/<container-size>
+
 LINE SYNTAX
 -----------
 Line range is described by following syntax.
index 1f6c70594f0f79e378163c6430b9028642eee45d..053bbbd84ece30c673afe7e176328f8390a6bda9 100644 (file)
@@ -116,8 +116,8 @@ OPTIONS
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-       srcline, period, iregs, brstack, brstacksym, flags.
-        Field list can be prepended with the type, trace, sw or hw,
+        srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
+        callindent. Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
 
index c6d0f91731a14732333af62d0a40a3ea43fb4c99..8d4dc97d80baeeadd3bd2eb1d966f5fc8292bdd0 100644 (file)
@@ -54,10 +54,6 @@ int arch__compare_symbol_names(const char *namea, const char *nameb)
 #endif
 
 #if defined(_CALL_ELF) && _CALL_ELF == 2
-bool arch__prefers_symtab(void)
-{
-       return true;
-}
 
 #ifdef HAVE_LIBELF_SUPPORT
 void arch__sym_update(struct symbol *s, GElf_Sym *sym)
@@ -100,4 +96,27 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
                        tev->point.offset += lep_offset;
        }
 }
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+                                          int ntevs)
+{
+       struct probe_trace_event *tev;
+       struct map *map;
+       struct symbol *sym = NULL;
+       struct rb_node *tmp;
+       int i = 0;
+
+       map = get_target_map(pev->target, pev->uprobes);
+       if (!map || map__load(map, NULL) < 0)
+               return;
+
+       for (i = 0; i < ntevs; i++) {
+               tev = &pev->tevs[i];
+               map__for_each_symbol(map, sym, tmp) {
+                       if (map->unmap_ip(map, sym->start) == tev->point.address)
+                               arch__fix_tev_from_maps(pev, tev, map, sym);
+               }
+       }
+}
+
 #endif
index 971ff91b16cb3be52702cca780c3df818d52c51a..9c640a8081c70a48a1c9090c809fa4b96f918602 100644 (file)
@@ -2116,7 +2116,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
                     "Valid types: hw,sw,trace,raw. "
                     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
                     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
-                    "callindent", parse_output_fields),
+                    "bpf-output,callindent", parse_output_fields),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                    "system-wide collection from all CPUs"),
        OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
index 0c16d20d7e32fa2eb7377247c2e5542d2a5d076a..3c7452b39f57649b05d675db3d19395fb765df2d 100644 (file)
@@ -331,7 +331,7 @@ static int read_counter(struct perf_evsel *counter)
        return 0;
 }
 
-static void read_counters(bool close_counters)
+static void read_counters(void)
 {
        struct perf_evsel *counter;
 
@@ -341,11 +341,6 @@ static void read_counters(bool close_counters)
 
                if (perf_stat_process_counter(&stat_config, counter))
                        pr_warning("failed to process counter %s\n", counter->name);
-
-               if (close_counters) {
-                       perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
-                                            thread_map__nr(evsel_list->threads));
-               }
        }
 }
 
@@ -353,7 +348,7 @@ static void process_interval(void)
 {
        struct timespec ts, rs;
 
-       read_counters(false);
+       read_counters();
 
        clock_gettime(CLOCK_MONOTONIC, &ts);
        diff_timespec(&rs, &ts, &ref_time);
@@ -380,6 +375,17 @@ static void enable_counters(void)
                perf_evlist__enable(evsel_list);
 }
 
+static void disable_counters(void)
+{
+       /*
+        * If we don't have tracee (attaching to task or cpu), counters may
+        * still be running. To get accurate group ratios, we must stop groups
+        * from counting before reading their constituent counters.
+        */
+       if (!target__none(&target))
+               perf_evlist__disable(evsel_list);
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -657,11 +663,20 @@ try_again:
                }
        }
 
+       disable_counters();
+
        t1 = rdclock();
 
        update_stats(&walltime_nsecs_stats, t1 - t0);
 
-       read_counters(true);
+       /*
+        * Closing a group leader splits the group, and as we only disable
+        * group leaders, results in remaining events becoming enabled. To
+        * avoid arbitrary skew, we must read all counters before closing any
+        * group leaders.
+        */
+       read_counters();
+       perf_evlist__close(evsel_list);
 
        return WEXITSTATUS(status);
 }
index 953dc1ab2ed7bd0be442c4491c9c8a4862aa3386..28733962cd80a63e1376b5b71f4771ab8f8857f7 100644 (file)
@@ -170,15 +170,17 @@ static struct map *kernel_get_module_map(const char *module)
                module = "kernel";
 
        for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+               /* short_name is "[module]" */
                if (strncmp(pos->dso->short_name + 1, module,
-                           pos->dso->short_name_len - 2) == 0) {
+                           pos->dso->short_name_len - 2) == 0 &&
+                   module[pos->dso->short_name_len - 2] == '\0') {
                        return pos;
                }
        }
        return NULL;
 }
 
-static struct map *get_target_map(const char *target, bool user)
+struct map *get_target_map(const char *target, bool user)
 {
        /* Init maps of given executable or kernel */
        if (user)
@@ -385,7 +387,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
                if (uprobes)
                        address = sym->start;
                else
-                       address = map->unmap_ip(map, sym->start);
+                       address = map->unmap_ip(map, sym->start) - map->reloc;
                break;
        }
        if (!address) {
@@ -664,22 +666,14 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
        return ret;
 }
 
-/* Post processing the probe events */
-static int post_process_probe_trace_events(struct probe_trace_event *tevs,
-                                          int ntevs, const char *module,
-                                          bool uprobe)
+static int
+post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
+                                      int ntevs)
 {
        struct ref_reloc_sym *reloc_sym;
        char *tmp;
        int i, skipped = 0;
 
-       if (uprobe)
-               return add_exec_to_probe_trace_events(tevs, ntevs, module);
-
-       /* Note that currently ref_reloc_sym based probe is not for drivers */
-       if (module)
-               return add_module_to_probe_trace_events(tevs, ntevs, module);
-
        reloc_sym = kernel_get_ref_reloc_sym();
        if (!reloc_sym) {
                pr_warning("Relocated base symbol is not found!\n");
@@ -711,6 +705,34 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
        return skipped;
 }
 
+void __weak
+arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused,
+                                     int ntevs __maybe_unused)
+{
+}
+
+/* Post processing the probe events */
+static int post_process_probe_trace_events(struct perf_probe_event *pev,
+                                          struct probe_trace_event *tevs,
+                                          int ntevs, const char *module,
+                                          bool uprobe)
+{
+       int ret;
+
+       if (uprobe)
+               ret = add_exec_to_probe_trace_events(tevs, ntevs, module);
+       else if (module)
+               /* Currently ref_reloc_sym based probe is not for drivers */
+               ret = add_module_to_probe_trace_events(tevs, ntevs, module);
+       else
+               ret = post_process_kernel_probe_trace_events(tevs, ntevs);
+
+       if (ret >= 0)
+               arch__post_process_probe_trace_events(pev, ntevs);
+
+       return ret;
+}
+
 /* Try to find perf_probe_event with debuginfo */
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
                                          struct probe_trace_event **tevs)
@@ -749,7 +771,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 
        if (ntevs > 0) {        /* Succeeded to find trace events */
                pr_debug("Found %d probe_trace_events.\n", ntevs);
-               ret = post_process_probe_trace_events(*tevs, ntevs,
+               ret = post_process_probe_trace_events(pev, *tevs, ntevs,
                                                pev->target, pev->uprobes);
                if (ret < 0 || ret == ntevs) {
                        clear_probe_trace_events(*tevs, ntevs);
@@ -2936,8 +2958,6 @@ errout:
        return err;
 }
 
-bool __weak arch__prefers_symtab(void) { return false; }
-
 /* Concatinate two arrays */
 static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
 {
@@ -3158,12 +3178,6 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
        if (ret > 0 || pev->sdt)        /* SDT can be found only in the cache */
                return ret == 0 ? -ENOENT : ret; /* Found in probe cache */
 
-       if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
-               ret = find_probe_trace_events_from_map(pev, tevs);
-               if (ret > 0)
-                       return ret; /* Found in symbol table */
-       }
-
        /* Convert perf_probe_event with debuginfo */
        ret = try_to_find_probe_trace_events(pev, tevs);
        if (ret != 0)
index e18ea9fe63857cb7a9b382dac563fd2c8cdfbd85..f4f45db77c1c1ec59c3ee505f525f2b2561530ee 100644 (file)
@@ -158,7 +158,6 @@ int show_line_range(struct line_range *lr, const char *module, bool user);
 int show_available_vars(struct perf_probe_event *pevs, int npevs,
                        struct strfilter *filter);
 int show_available_funcs(const char *module, struct strfilter *filter, bool user);
-bool arch__prefers_symtab(void);
 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
                             struct probe_trace_event *tev, struct map *map,
                             struct symbol *sym);
@@ -173,4 +172,9 @@ int e_snprintf(char *str, size_t size, const char *format, ...)
 int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
                            struct perf_probe_arg *pvar);
 
+struct map *get_target_map(const char *target, bool user);
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+                                          int ntevs);
+
 #endif /*_PROBE_EVENT_H */
index f2d9ff064e2de720247e77f9645faf94b6b4ce57..5c290c682afe7176607fe01f4d29742b1821f1a9 100644 (file)
@@ -297,10 +297,13 @@ static int convert_variable_type(Dwarf_Die *vr_die,
        char sbuf[STRERR_BUFSIZE];
        int bsize, boffs, total;
        int ret;
+       char sign;
 
        /* TODO: check all types */
-       if (cast && strcmp(cast, "string") != 0) {
+       if (cast && strcmp(cast, "string") != 0 &&
+           strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
                /* Non string type is OK */
+               /* and respect signedness cast */
                tvar->type = strdup(cast);
                return (tvar->type == NULL) ? -ENOMEM : 0;
        }
@@ -361,6 +364,13 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                return (tvar->type == NULL) ? -ENOMEM : 0;
        }
 
+       if (cast && (strcmp(cast, "u") == 0))
+               sign = 'u';
+       else if (cast && (strcmp(cast, "s") == 0))
+               sign = 's';
+       else
+               sign = die_is_signed_type(&type) ? 's' : 'u';
+
        ret = dwarf_bytesize(&type);
        if (ret <= 0)
                /* No size ... try to use default type */
@@ -373,8 +383,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                        dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
                ret = MAX_BASIC_TYPE_BITS;
        }
-       ret = snprintf(buf, 16, "%c%d",
-                      die_is_signed_type(&type) ? 's' : 'u', ret);
+       ret = snprintf(buf, 16, "%c%d", sign, ret);
 
 formatted:
        if (ret < 0 || ret >= 16) {
index 947d21f3839838c433430b01fe52165522f87295..3d3cb8392c86029bb488f737564730e0cd8995bc 100644 (file)
@@ -588,7 +588,11 @@ static char *get_trace_output(struct hist_entry *he)
        } else {
                pevent_event_info(&seq, evsel->tp_format, &rec);
        }
-       return seq.buffer;
+       /*
+        * Trim the buffer, it starts at 4KB and we're not going to
+        * add anything more to this buffer.
+        */
+       return realloc(seq.buffer, seq.len + 1);
 }
 
 static int64_t
index 5404efa578a3fcea18ce5bbab2a991e0c3d98b73..dd48f421844c7902773526d8c0845109ab9c5e55 100644 (file)
@@ -13,6 +13,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
 #include <linux/libnvdimm.h>
 #include <linux/vmalloc.h>
 #include <linux/device.h>
@@ -1474,6 +1475,7 @@ static int nfit_test_probe(struct platform_device *pdev)
        if (nfit_test->setup != nfit_test0_setup)
                return 0;
 
+       flush_work(&acpi_desc->work);
        nfit_test->setup_hotplug = 1;
        nfit_test->setup(nfit_test);
 
index 3c40c9d0e6c70a87b83c9d92a61f0fc0f2f4570c..1cc6d64c39b709dd28f104ced47829d7427c435f 100644 (file)
@@ -8,7 +8,7 @@ ifeq ($(ARCH),powerpc)
 
 GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
 
-CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
+CFLAGS := -std=gnu99 -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
 
 export CFLAGS
 
This page took 0.116241 seconds and 5 git commands to generate.