Merge branch 'akpm-current/current'
author Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2016 03:47:33 +0000 (13:47 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2016 03:47:33 +0000 (13:47 +1000)
238 files changed:
.gitattributes [new file with mode: 0644]
Documentation/DMA-attributes.txt
Documentation/filesystems/autofs4-mount-control.txt
Documentation/filesystems/autofs4.txt
Kbuild
MAINTAINERS
arch/alpha/kernel/vmlinux.lds.S
arch/arc/kernel/vmlinux.lds.S
arch/arm/include/asm/irq.h
arch/arm/include/asm/page.h
arch/arm/kernel/process.c
arch/arm/kernel/smp.c
arch/arm/kernel/vmlinux-xip.lds.S
arch/arm/kernel/vmlinux.lds.S
arch/arm64/kernel/process.c
arch/arm64/kernel/vmlinux.lds.S
arch/avr32/kernel/vmlinux.lds.S
arch/blackfin/kernel/vmlinux.lds.S
arch/c6x/kernel/vmlinux.lds.S
arch/cris/kernel/vmlinux.lds.S
arch/frv/kernel/vmlinux.lds.S
arch/h8300/kernel/vmlinux.lds.S
arch/hexagon/kernel/vmlinux.lds.S
arch/ia64/kernel/Makefile
arch/ia64/kernel/vmlinux.lds.S
arch/m32r/kernel/vmlinux.lds.S
arch/m68k/kernel/vmlinux-nommu.lds
arch/m68k/kernel/vmlinux-std.lds
arch/m68k/kernel/vmlinux-sun3.lds
arch/metag/kernel/vmlinux.lds.S
arch/microblaze/kernel/vmlinux.lds.S
arch/mips/cavium-octeon/setup.c
arch/mips/include/asm/irq.h
arch/mips/include/asm/kexec.h
arch/mips/include/asm/uprobes.h
arch/mips/kernel/crash.c
arch/mips/kernel/machine_kexec.c
arch/mips/kernel/process.c
arch/mips/kernel/vmlinux.lds.S
arch/mn10300/kernel/vmlinux.lds.S
arch/nios2/kernel/vmlinux.lds.S
arch/openrisc/kernel/vmlinux.lds.S
arch/parisc/kernel/vmlinux.lds.S
arch/powerpc/include/asm/mmzone.h
arch/powerpc/kernel/fadump.c
arch/powerpc/kernel/iommu.c
arch/powerpc/kernel/vmlinux.lds.S
arch/s390/include/asm/uprobes.h
arch/s390/kernel/compat_linux.c
arch/s390/kernel/vmlinux.lds.S
arch/score/kernel/vmlinux.lds.S
arch/sh/kernel/vmlinux.lds.S
arch/sparc/include/asm/irq_64.h
arch/sparc/kernel/process_64.c
arch/sparc/kernel/vmlinux.lds.S
arch/tile/include/asm/irq.h
arch/tile/kernel/entry.S
arch/tile/kernel/pmc.c
arch/tile/kernel/process.c
arch/tile/kernel/traps.c
arch/tile/kernel/vmlinux.lds.S
arch/tile/mm/mmap.c
arch/um/Makefile
arch/um/kernel/dyn.lds.S
arch/um/kernel/uml.lds.S
arch/unicore32/kernel/process.c
arch/unicore32/kernel/vmlinux.lds.S
arch/x86/include/asm/irq.h
arch/x86/include/asm/irqflags.h
arch/x86/include/asm/kexec.h
arch/x86/include/asm/smp.h
arch/x86/kernel/acpi/cstate.c
arch/x86/kernel/apic/hw_nmi.c
arch/x86/kernel/crash.c
arch/x86/kernel/e820.c
arch/x86/kernel/machine_kexec_64.c
arch/x86/kernel/process.c
arch/x86/kernel/smp.c
arch/x86/kernel/sys_x86_64.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/um/shared/sysdep/kernel-offsets.h
arch/x86/um/user-offsets.c
arch/xtensa/kernel/vmlinux.lds.S
block/genhd.c
drivers/acpi/processor_idle.c
drivers/base/memory.c
drivers/char/random.c
drivers/cpuidle/driver.c
drivers/idle/intel_idle.c
drivers/nvme/host/pci.c
drivers/of/base.c
drivers/rapidio/rio_cm.c
drivers/staging/lustre/lustre/ptlrpc/sec.c
fs/autofs4/autofs_i.h
fs/autofs4/dev-ioctl.c
fs/autofs4/inode.c
fs/autofs4/root.c
fs/compat.c
fs/dax.c
fs/ext2/file.c
fs/ext4/file.c
fs/nfs/internal.h
fs/nfs/pagelist.c
fs/nfs/read.c
fs/nfs/write.c
fs/nfsd/auth.c
fs/nfsd/nfs4state.c
fs/ocfs2/cluster/tcp.c
fs/ocfs2/dlm/dlmconvert.c
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlmfs/dlmfs.c
fs/ocfs2/super.c
fs/pipe.c
fs/proc/array.c
fs/proc/base.c
fs/proc/fd.c
fs/proc/fd.h
fs/proc/internal.h
fs/proc/meminfo.c
fs/proc/stat.c
fs/proc/task_mmu.c
fs/seq_file.c
fs/xfs/xfs_file.c
include/asm-generic/vmlinux.lds.h
include/linux/auto_dev-ioctl.h
include/linux/auto_fs.h
include/linux/bitops.h
include/linux/bootmem.h
include/linux/compaction.h
include/linux/compat.h
include/linux/console.h
include/linux/cpu.h
include/linux/crc64_ecma.h [new file with mode: 0644]
include/linux/cred.h
include/linux/ctype.h
include/linux/dma-mapping.h
include/linux/huge_mm.h
include/linux/kbuild.h
include/linux/kernel.h
include/linux/kexec.h
include/linux/memblock.h
include/linux/memcontrol.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/nmi.h
include/linux/oom.h
include/linux/page_ext.h
include/linux/page_owner.h
include/linux/pagemap.h
include/linux/radix-tree.h
include/linux/random.h
include/linux/relay.h
include/linux/sched.h
include/linux/sem.h
include/linux/seq_file.h
include/linux/swap.h
include/linux/writeback.h
include/trace/events/compaction.h
include/trace/events/zsmalloc.h [new file with mode: 0644]
include/uapi/linux/auto_dev-ioctl.h [new file with mode: 0644]
include/uapi/linux/auto_fs.h
init/main.c
ipc/msg.c
ipc/sem.c
kernel/configs/android-base.config
kernel/configs/android-recommended.config
kernel/exit.c
kernel/fork.c
kernel/groups.c
kernel/panic.c
kernel/power/process.c
kernel/printk/printk.c
kernel/ptrace.c
kernel/relay.c
kernel/sched/idle.c
kernel/sysctl.c
kernel/uid16.c
kernel/watchdog.c
lib/Kconfig
lib/Makefile
lib/crc64_ecma.c [new file with mode: 0644]
lib/kstrtox.c
lib/nmi_backtrace.c
lib/stackdepot.c
lib/strncpy_from_user.c
mm/bootmem.c
mm/compaction.c
mm/debug.c
mm/filemap.c
mm/huge_memory.c
mm/internal.h
mm/khugepaged.c
mm/memblock.c
mm/memcontrol.c
mm/memory.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/mincore.c
mm/mlock.c
mm/nobootmem.c
mm/oom_kill.c
mm/page-writeback.c
mm/page_alloc.c
mm/page_ext.c
mm/page_owner.c
mm/slab.c
mm/slab.h
mm/swap.c
mm/swap_state.c
mm/swapfile.c
mm/vmacache.c
mm/vmalloc.c
mm/vmscan.c
mm/vmstat.c
mm/zsmalloc.c
net/ipv4/ping.c
net/sunrpc/auth_generic.c
net/sunrpc/auth_gss/gss_rpc_xdr.c
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/auth_unix.c
net/sunrpc/svcauth_unix.c
scripts/checkpatch.pl
scripts/const_structs.checkpatch [new file with mode: 0644]
scripts/mod/Makefile
scripts/mod/modpost.c
scripts/recordmcount.c
scripts/recordmcount.pl
scripts/spelling.txt
tools/testing/radix-tree/Makefile
tools/testing/radix-tree/iteration_check.c [new file with mode: 0644]
tools/testing/radix-tree/main.c
tools/testing/radix-tree/regression1.c
tools/testing/radix-tree/test.h
tools/testing/selftests/vm/.gitignore
tools/testing/selftests/vm/Makefile
tools/testing/selftests/vm/mlock-random-test.c [new file with mode: 0644]
tools/testing/selftests/vm/mlock2-tests.c
tools/testing/selftests/vm/mlock2.h [new file with mode: 0644]

diff --git a/.gitattributes b/.gitattributes
new file mode 100644 (file)
index 0000000..89c411b
--- /dev/null
@@ -0,0 +1,2 @@
+*.c   diff=cpp
+*.h   diff=cpp
index 2d455a5cf6718639062e237ff859fc045d7f4911..98bf7ac29aad8fff65e88bc6b1e6c2a6fc3a5033 100644 (file)
@@ -126,3 +126,20 @@ means that we won't try quite as hard to get them.
 
 NOTE: At the moment DMA_ATTR_ALLOC_SINGLE_PAGES is only implemented on ARM,
 though ARM64 patches will likely be posted soon.
+
+DMA_ATTR_NO_WARN
+----------------
+
+This tells the DMA-mapping subsystem to suppress allocation failure reports
+(similarly to __GFP_NOWARN).
+
+On some architectures allocation failures are reported with error messages
+to the system logs.  Although this can help to identify and debug problems,
+drivers that handle failures themselves (eg, by retrying later) gain nothing
+from these messages, and depending on the retry mechanism the messages can
+flood the system logs even though nothing is actually wrong.
+
+This attribute gives drivers a way to avoid those error messages on calls
+where an allocation failure is expected and handled, keeping the logs clean.
+
+NOTE: At the moment DMA_ATTR_NO_WARN is only implemented on PowerPC.
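
A minimal driver-side sketch of how the new attribute would be passed,
assuming the existing dma_map_sg_attrs() entry point; the helper name and
retry policy below are illustrative, not taken from this patch:

#include <linux/dma-mapping.h>

/* Map a scatterlist but suppress allocation-failure messages, because the
 * caller retries the mapping later on its own. */
static int example_map_sg_quiet(struct device *dev, struct scatterlist *sg,
                                int nents)
{
        int mapped;

        mapped = dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE,
                                  DMA_ATTR_NO_WARN);
        if (!mapped)
                return -EAGAIN;         /* quiet failure; caller retries */
        return mapped;
}
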
index aff22113a9866384279600728f6d3a2c92558248..50a3e01a36f80c14b85cb6c1307531a00065a733 100644 (file)
@@ -179,8 +179,19 @@ struct autofs_dev_ioctl {
                                 * including this struct */
        __s32 ioctlfd;          /* automount command fd */
 
-       __u32 arg1;             /* Command parameters */
-       __u32 arg2;
+       union {
+               struct args_protover            protover;
+               struct args_protosubver         protosubver;
+               struct args_openmount           openmount;
+               struct args_ready               ready;
+               struct args_fail                fail;
+               struct args_setpipefd           setpipefd;
+               struct args_timeout             timeout;
+               struct args_requester           requester;
+               struct args_expire              expire;
+               struct args_askumount           askumount;
+               struct args_ismountpoint        ismountpoint;
+       };
 
        char path[0];
 };
@@ -192,8 +203,8 @@ optionally be used to check a specific mount corresponding to a given
 mount point file descriptor, and when requesting the uid and gid of the
 last successful mount on a directory within the autofs file system.
 
-The fields arg1 and arg2 are used to communicate parameters and results of
-calls made as described below.
+The union is used to communicate parameters and results of calls made
+as described below.
 
 The path field is used to pass a path where it is needed and the size field
 is used to account for the increased structure length when translating the
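
A short user-space sketch of the new layout, assuming the command macros and
version constants from the new include/uapi/linux/auto_dev-ioctl.h; the
helper below and its error handling are illustrative only:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/auto_dev-ioctl.h>

/* Query the protocol version through the union member instead of arg1. */
static int get_proto_version(int devfd, int ioctlfd)
{
        struct autofs_dev_ioctl param;

        memset(&param, 0, sizeof(param));
        param.ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
        param.ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
        param.size = sizeof(param);
        param.ioctlfd = ioctlfd;        /* autofs mount point descriptor */

        if (ioctl(devfd, AUTOFS_DEV_IOCTL_PROTOVER, &param) == -1)
                return -1;

        printf("autofs protocol version %u\n", param.protover.version);
        return 0;
}
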
@@ -245,9 +256,9 @@ AUTOFS_DEV_IOCTL_PROTOVER_CMD and AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD
 Get the major and minor version of the autofs4 protocol version understood
 by loaded module. This call requires an initialized struct autofs_dev_ioctl
 with the ioctlfd field set to a valid autofs mount point descriptor
-and sets the requested version number in structure field arg1. These
-commands return 0 on success or one of the negative error codes if
-validation fails.
+and sets the requested version number in the version field of struct args_protover
+or the sub_version field of struct args_protosubver. These commands return
+0 on success or one of the negative error codes if validation fails.
 
 
 AUTOFS_DEV_IOCTL_OPENMOUNT and AUTOFS_DEV_IOCTL_CLOSEMOUNT
@@ -256,9 +267,9 @@ AUTOFS_DEV_IOCTL_OPENMOUNT and AUTOFS_DEV_IOCTL_CLOSEMOUNT
 Obtain and release a file descriptor for an autofs managed mount point
 path. The open call requires an initialized struct autofs_dev_ioctl with
 the path field set and the size field adjusted appropriately as well
-as the arg1 field set to the device number of the autofs mount. The
-device number can be obtained from the mount options shown in
-/proc/mounts. The close call requires an initialized struct
+as the devid field of struct args_openmount set to the device number of
+the autofs mount. The device number can be obtained from the mount options
+shown in /proc/mounts. The close call requires an initialized struct
 autofs_dev_ioctl with the ioctlfd field set to the descriptor obtained
 from the open call. The release of the file descriptor can also be done
 with close(2) so any open descriptors will also be closed at process exit.
@@ -272,10 +283,10 @@ AUTOFS_DEV_IOCTL_READY_CMD and AUTOFS_DEV_IOCTL_FAIL_CMD
 Return mount and expire result status from user space to the kernel.
 Both of these calls require an initialized struct autofs_dev_ioctl
 with the ioctlfd field set to the descriptor obtained from the open
-call and the arg1 field set to the wait queue token number, received
-by user space in the foregoing mount or expire request. The arg2 field
-is set to the status to be returned. For the ready call this is always
-0 and for the fail call it is set to the errno of the operation.
+call and the token field of struct args_ready or struct args_fail set
+to the wait queue token number, received by user space in the foregoing
+mount or expire request. For the fail call the status field of struct args_fail
+is set to the errno of the failed operation; for the ready call it is always 0.
 
 
 AUTOFS_DEV_IOCTL_SETPIPEFD_CMD
@@ -290,9 +301,10 @@ mount be catatonic (see next call).
 
 The call requires an initialized struct autofs_dev_ioctl with the
 ioctlfd field set to the descriptor obtained from the open call and
-the arg1 field set to descriptor of the pipe. On success the call
-also sets the process group id used to identify the controlling process
-(eg. the owning automount(8) daemon) to the process group of the caller.
+the pipefd field of struct args_setpipefd set to the descriptor of the
+pipe. On success the call also sets the process group id used to identify
+the controlling process (eg. the owning automount(8) daemon) to the
+process group of the caller.
 
 
 AUTOFS_DEV_IOCTL_CATATONIC_CMD
@@ -323,9 +335,8 @@ mount on the given path dentry.
 
 The call requires an initialized struct autofs_dev_ioctl with the path
 field set to the mount point in question and the size field adjusted
-appropriately as well as the arg1 field set to the device number of the
-containing autofs mount. Upon return the struct field arg1 contains the
-uid and arg2 the gid.
+appropriately. Upon return the uid field of struct args_requester contains
+the uid and the gid field contains the gid.
 
 When reconstructing an autofs mount tree with active mounts we need to
 re-connect to mounts that may have used the original process uid and
@@ -343,8 +354,9 @@ this ioctl is called until no further expire candidates are found.
 The call requires an initialized struct autofs_dev_ioctl with the
 ioctlfd field set to the descriptor obtained from the open call. In
 addition an immediate expire, independent of the mount timeout, can be
-requested by setting the arg1 field to 1. If no expire candidates can
-be found the ioctl returns -1 with errno set to EAGAIN.
+requested by setting the how field of struct args_expire to 1. If no
+expire candidates can be found the ioctl returns -1 with errno set to
+EAGAIN.
 
 This call causes the kernel module to check the mount corresponding
 to the given ioctlfd for mounts that can be expired, issues an expire
@@ -357,7 +369,8 @@ Checks if an autofs mount point is in use.
 
 The call requires an initialized struct autofs_dev_ioctl with the
 ioctlfd field set to the descriptor obtained from the open call and
-it returns the result in the arg1 field, 1 for busy and 0 otherwise.
+it returns the result in the may_umount field of struct args_askumount,
+1 for busy and 0 otherwise.
 
 
 AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD
@@ -369,12 +382,12 @@ The call requires an initialized struct autofs_dev_ioctl. There are two
 possible variations. Both use the path field set to the path of the mount
 point to check and the size field adjusted appropriately. One uses the
 ioctlfd field to identify a specific mount point to check while the other
-variation uses the path and optionally arg1 set to an autofs mount type.
-The call returns 1 if this is a mount point and sets arg1 to the device
-number of the mount and field arg2 to the relevant super block magic
-number (described below) or 0 if it isn't a mountpoint. In both cases
-the the device number (as returned by new_encode_dev()) is returned
-in field arg1.
+variation uses the path and optionally the in.type field of struct
+args_ismountpoint set to an autofs mount type. The call returns 1 if this
+is a mount point and sets the out.devid field to the device number of the
+mount and the out.magic field to the relevant super block magic number
+(described below), or 0 if it isn't a mountpoint. In both cases the device
+number (as returned by new_encode_dev()) is returned in the out.devid field.
 
 If supplied with a file descriptor we're looking for a specific mount,
 not necessarily at the top of the mounted stack. In this case the path
index 39d02e19fb6288f79769f356d7759a997bf9461f..8fac3fe7b8c971c0c39e54283c2fa0698922aaff 100644 (file)
@@ -203,9 +203,9 @@ initiated or is being considered, otherwise it returns 0.
 Mountpoint expiry
 -----------------
 
-The VFS has a mechansim for automatically expiring unused mounts,
+The VFS has a mechanism for automatically expiring unused mounts,
 much as it can expire any unused dentry information from the dcache.
-This is guided by the MNT_SHRINKABLE flag.  This  only applies to
+This is guided by the MNT_SHRINKABLE flag.  This only applies to
 mounts that were created by `d_automount()` returning a filesystem to be
 mounted.  As autofs doesn't return such a filesystem but leaves the
 mounting to the automount daemon, it must involve the automount daemon
@@ -298,7 +298,7 @@ remove directories and symlinks using normal filesystem operations.
 autofs knows whether a process requesting some operation is the daemon
 or not based on its process-group id number (see getpgid(1)).
 
-When an autofs filesystem it mounted the pgid of the mounting
+When an autofs filesystem is mounted the pgid of the mounting
 processes is recorded unless the "pgrp=" option is given, in which
 case that number is recorded instead.  Any request arriving from a
 process in that process group is considered to come from the daemon.
@@ -450,7 +450,7 @@ Commands are:
     numbers for existing filesystems can be found in
     `/proc/self/mountinfo`.
 - **AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD**: same as `close(ioctlfd)`.
-- **AUTOFS_DEV_IOCTL_SETPIPEFD_CMD**: if the  filesystem is in
+- **AUTOFS_DEV_IOCTL_SETPIPEFD_CMD**: if the filesystem is in
     catatonic mode, this can provide the write end of a new pipe
     in `arg1` to re-establish communication with a daemon.  The
     process group of the calling process is used to identify the
diff --git a/Kbuild b/Kbuild
index 3d0ae152af7c6271d2b6bf85acef2025e25d8cc5..63fc0e132bdc89c843700acae2df170c8cb52256 100644 (file)
--- a/Kbuild
+++ b/Kbuild
@@ -7,14 +7,6 @@
 # 4) Check for missing system calls
 # 5) Generate constants.py (may need bounds.h)
 
-# Default sed regexp - multiline due to syntax constraints
-define sed-y
-       "/^->/{s:->#\(.*\):/* \1 */:; \
-       s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \
-       s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
-       s:->::; p;}"
-endef
-
 # Use filechk to avoid rebuilds when a header changes, but the resulting file
 # does not
 define filechk_offsets
@@ -26,9 +18,7 @@ define filechk_offsets
         echo " *"; \
         echo " * This file was generated by Kbuild"; \
         echo " */"; \
-        echo ""; \
-        sed -ne $(sed-y); \
-        echo ""; \
+        sed -ne '/#define/{s/\$$//;s/#//2;s/$$/*\//;p;}'; \
         echo "#endif" )
 endef
 
index a93d351ac6f8cb43a2295b29a04e28fc574bc037..388b572d43a8d21761d5456589969d22d8c586de 100644 (file)
@@ -6167,7 +6167,7 @@ S:        Supported
 F:     drivers/cpufreq/intel_pstate.c
 
 INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
-M:     Maik Broemme <mbroemme@plusserver.de>
+M:     Maik Broemme <mbroemme@libmpq.org>
 L:     linux-fbdev@vger.kernel.org
 S:     Maintained
 F:     Documentation/fb/intelfb.txt
@@ -12688,7 +12688,7 @@ F:      include/linux/if_*vlan.h
 F:     net/8021q/
 
 VLYNQ BUS
-M:     Florian Fainelli <florian@openwrt.org>
+M:     Florian Fainelli <f.fainelli@gmail.com>
 L:     openwrt-devel@lists.openwrt.org (subscribers-only)
 S:     Maintained
 F:     drivers/vlynq/vlynq.c
index 647b84c15382347ec25efae55067e77fff26693e..cebecfb76fbf6e87e02836651d5b7279cff00350 100644 (file)
@@ -22,6 +22,7 @@ SECTIONS
                HEAD_TEXT
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                *(.fixup)
                *(.gnu.warning)
index 894e696bddaa3ca3fab6bdca6ed4f7ac451bb290..65652160cfda3abf1cbcad9c3e12c4b6cca1d6c2 100644 (file)
@@ -97,6 +97,7 @@ SECTIONS
                _text = .;
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
                *(.fixup)
index 1bd9510de1b9ced64b1947f2734ddbf4c8ce4f5d..e53638c8ed8aafc4dccfe9509c8284405d6fde2a 100644 (file)
@@ -36,8 +36,9 @@ extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 #endif
 
 #ifdef CONFIG_SMP
-extern void arch_trigger_all_cpu_backtrace(bool);
-#define arch_trigger_all_cpu_backtrace(x) arch_trigger_all_cpu_backtrace(x)
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+                                          bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 #endif
 
 static inline int nr_legacy_irqs(void)
index 4355f0ec44d62e9b5d7c40132f28f0ff6710f387..f98baaec0a1588c433c7221ebe77d62e120e0b2e 100644 (file)
@@ -17,6 +17,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/personality.h> /* For READ_IMPLIES_EXEC */
+
 #ifndef CONFIG_MMU
 
 #include <asm/page-nommu.h>
index 612eb530f33fcd19bc4539facb26fc30a2583979..91d2d5b014145d5fdc4071d027fb2358b9831347 100644 (file)
@@ -318,8 +318,7 @@ unsigned long get_wchan(struct task_struct *p)
 
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
-       unsigned long range_end = mm->brk + 0x02000000;
-       return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+       return randomize_page(mm->brk, 0x02000000);
 }
 
 #ifdef CONFIG_MMU
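
randomize_page() itself is added elsewhere in this series (drivers/char/random.c,
not shown in this excerpt). A hedged reconstruction of its expected behaviour:
it returns a page-aligned address inside [start, start + range), or start
itself when the usable range collapses to zero:

/* Assumed shape of the new helper; not part of this hunk. */
unsigned long randomize_page(unsigned long start, unsigned long range)
{
        if (!PAGE_ALIGNED(start)) {
                range -= PAGE_ALIGN(start) - start;
                start = PAGE_ALIGN(start);
        }

        if (start > ULONG_MAX - range)
                range = ULONG_MAX - start;

        range >>= PAGE_SHIFT;
        if (range == 0)
                return start;

        return start + (get_random_long() % range << PAGE_SHIFT);
}

This is why the old "randomize_range(...) ?: mm->brk" fallback is no longer
needed: the helper already falls back to the unrandomized start address.
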
index 937c8920d741485a8992209a778a1ae385a93408..7dd14e8395e62976b3083f677b0e4d2ea0d71d95 100644 (file)
@@ -748,19 +748,10 @@ core_initcall(register_cpufreq_notifier);
 
 static void raise_nmi(cpumask_t *mask)
 {
-       /*
-        * Generate the backtrace directly if we are running in a calling
-        * context that is not preemptible by the backtrace IPI. Note
-        * that nmi_cpu_backtrace() automatically removes the current cpu
-        * from mask.
-        */
-       if (cpumask_test_cpu(smp_processor_id(), mask) && irqs_disabled())
-               nmi_cpu_backtrace(NULL);
-
        smp_cross_call(mask, IPI_CPU_BACKTRACE);
 }
 
-void arch_trigger_all_cpu_backtrace(bool include_self)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
 {
-       nmi_trigger_all_cpu_backtrace(include_self, raise_nmi);
+       nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_nmi);
 }
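
A caller-side sketch of the new interface, assuming the generic
trigger_cpumask_backtrace() wrapper in <linux/nmi.h> that this series adds on
top of the arch_trigger_cpumask_backtrace() hook above (the function below is
hypothetical):

#include <linux/nmi.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/slab.h>

/* Ask every online CPU except the current one for a backtrace. */
static void dump_other_cpus(void)
{
        cpumask_var_t mask;

        if (!alloc_cpumask_var(&mask, GFP_KERNEL))
                return;

        cpumask_copy(mask, cpu_online_mask);
        cpumask_clear_cpu(smp_processor_id(), mask);
        trigger_cpumask_backtrace(mask); /* -> arch_trigger_cpumask_backtrace() */
        free_cpumask_var(mask);
}
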
index cba1ec899a693c85113659c98144c26993b8f350..7fa487ef7e2f67fb3e1ac7fa8fd3edb58f2145fc 100644 (file)
@@ -98,6 +98,7 @@ SECTIONS
                        IRQENTRY_TEXT
                        TEXT_TEXT
                        SCHED_TEXT
+                       CPUIDLE_TEXT
                        LOCK_TEXT
                        KPROBES_TEXT
                        *(.gnu.warning)
index d24e5dd2aa7a74d98eb718e9edc6cabfbd5341ed..f7f55df0bf7b3b654f40c3c847646f5ec482a0d9 100644 (file)
@@ -111,6 +111,7 @@ SECTIONS
                        SOFTIRQENTRY_TEXT
                        TEXT_TEXT
                        SCHED_TEXT
+                       CPUIDLE_TEXT
                        LOCK_TEXT
                        HYPERVISOR_TEXT
                        KPROBES_TEXT
index a4f5f766af08b010e66ec04889c098a5267eee05..27b2f1387df40b61b4aa059be5650d329964da6b 100644 (file)
@@ -372,12 +372,8 @@ unsigned long arch_align_stack(unsigned long sp)
 
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
-       unsigned long range_end = mm->brk;
-
        if (is_compat_task())
-               range_end += 0x02000000;
+               return randomize_page(mm->brk, 0x02000000);
        else
-               range_end += 0x40000000;
-
-       return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+               return randomize_page(mm->brk, 0x40000000);
 }
index 5ce9b2929e0d1100dc6e5270f4ee6e336e15013e..1105aab1e6d6af4be3f88c0ee22d4cfafd5c5ce6 100644 (file)
@@ -122,6 +122,7 @@ SECTIONS
                        ENTRY_TEXT
                        TEXT_TEXT
                        SCHED_TEXT
+                       CPUIDLE_TEXT
                        LOCK_TEXT
                        KPROBES_TEXT
                        HYPERVISOR_TEXT
index a4589176bed5d7940d1f0e66d144a933892557f5..17f2730eb4978de7660933cbe04c233f6df78f14 100644 (file)
@@ -52,6 +52,7 @@ SECTIONS
                KPROBES_TEXT
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                *(.fixup)
                *(.gnu.warning)
index d920b959ff3a4b8cb3d6a28d67d40ebf6f7c457b..68069a120055b359b671b5890d2797f63027c007 100644 (file)
@@ -33,6 +33,7 @@ SECTIONS
 #ifndef CONFIG_SCHEDULE_L1
                SCHED_TEXT
 #endif
+               CPUIDLE_TEXT
                LOCK_TEXT
                IRQENTRY_TEXT
                SOFTIRQENTRY_TEXT
index 50bc10f97bcb4a234bff525d9e120842aa663c99..a1a5c166bc9b8b125bfd4e515ab7a16afad0bc65 100644 (file)
@@ -70,6 +70,7 @@ SECTIONS
                _stext = .;
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                IRQENTRY_TEXT
                SOFTIRQENTRY_TEXT
index 7552c255750659c2efd4e3d525dcce6267f3cd94..97958626152000cd24d171ea0fa0c3326f77ffe7 100644 (file)
@@ -43,6 +43,7 @@ SECTIONS
                HEAD_TEXT
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                *(.fixup)
                *(.text.__*)
index 7e958d829ec9810020c75eca7beef1cbfd44a3c2..aa6e573d57da46d1cb05bec37cb9dfcc7bfac13f 100644 (file)
@@ -63,6 +63,7 @@ SECTIONS
        *(.text..tlbmiss)
        TEXT_TEXT
        SCHED_TEXT
+       CPUIDLE_TEXT
        LOCK_TEXT
 #ifdef CONFIG_DEBUG_INFO
        INIT_TEXT
index cb5dfb02c88d412cd67b33827da834a3770d0c7f..7f11da1b895e1351fd93af0804ae228212bd57cd 100644 (file)
@@ -29,6 +29,7 @@ SECTIONS
        _stext = . ;
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
 #if defined(CONFIG_ROMKERNEL)
                *(.int_redirect)
index 5f268c1071b3df8f56b4fd4871fbb1b5b19e8d72..ec87e67feb19e4692337fa78c82cc67b6ffd0dce 100644 (file)
@@ -50,6 +50,7 @@ SECTIONS
                _text = .;
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
                *(.fixup)
index 3686d6abafdefd3feb854656d5cf704f89be5340..853a5aca93898e7688b57d25666934a0b6a337f7 100644 (file)
@@ -50,10 +50,6 @@ CFLAGS_traps.o  += -mfixed-range=f2-f5,f16-f31
 # The gate DSO image is built using a special linker script.
 include $(src)/Makefile.gate
 
-# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config
-define sed-y
-       "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
-endef
 quiet_cmd_nr_irqs = GEN     $@
 define cmd_nr_irqs
        (set -e; \
@@ -65,9 +61,7 @@ define cmd_nr_irqs
         echo " * This file was generated by Kbuild"; \
         echo " *"; \
         echo " */"; \
-        echo ""; \
-        sed -ne $(sed-y) $<; \
-        echo ""; \
+        sed -ne '/#define/{s/\$$//;s/#//2;s/$$/*\//p;}' $<; \
         echo "#endif" ) > $@
 endef
 
index dc506b05ffbdbf21f5eece65a723ecb178c4564c..f89d20c9741225e4d416713fd32819e6937c00e9 100644 (file)
@@ -46,6 +46,7 @@ SECTIONS {
                __end_ivt_text = .;
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
                *(.gnu.linkonce.t*)
index 018e4a711d7927cef577e347dac93fb3a22afd75..ad1fe56455aae60b05b9e1a30294ccd2ab7b48ab 100644 (file)
@@ -31,6 +31,7 @@ SECTIONS
        HEAD_TEXT
        TEXT_TEXT
        SCHED_TEXT
+       CPUIDLE_TEXT
        LOCK_TEXT
        *(.fixup)
        *(.gnu.warning)
index 06a763f49fd34643d5330221a48011be00889b0e..d2c8abf1c8c4eac6688504f2a2dfaa7ea7177e65 100644 (file)
@@ -45,6 +45,7 @@ SECTIONS {
                HEAD_TEXT
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                *(.fixup)
                . = ALIGN(16);
index d0993594f558b3408317aede57c634c407ea580f..5b5ce1e4d1ed90b9e40dc68fa50c3b87671a8df0 100644 (file)
@@ -16,6 +16,7 @@ SECTIONS
        HEAD_TEXT
        TEXT_TEXT
        SCHED_TEXT
+       CPUIDLE_TEXT
        LOCK_TEXT
        *(.fixup)
        *(.gnu.warning)
index 8080469ee6c11c3e86ab59febaa4773790a2ffc7..fe5ea1974b16c2efcd34b0f134dcf17a24d388b5 100644 (file)
@@ -16,6 +16,7 @@ SECTIONS
        HEAD_TEXT
        TEXT_TEXT
        SCHED_TEXT
+       CPUIDLE_TEXT
        LOCK_TEXT
        *(.fixup)
        *(.gnu.warning)
index 150ace92c7ade9046c9c22b114a979356e7395ac..e6c700eaf207174544687895522cb5b0405b0a01 100644 (file)
@@ -21,6 +21,7 @@ SECTIONS
   .text : {
        TEXT_TEXT
        SCHED_TEXT
+       CPUIDLE_TEXT
        LOCK_TEXT
        KPROBES_TEXT
        IRQENTRY_TEXT
index 0a47f041055416a90d34a64be3208e3fd74b2b6c..289d0e7f3e3aaed8126c30d109c17c442b848f5b 100644 (file)
@@ -33,6 +33,7 @@ SECTIONS {
                EXIT_TEXT
                EXIT_CALL
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
                IRQENTRY_TEXT
index 89be31ba70ed5e7a9dacdaf3e107c403cbb4f071..9a2db1c013d92e548fd04ef0a008ec442942350c 100644 (file)
@@ -267,6 +267,17 @@ static void octeon_crash_shutdown(struct pt_regs *regs)
        default_machine_crash_shutdown(regs);
 }
 
+#ifdef CONFIG_SMP
+void octeon_crash_smp_send_stop(void)
+{
+       int cpu;
+
+       /* disable watchdogs */
+       for_each_online_cpu(cpu)
+               cvmx_write_csr(CVMX_CIU_WDOGX(cpu_logical_map(cpu)), 0);
+}
+#endif
+
 #endif /* CONFIG_KEXEC */
 
 #ifdef CONFIG_CAVIUM_RESERVE32
@@ -911,6 +922,9 @@ void __init prom_init(void)
        _machine_kexec_shutdown = octeon_shutdown;
        _machine_crash_shutdown = octeon_crash_shutdown;
        _machine_kexec_prepare = octeon_kexec_prepare;
+#ifdef CONFIG_SMP
+       _crash_smp_send_stop = octeon_crash_smp_send_stop;
+#endif
 #endif
 
        octeon_user_io_init();
index 15e0fecbc300fd9752023931984fe277a73da876..6bf10e796553838bbd165dc31e117fb1ae9e4234 100644 (file)
@@ -51,7 +51,8 @@ extern int cp0_fdc_irq;
 
 extern int get_c0_fdc_int(void);
 
-void arch_trigger_all_cpu_backtrace(bool);
-#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
+                                   bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 
 #endif /* _ASM_IRQ_H */
index ee25ebbf2a28809c73f68311c24284c53fe87907..493a3cc7c39ad5a412d6d460061b740260dfff9c 100644 (file)
@@ -45,6 +45,7 @@ extern const unsigned char kexec_smp_wait[];
 extern unsigned long secondary_kexec_args[4];
 extern void (*relocated_kexec_smp_wait) (void *);
 extern atomic_t kexec_ready_to_reboot;
+extern void (*_crash_smp_send_stop)(void);
 #endif
 #endif
 
index 34c325c674c445306667ece3d7c422a51ff686c4..28ab364cd0b1fb246d15a1f2568cb020bdc3f0ea 100644 (file)
@@ -43,16 +43,4 @@ struct arch_uprobe_task {
        unsigned long saved_trap_nr;
 };
 
-extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup,
-       struct mm_struct *mm, unsigned long addr);
-extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
-extern int arch_uprobe_exception_notify(struct notifier_block *self,
-       unsigned long val, void *data);
-extern void arch_uprobe_abort_xol(struct arch_uprobe *aup,
-       struct pt_regs *regs);
-extern unsigned long arch_uretprobe_hijack_return_addr(
-       unsigned long trampoline_vaddr, struct pt_regs *regs);
-
 #endif /* __ASM_UPROBES_H */
index 610f0f3bdb3455cfa42629e98f8cc883524fe8ba..1723b17622976da35170caba062fe956a0fcd245 100644 (file)
@@ -47,9 +47,14 @@ static void crash_shutdown_secondary(void *passed_regs)
 
 static void crash_kexec_prepare_cpus(void)
 {
+       static int cpus_stopped;
        unsigned int msecs;
+       unsigned int ncpus;
 
-       unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+       if (cpus_stopped)
+               return;
+
+       ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
 
        dump_send_ipi(crash_shutdown_secondary);
        smp_wmb();
@@ -64,6 +69,17 @@ static void crash_kexec_prepare_cpus(void)
                cpu_relax();
                mdelay(1);
        }
+
+       cpus_stopped = 1;
+}
+
+/* Override the weak function in kernel/panic.c */
+void crash_smp_send_stop(void)
+{
+       if (_crash_smp_send_stop)
+               _crash_smp_send_stop();
+
+       crash_kexec_prepare_cpus();
 }
 
 #else /* !defined(CONFIG_SMP)  */
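
The weak default that this override replaces lives in kernel/panic.c (touched
elsewhere in this series but not shown in this excerpt); a hedged sketch of
its expected shape:

/* kernel/panic.c -- assumed shape of the weak default. */
void __weak crash_smp_send_stop(void)
{
        static int cpus_stopped;

        /* The panic path may call this twice; only stop the CPUs once. */
        if (cpus_stopped)
                return;

        smp_send_stop();
        cpus_stopped = 1;
}
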
index 50980bf3983ef3654d4f24c1dbf95e4576d0c3a8..59725204105c2b50aa476e1d0e5cb2e61d73cb7c 100644 (file)
@@ -25,6 +25,7 @@ void (*_machine_crash_shutdown)(struct pt_regs *regs) = NULL;
 #ifdef CONFIG_SMP
 void (*relocated_kexec_smp_wait) (void *);
 atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
+void (*_crash_smp_send_stop)(void) = NULL;
 #endif
 
 int
index 7429ad09fbe3e1178ad37f0e34ed2f1651ac70ac..fea1fa7726e38443e68ad87a4fb61de9ec580944 100644 (file)
@@ -569,9 +569,16 @@ static void arch_dump_stack(void *info)
        dump_stack();
 }
 
-void arch_trigger_all_cpu_backtrace(bool include_self)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
 {
-       smp_call_function(arch_dump_stack, NULL, 1);
+       long this_cpu = get_cpu();
+
+       if (cpumask_test_cpu(this_cpu, mask) && !exclude_self)
+               dump_stack();
+
+       smp_call_function_many(mask, arch_dump_stack, NULL, 1);
+
+       put_cpu();
 }
 
 int mips_get_process_fp_mode(struct task_struct *task)
index a82c178d0bb97c8e47b778945af384af620431ea..d5de67591735940c86f5e3b8ba2e186a3e94fbaa 100644 (file)
@@ -55,6 +55,7 @@ SECTIONS
        .text : {
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
                IRQENTRY_TEXT
index 13c4814c29f840dd4ecb95e4e7206da7127aef49..2d5f1c3f1afbc54c9d34afb8396fc8bea326a295 100644 (file)
@@ -30,6 +30,7 @@ SECTIONS
        HEAD_TEXT
        TEXT_TEXT
        SCHED_TEXT
+       CPUIDLE_TEXT
        LOCK_TEXT
        KPROBES_TEXT
        *(.fixup)
index e23e89539967713c301c4f452f802f878bb7562f..6a8045bb1a77d06bd2fbc9456b7322c45765d04d 100644 (file)
@@ -37,6 +37,7 @@ SECTIONS
        .text : {
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                IRQENTRY_TEXT
                SOFTIRQENTRY_TEXT
index d936de4c07cadd73f00a0700a3781243b8f7852c..d68b9ede84231fe618062a68b190491cd25eeba6 100644 (file)
@@ -47,6 +47,7 @@ SECTIONS
           _stext = .;
          TEXT_TEXT
          SCHED_TEXT
+         CPUIDLE_TEXT
          LOCK_TEXT
          KPROBES_TEXT
          IRQENTRY_TEXT
index f3ead0b6ce461501508c351fdc3246f67f18e218..9ec8ec075daefd5a774eb4da097615dea9fa2ee2 100644 (file)
@@ -69,6 +69,7 @@ SECTIONS
        .text ALIGN(PAGE_SIZE) : {
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
                IRQENTRY_TEXT
index 7b589178be46b293519bf2244c0e4dd6251fe9a1..4d52ccfc23665dbc10e45166906c1b70e1a0c317 100644 (file)
@@ -41,6 +41,9 @@ u64 memory_hotplug_max(void);
 #else
 #define memory_hotplug_max() memblock_end_of_DRAM()
 #endif /* CONFIG_NEED_MULTIPLE_NODES */
+#ifdef CONFIG_FA_DUMP
+#define __HAVE_ARCH_RESERVED_KERNEL_PAGES
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_MMZONE_H_ */
index b3a663333d362eaddc97d5f73f4a9aa2a0fc249a..eeb80de1f9823ee87f0daa8a76d60d2be21010e0 100644 (file)
@@ -333,6 +333,11 @@ int __init fadump_reserve_mem(void)
        return 1;
 }
 
+unsigned long __init arch_reserved_kernel_pages(void)
+{
+       return memblock_reserved_size() / PAGE_SIZE;
+}
+
 /* Look for fadump= cmdline option. */
 static int __init early_fadump_param(char *p)
 {
index 37d6e741be826da51474ab50fe3027cc06400cd5..5f202a566ec5f0296a22a71ac238a63a6a90ba7f 100644 (file)
@@ -479,7 +479,8 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 
                /* Handle failure */
                if (unlikely(entry == DMA_ERROR_CODE)) {
-                       if (printk_ratelimit())
+                       if (!(attrs & DMA_ATTR_NO_WARN) &&
+                           printk_ratelimit())
                                dev_info(dev, "iommu_alloc failed, tbl %p "
                                         "vaddr %lx npages %lu\n", tbl, vaddr,
                                         npages);
@@ -776,7 +777,8 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
                                         mask >> tbl->it_page_shift, align,
                                         attrs);
                if (dma_handle == DMA_ERROR_CODE) {
-                       if (printk_ratelimit())  {
+                       if (!(attrs & DMA_ATTR_NO_WARN) &&
+                           printk_ratelimit())  {
                                dev_info(dev, "iommu_alloc failed, tbl %p "
                                         "vaddr %p npages %d\n", tbl, vaddr,
                                         npages);
index b5fba689fca65475a69db67f05008cb546633b38..7ed59f0d947f5f5098776b47f16236141e91f438 100644 (file)
@@ -52,6 +52,7 @@ SECTIONS
                /* careful! __ftr_alt_* sections need to be close to .text */
                *(.text .fixup __ftr_alt_* .ref.text)
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
                IRQENTRY_TEXT
index 1411dff7fea75abd9b942c279ffc09929ae11fc7..658393c65d7e385a6b8ae60bc4b82503da7a1484 100644 (file)
@@ -29,14 +29,4 @@ struct arch_uprobe {
 struct arch_uprobe_task {
 };
 
-int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm,
-                            unsigned long addr);
-int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
-int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
-                                void *data);
-void arch_uprobe_abort_xol(struct arch_uprobe *ap, struct pt_regs *regs);
-unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline,
-                                               struct pt_regs *regs);
 #endif /* _ASM_UPROBES_H */
index 437e611592790b0ed725998d08281be420fcf954..0f9cd90c11af6173d78d4ace43465cc70332b18f 100644 (file)
@@ -189,7 +189,7 @@ static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info
        kgid_t kgid;
 
        for (i = 0; i < group_info->ngroups; i++) {
-               kgid = GROUP_AT(group_info, i);
+               kgid = group_info->gid[i];
                group = (u16)from_kgid_munged(user_ns, kgid);
                if (put_user(group, grouplist+i))
                        return -EFAULT;
@@ -213,7 +213,7 @@ static int groups16_from_user(struct group_info *group_info, u16 __user *groupli
                if (!gid_valid(kgid))
                        return -EINVAL;
 
-               GROUP_AT(group_info, i) = kgid;
+               group_info->gid[i] = kgid;
        }
 
        return 0;
index 429bfd1119612caa93fbbcf390768d9eaeedc7fe..000e6e91f6a0630c53f35519d450fccf02479e55 100644 (file)
@@ -35,6 +35,7 @@ SECTIONS
                HEAD_TEXT
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
                IRQENTRY_TEXT
index 7274b5c4287ee4cbaad56a1eb41ad43a0d90c047..4117890b1db1a60dbfde5d26964a669cd53a2a80 100644 (file)
@@ -40,6 +40,7 @@ SECTIONS
                _text = .;      /* Text and read-only data */
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
                *(.text.*)
index 235a4101999fe5a4462bd5ec1d78cba49eb49b90..5b9a3cc90c58a770cfd28e0c816048d975f3845e 100644 (file)
@@ -36,6 +36,7 @@ SECTIONS
                TEXT_TEXT
                EXTRA_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
                IRQENTRY_TEXT
index 3f70f900e834203f0974169ebe95380442486d1f..1d51a11fb261cc17a1b05796b6bed1934f1ff589 100644 (file)
@@ -86,8 +86,9 @@ static inline unsigned long get_softint(void)
        return retval;
 }
 
-void arch_trigger_all_cpu_backtrace(bool);
-#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
+                                   bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 
 extern void *hardirq_stack[NR_CPUS];
 extern void *softirq_stack[NR_CPUS];
index fa14402b33f95f34d7c9f84f26ef23ea7c05aff1..47ff5588e5213748d34bd4d6876eeefb779aca12 100644 (file)
@@ -239,7 +239,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp)
        }
 }
 
-void arch_trigger_all_cpu_backtrace(bool include_self)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
 {
        struct thread_info *tp = current_thread_info();
        struct pt_regs *regs = get_irq_regs();
@@ -255,15 +255,15 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
 
        memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
 
-       if (include_self)
+       if (cpumask_test_cpu(this_cpu, mask) && !exclude_self)
                __global_reg_self(tp, regs, this_cpu);
 
        smp_fetch_global_regs();
 
-       for_each_online_cpu(cpu) {
+       for_each_cpu(cpu, mask) {
                struct global_reg_snapshot *gp;
 
-               if (!include_self && cpu == this_cpu)
+               if (exclude_self && cpu == this_cpu)
                        continue;
 
                gp = &global_cpu_snapshot[cpu].reg;
@@ -300,7 +300,7 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
 
 static void sysrq_handle_globreg(int key)
 {
-       arch_trigger_all_cpu_backtrace(true);
+       trigger_all_cpu_backtrace();
 }
 
 static struct sysrq_key_op sparc_globalreg_op = {
index d79b3b7342457b4eace6232b010226a1068a856c..572db686f845830a69f0815f7d274eae4927483c 100644 (file)
@@ -49,6 +49,7 @@ SECTIONS
                HEAD_TEXT
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
                IRQENTRY_TEXT
index 84a924034bdbf816f5bb41442251ce13f23f30b5..1fa1f2544ff9224723a138e45140a89cdcd6186a 100644 (file)
@@ -79,8 +79,9 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type);
 void setup_irq_regs(void);
 
 #ifdef __tilegx__
-void arch_trigger_all_cpu_backtrace(bool self);
-#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
+                                   bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 #endif
 
 #endif /* _ASM_TILE_IRQ_H */
index 670a3569450fd39eca59decc0f626565acb3d0b6..101de132e363bec669ac5e542c9afd78d172b304 100644 (file)
@@ -50,7 +50,7 @@ STD_ENTRY(smp_nap)
  * When interrupted at _cpu_idle_nap, we bump the PC forward 8, and
  * as a result return to the function that called _cpu_idle().
  */
-STD_ENTRY(_cpu_idle)
+STD_ENTRY_SECTION(_cpu_idle, .cpuidle.text)
        movei r1, 1
        IRQ_ENABLE_LOAD(r2, r3)
        mtspr INTERRUPT_CRITICAL_SECTION, r1
index db62cc34b955c89e8ecbf2a42c77ace78c77368f..81cf8743a3f3a35c94749bd4259b442e9d769350 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <linux/atomic.h>
-#include <linux/interrupt.h>
 
 #include <asm/processor.h>
 #include <asm/pmc.h>
@@ -29,9 +28,7 @@ int handle_perf_interrupt(struct pt_regs *regs, int fault)
        if (!perf_irq)
                panic("Unexpected PERF_COUNT interrupt %d\n", fault);
 
-       nmi_enter();
        retval = perf_irq(regs, fault);
-       nmi_exit();
        return retval;
 }
 
index a465d8372edda17448b8b9fa3c3920da9d1047b2..9f37106ef93ab0850408c06884311380020ffd45 100644 (file)
@@ -22,7 +22,7 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/compat.h>
-#include <linux/hardirq.h>
+#include <linux/nmi.h>
 #include <linux/syscalls.h>
 #include <linux/kernel.h>
 #include <linux/tracehook.h>
@@ -594,66 +594,18 @@ void show_regs(struct pt_regs *regs)
        tile_show_stack(&kbt);
 }
 
-/* To ensure stack dump on tiles occurs one by one. */
-static DEFINE_SPINLOCK(backtrace_lock);
-/* To ensure no backtrace occurs before all of the stack dump are done. */
-static atomic_t backtrace_cpus;
-/* The cpu mask to avoid reentrance. */
-static struct cpumask backtrace_mask;
-
-void do_nmi_dump_stack(struct pt_regs *regs)
-{
-       int is_idle = is_idle_task(current) && !in_interrupt();
-       int cpu;
-
-       nmi_enter();
-       cpu = smp_processor_id();
-       if (WARN_ON_ONCE(!cpumask_test_and_clear_cpu(cpu, &backtrace_mask)))
-               goto done;
-
-       spin_lock(&backtrace_lock);
-       if (is_idle)
-               pr_info("CPU: %d idle\n", cpu);
-       else
-               show_regs(regs);
-       spin_unlock(&backtrace_lock);
-       atomic_dec(&backtrace_cpus);
-done:
-       nmi_exit();
-}
-
 #ifdef __tilegx__
-void arch_trigger_all_cpu_backtrace(bool self)
+void nmi_raise_cpu_backtrace(struct cpumask *in_mask)
 {
        struct cpumask mask;
        HV_Coord tile;
        unsigned int timeout;
        int cpu;
-       int ongoing;
        HV_NMI_Info info[NR_CPUS];
 
-       ongoing = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1);
-       if (ongoing != 0) {
-               pr_err("Trying to do all-cpu backtrace.\n");
-               pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n",
-                      ongoing);
-               if (self) {
-                       pr_err("Reporting the stack on this cpu only.\n");
-                       dump_stack();
-               }
-               return;
-       }
-
-       cpumask_copy(&mask, cpu_online_mask);
-       cpumask_clear_cpu(smp_processor_id(), &mask);
-       cpumask_copy(&backtrace_mask, &mask);
-
-       /* Backtrace for myself first. */
-       if (self)
-               dump_stack();
-
        /* Tentatively dump stack on remote tiles via NMI. */
        timeout = 100;
+       cpumask_copy(&mask, in_mask);
        while (!cpumask_empty(&mask) && timeout) {
                for_each_cpu(cpu, &mask) {
                        tile.x = cpu_x(cpu);
@@ -664,12 +616,17 @@ void arch_trigger_all_cpu_backtrace(bool self)
                }
 
                mdelay(10);
+               touch_softlockup_watchdog();
                timeout--;
        }
 
-       /* Warn about cpus stuck in ICS and decrement their counts here. */
+       /* Warn about cpus stuck in ICS. */
        if (!cpumask_empty(&mask)) {
                for_each_cpu(cpu, &mask) {
+
+                       /* Clear the bit as if nmi_cpu_backtrace() ran. */
+                       cpumask_clear_cpu(cpu, in_mask);
+
                        switch (info[cpu].result) {
                        case HV_NMI_RESULT_FAIL_ICS:
                                pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n",
@@ -680,16 +637,20 @@ void arch_trigger_all_cpu_backtrace(bool self)
                                        cpu);
                                break;
                        case HV_ENOSYS:
-                               pr_warn("Hypervisor too old to allow remote stack dumps.\n");
-                               goto skip_for_each;
+                               WARN_ONCE(1, "Hypervisor too old to allow remote stack dumps.\n");
+                               break;
                        default:  /* should not happen */
                                pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n",
                                        cpu, info[cpu].result, info[cpu].pc);
                                break;
                        }
                }
-skip_for_each:
-               atomic_sub(cpumask_weight(&mask), &backtrace_cpus);
        }
 }
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+       nmi_trigger_cpumask_backtrace(mask, exclude_self,
+                                     nmi_raise_cpu_backtrace);
+}
 #endif /* __tilegx_ */
index 4d9651c5b1adc38983a1252aace956c92a2fb70f..39f427bb0de2db871d66b935f59e9790d322b0da 100644 (file)
@@ -20,6 +20,8 @@
 #include <linux/reboot.h>
 #include <linux/uaccess.h>
 #include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/nmi.h>
 #include <asm/stack.h>
 #include <asm/traps.h>
 #include <asm/setup.h>
@@ -392,14 +394,17 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 
 void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
 {
+       nmi_enter();
        switch (reason) {
+#ifdef arch_trigger_cpumask_backtrace
        case TILE_NMI_DUMP_STACK:
-               do_nmi_dump_stack(regs);
+               nmi_cpu_backtrace(regs);
                break;
+#endif
        default:
                panic("Unexpected do_nmi type %ld", reason);
-               return;
        }
+       nmi_exit();
 }
 
 /* Deprecated function currently only used here. */
index 9d449caf8910c709a983a61ada803aaa1fd1dcfa..e1baf094fba45f53f34aabf4730e315e7cc13d98 100644 (file)
@@ -42,6 +42,7 @@ SECTIONS
   .text : AT (ADDR(.text) - LOAD_OFFSET) {
     HEAD_TEXT
     SCHED_TEXT
+    CPUIDLE_TEXT
     LOCK_TEXT
     KPROBES_TEXT
     IRQENTRY_TEXT
index 851a94e6ae58061824917c82ee2ae0d8fe0f4b41..ef61c597898bc02e92cf352663b0d819bea2164c 100644 (file)
@@ -88,6 +88,5 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
-       unsigned long range_end = mm->brk + 0x02000000;
-       return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+       return randomize_page(mm->brk, 0x02000000);
 }
index 0ca46ededfc73ce742c4075e2cc08bdb1e93cd0e..c5069e2811fba8d1af70fe944ff4c58a1c168668 100644 (file)
@@ -158,7 +158,7 @@ define filechk_gen-asm-offsets
          echo " *"; \
          echo " */"; \
          echo ""; \
-         sed -ne "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"; \
+         sed -ne '/#define/{s/\$$//;s/#//2;s/$$/*\//p;}'; \
          echo ""; )
 endef
 
index adde088aeefff64f5820fcf1636e5b3afa3577ab..4fdbcf958cd5ad76b9756cb5f201fa384f119b68 100644 (file)
@@ -68,6 +68,7 @@ SECTIONS
     _stext = .;
     TEXT_TEXT
     SCHED_TEXT
+    CPUIDLE_TEXT
     LOCK_TEXT
     *(.fixup)
     *(.stub .text.* .gnu.linkonce.t.*)
index 6899195602b77fd3ed259f41977a2984faa902f4..1840f55ed0420b9ac2cc5db93d31a0b5a93fd48e 100644 (file)
@@ -28,6 +28,7 @@ SECTIONS
     _stext = .;
     TEXT_TEXT
     SCHED_TEXT
+    CPUIDLE_TEXT
     LOCK_TEXT
     *(.fixup)
     /* .gnu.warning sections are handled specially by elf32.em.  */
index 00299c927852ea72cc743e97bff6e2f3273c9bf8..d7c6b676b3a56a44cea03b73e401d04b0860eba1 100644 (file)
@@ -295,8 +295,7 @@ unsigned long get_wchan(struct task_struct *p)
 
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
-       unsigned long range_end = mm->brk + 0x02000000;
-       return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+       return randomize_page(mm->brk, 0x02000000);
 }
 
 /*
index 77e407e49a632c84cfb4957580c13dfbbebd1df4..56e788e8ee83cd8f56e54a186981a770fe0bba04 100644 (file)
@@ -37,6 +37,7 @@ SECTIONS
        .text : {               /* Real text segment */
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
 
                *(.fixup)
index e7de5c9a4fbdd2c5d99b78d82d2c58019d20dc04..16d3fa211962809c4e879d049dbdb5dde4049702 100644 (file)
@@ -50,8 +50,9 @@ extern int vector_used_by_percpu_irq(unsigned int vector);
 extern void init_ISA_irqs(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
-void arch_trigger_all_cpu_backtrace(bool);
-#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
+                                   bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 #endif
 
 #endif /* _ASM_X86_IRQ_H */
index b77f5edb03b0c02dc2047d52d07c9da447dba209..ac7692dcfa2e86196f47de38035731e39696d920 100644 (file)
@@ -4,6 +4,10 @@
 #include <asm/processor-flags.h>
 
 #ifndef __ASSEMBLY__
+
+/* Provide __cpuidle; we can't safely include <linux/cpu.h> */
+#define __cpuidle __attribute__((__section__(".cpuidle.text")))
+
 /*
  * Interrupt control:
  */
@@ -44,12 +48,12 @@ static inline void native_irq_enable(void)
        asm volatile("sti": : :"memory");
 }
 
-static inline void native_safe_halt(void)
+static inline __cpuidle void native_safe_halt(void)
 {
        asm volatile("sti; hlt": : :"memory");
 }
 
-static inline void native_halt(void)
+static inline __cpuidle void native_halt(void)
 {
        asm volatile("hlt": : :"memory");
 }
@@ -86,7 +90,7 @@ static inline notrace void arch_local_irq_enable(void)
  * Used in the idle loop; sti takes one instruction cycle
  * to complete:
  */
-static inline void arch_safe_halt(void)
+static inline __cpuidle void arch_safe_halt(void)
 {
        native_safe_halt();
 }
@@ -95,7 +99,7 @@ static inline void arch_safe_halt(void)
  * Used when interrupts are already enabled or to
  * shutdown the processor:
  */
-static inline void halt(void)
+static inline __cpuidle void halt(void)
 {
        native_halt();
 }
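
Idle entry points outside this header are expected to carry the same marker
so their PCs land in .cpuidle.text, which CPUIDLE_TEXT collects in the linker
scripts above. A hypothetical cpuidle ->enter callback, purely illustrative:

#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <asm/processor.h>

/* __cpuidle keeps this function in .cpuidle.text so backtrace and watchdog
 * code can treat time spent here as idle time. */
static int __cpuidle example_idle_enter(struct cpuidle_device *dev,
                                        struct cpuidle_driver *drv, int index)
{
        /* The platform's real wait instruction (hlt, mwait, wfi, ...) would
         * go here; cpu_relax() merely stands in for it. */
        cpu_relax();
        return index;
}
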
index d2434c1cad0558e2664d8f7587ed4701d049edd5..282630e4c6ea4696e54c6ea650751d913ce49c11 100644 (file)
@@ -210,6 +210,7 @@ struct kexec_entry64_regs {
 
 typedef void crash_vmclear_fn(void);
 extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
+extern void kdump_nmi_shootdown_cpus(void);
 
 #endif /* __ASSEMBLY__ */
 
index 19980b36f394b18e6816629390130fa3eb789115..026ea82ecc60492e1ee27eae439fb2b9c6402899 100644 (file)
@@ -47,6 +47,7 @@ struct smp_ops {
        void (*smp_cpus_done)(unsigned max_cpus);
 
        void (*stop_other_cpus)(int wait);
+       void (*crash_stop_other_cpus)(void);
        void (*smp_send_reschedule)(int cpu);
 
        int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
index bdfad642123f247d9de09d2d4d7332aa14098d1e..af15f4444330b6c69fe20fc71a993070a9f26a5d 100644 (file)
@@ -152,7 +152,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
 
-void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
+void __cpuidle acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
 {
        unsigned int cpu = smp_processor_id();
        struct cstate_entry *percpu_entry;
index f29501e1a5c131827527d41a517bd508484033ba..c73c9fb281e18f7d058fd69497f504adeb3147c1 100644 (file)
@@ -26,32 +26,32 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh)
 }
 #endif
 
-#ifdef arch_trigger_all_cpu_backtrace
+#ifdef arch_trigger_cpumask_backtrace
 static void nmi_raise_cpu_backtrace(cpumask_t *mask)
 {
        apic->send_IPI_mask(mask, NMI_VECTOR);
 }
 
-void arch_trigger_all_cpu_backtrace(bool include_self)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
 {
-       nmi_trigger_all_cpu_backtrace(include_self, nmi_raise_cpu_backtrace);
+       nmi_trigger_cpumask_backtrace(mask, exclude_self,
+                                     nmi_raise_cpu_backtrace);
 }
 
-static int
-arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
+static int nmi_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
 {
        if (nmi_cpu_backtrace(regs))
                return NMI_HANDLED;
 
        return NMI_DONE;
 }
-NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler);
+NOKPROBE_SYMBOL(nmi_cpu_backtrace_handler);
 
-static int __init register_trigger_all_cpu_backtrace(void)
+static int __init register_nmi_cpu_backtrace_handler(void)
 {
-       register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler,
+       register_nmi_handler(NMI_LOCAL, nmi_cpu_backtrace_handler,
                                0, "arch_bt");
        return 0;
 }
-early_initcall(register_trigger_all_cpu_backtrace);
+early_initcall(register_nmi_cpu_backtrace_handler);
 #endif
index 9616cf76940cd48b674fe326a24597767ebc9fd7..650830e39e3a7c8f8e01755f4ecf2bd5f83bb799 100644 (file)
@@ -133,15 +133,31 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
        disable_local_APIC();
 }
 
-static void kdump_nmi_shootdown_cpus(void)
+void kdump_nmi_shootdown_cpus(void)
 {
        nmi_shootdown_cpus(kdump_nmi_callback);
 
        disable_local_APIC();
 }
 
+/* Override the weak function in kernel/panic.c */
+void crash_smp_send_stop(void)
+{
+       static int cpus_stopped;
+
+       if (cpus_stopped)
+               return;
+
+       if (smp_ops.crash_stop_other_cpus)
+               smp_ops.crash_stop_other_cpus();
+       else
+               smp_send_stop();
+
+       cpus_stopped = 1;
+}
+
 #else
-static void kdump_nmi_shootdown_cpus(void)
+void crash_smp_send_stop(void)
 {
        /* There are no cpus to shootdown */
 }
@@ -160,7 +176,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
        /* The kernel is broken so disable interrupts */
        local_irq_disable();
 
-       kdump_nmi_shootdown_cpus();
+       crash_smp_send_stop();
 
        /*
         * VMCLEAR VMCSs loaded on this cpu if needed.
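The "weak function in kernel/panic.c" mentioned above is added elsewhere in this series; it presumably looks roughly like the sketch below (illustrative only, not the actual panic.c hunk):

	/* kernel/panic.c: default used when the arch registers no override. */
	void __weak crash_smp_send_stop(void)
	{
		static int cpus_stopped;

		if (cpus_stopped)
			return;

		smp_send_stop();
		cpus_stopped = 1;
	}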
index 871f1863457dccdf2fdcdf310aae774ab1b3c9d2..a1b4da92921c75b053399880e5bcd0e9ce071e4e 100644 (file)
@@ -1188,6 +1188,6 @@ void __init memblock_find_dma_reserve(void)
                        nr_free_pages += end_pfn - start_pfn;
        }
 
-       set_dma_reserve(nr_pages - nr_free_pages);
+       set_memory_reserve(nr_pages - nr_free_pages, false);
 #endif
 }
index 5a294e48b18529045ae4371f4ef8dc281773aea8..fc3389fc47a2f3dfacc7bff0665f1d6c4b75a245 100644 (file)
@@ -337,6 +337,7 @@ void arch_crash_save_vmcoreinfo(void)
 #endif
        vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
                              kaslr_offset());
+       VMCOREINFO_PHYS_BASE(phys_base);
 }
 
 /* arch-dependent functionality related to kexec file-based syscall */
index c1fa790c81cd51a0cb5f5b1650a749c3b86300ee..5308fb39e3040dae0075e96c5bf78428806fa55c 100644 (file)
@@ -302,7 +302,7 @@ void arch_cpu_idle(void)
 /*
  * We use this if we don't have any better idle routine.
  */
-void default_idle(void)
+void __cpuidle default_idle(void)
 {
        trace_cpu_idle_rcuidle(1, smp_processor_id());
        safe_halt();
@@ -417,7 +417,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
  * with interrupts enabled and no flags, which is backwards compatible with the
  * original MWAIT implementation.
  */
-static void mwait_idle(void)
+static __cpuidle void mwait_idle(void)
 {
        if (!current_set_polling_and_test()) {
                trace_cpu_idle_rcuidle(1, smp_processor_id());
@@ -509,8 +509,7 @@ unsigned long arch_align_stack(unsigned long sp)
 
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
-       unsigned long range_end = mm->brk + 0x02000000;
-       return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+       return randomize_page(mm->brk, 0x02000000);
 }
 
 /*
index 658777cf38512872b4e5d049a8618e4bbd1b869b..68f8cc222f255aa1cf5266e2d84d7ceeb2417977 100644 (file)
@@ -32,6 +32,8 @@
 #include <asm/nmi.h>
 #include <asm/mce.h>
 #include <asm/trace/irq_vectors.h>
+#include <asm/kexec.h>
+
 /*
  *     Some notes on x86 processor bugs affecting SMP operation:
  *
@@ -342,6 +344,9 @@ struct smp_ops smp_ops = {
        .smp_cpus_done          = native_smp_cpus_done,
 
        .stop_other_cpus        = native_stop_other_cpus,
+#if defined(CONFIG_KEXEC_CORE)
+       .crash_stop_other_cpus  = kdump_nmi_shootdown_cpus,
+#endif
        .smp_send_reschedule    = native_smp_send_reschedule,
 
        .cpu_up                 = native_cpu_up,
index 10e0272d789a189b7215100a1d66a676d9b4bbfa..a55ed63b9f91b0d45dbb476a22af9a19c4ab5fc8 100644 (file)
@@ -101,7 +101,6 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
                           unsigned long *end)
 {
        if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) {
-               unsigned long new_begin;
                /* This is usually needed to map code in small
                   model, so it needs to be in the first 31bit. Limit
                   it to that.  This means we need to move the
@@ -112,9 +111,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
                *begin = 0x40000000;
                *end = 0x80000000;
                if (current->flags & PF_RANDOMIZE) {
-                       new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
-                       if (new_begin)
-                               *begin = new_begin;
+                       *begin = randomize_page(*begin, 0x02000000);
                }
        } else {
                *begin = current->mm->mmap_legacy_base;
index 9297a002d8e5ff3f06b8b8fee118b21f9042a994..dbf67f64d5ecf76cee128b2c89d43a4267b57223 100644 (file)
@@ -97,6 +97,7 @@ SECTIONS
                _stext = .;
                TEXT_TEXT
                SCHED_TEXT
+               CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
                ENTRY_TEXT
index 46a9df99f3c5c952601d7f153adf6bf8c1e3e937..462473f6492fc5c3d64b3e6176d3c057150a5a05 100644 (file)
@@ -5,7 +5,7 @@
 #include <asm/mman.h>
 
 #define DEFINE(sym, val) \
-       asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+       asm volatile ("#define " #sym " %0 /*" #val :: "i" (val))
 
 #define BLANK() asm volatile("\n->" : : )
 
index cb3c22370cf58bdf25e2ce895371eb5c7ab9597a..0f5e25cf73214d9cd2cfbc0b7184fdedb9c7537b 100644 (file)
@@ -21,10 +21,10 @@ static char syscalls[] = {
 #endif
 
 #define DEFINE(sym, val) \
-       asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+       asm volatile ("#define " #sym " %0 /*" #val :: "i" (val))
 
 #define DEFINE_LONGS(sym, val) \
-       asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
+       asm volatile ("#define " #sym " %0 /*" #val :: "i" (val / sizeof(unsigned long)))
 
 void foo(void)
 {
index 72cfe3587dd865c6532874dc56f012ccbb7cdc04..31411fc82662c82ec127188ccdb5fc3ac818e4e9 100644 (file)
@@ -89,6 +89,9 @@ SECTIONS
     VMLINUX_SYMBOL(__sched_text_start) = .;
     *(.sched.literal .sched.text)
     VMLINUX_SYMBOL(__sched_text_end) = .;
+    VMLINUX_SYMBOL(__cpuidle_text_start) = .;
+    *(.cpuidle.literal .cpuidle.text)
+    VMLINUX_SYMBOL(__cpuidle_text_end) = .;
     VMLINUX_SYMBOL(__lock_text_start) = .;
     *(.spinlock.literal .spinlock.text)
     VMLINUX_SYMBOL(__lock_text_end) = .;
index fcd6d4fae657cfdf118274cec31780f9e8b8c1f8..a178c8e59492d7be9fa30e36f4bd87910f79c99b 100644 (file)
@@ -878,7 +878,7 @@ static int show_partition(struct seq_file *seqf, void *v)
        char buf[BDEVNAME_SIZE];
 
        /* Don't show non-partitionable removable devices or empty devices */
-       if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
+       if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) &&
                                   (sgp->flags & GENHD_FL_REMOVABLE)))
                return 0;
        if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
index cea52528aa188648a8cb1905da58a1e5bcb6719a..2237d3f24f0e735a1e04294ea273595cce98e049 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/sched.h>       /* need_resched() */
 #include <linux/tick.h>
 #include <linux/cpuidle.h>
+#include <linux/cpu.h>
 #include <acpi/processor.h>
 
 /*
@@ -115,7 +116,7 @@ static const struct dmi_system_id processor_power_dmi_table[] = {
  * Callers should disable interrupts before the call and enable
  * interrupts after return.
  */
-static void acpi_safe_halt(void)
+static void __cpuidle acpi_safe_halt(void)
 {
        if (!tif_need_resched()) {
                safe_halt();
@@ -645,7 +646,7 @@ static int acpi_idle_bm_check(void)
  *
  * Caller disables interrupt before call and enables interrupt after return.
  */
-static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
+static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
 {
        if (cx->entry_method == ACPI_CSTATE_FFH) {
                /* Call into architectural FFH based C-state */
index dc75de9059cd1983401b35bffb5c697d2e55b813..62c63c0c5c22dcfee3e2d706e68be053519cc40c 100644 (file)
@@ -361,8 +361,11 @@ store_mem_state(struct device *dev,
 err:
        unlock_device_hotplug();
 
-       if (ret)
+       if (ret < 0)
                return ret;
+       if (ret)
+               return -EINVAL;
+
        return count;
 }
 
index 7274ae89ddb39f3450ac9f56ef64db6384330497..d6876d50622075f1b490bf7a19831b4e8784adb6 100644 (file)
@@ -2100,23 +2100,37 @@ unsigned long get_random_long(void)
 }
 EXPORT_SYMBOL(get_random_long);
 
-/*
- * randomize_range() returns a start address such that
+/**
+ * randomize_page - Generate a random, page aligned address
+ * @start:     The smallest acceptable address the caller will take.
+ * @range:     The size of the area, starting at @start, within which the
+ *             random address must fall.
+ *
+ * If @start + @range would overflow, @range is capped.
  *
- *    [...... <range> .....]
- *  start                  end
+ * NOTE: Historical use of randomize_range, which this replaces, presumed that
+ * @start was already page aligned.  We now align it regardless.
  *
- * a <range> with size "len" starting at the return value is inside in the
- * area defined by [start, end], but is otherwise randomized.
+ * Return: A page aligned address within [start, start + range).  On error,
+ * @start is returned.
  */
 unsigned long
-randomize_range(unsigned long start, unsigned long end, unsigned long len)
+randomize_page(unsigned long start, unsigned long range)
 {
-       unsigned long range = end - len - start;
+       if (!PAGE_ALIGNED(start)) {
+               range -= PAGE_ALIGN(start) - start;
+               start = PAGE_ALIGN(start);
+       }
 
-       if (end <= start + len)
-               return 0;
-       return PAGE_ALIGN(get_random_int() % range + start);
+       if (start > ULONG_MAX - range)
+               range = ULONG_MAX - start;
+
+       range >>= PAGE_SHIFT;
+
+       if (range == 0)
+               return start;
+
+       return start + (get_random_long() % range << PAGE_SHIFT);
 }
 
 /* Interface for in-kernel drivers of true hardware RNGs.
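A minimal usage sketch for the new helper, mirroring the arch_randomize_brk() conversion earlier in this merge (SZ_16M from linux/sizes.h is used purely for illustration):

	/* Pick a page-aligned address somewhere in [base, base + 16 MB). */
	unsigned long addr = randomize_page(base, SZ_16M);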
index 389ade4572beb17c71ff44faf2ce884ad16baf54..ab264d393233683c28bac7e0770abb0ffe49b2ec 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/cpuidle.h>
 #include <linux/cpumask.h>
 #include <linux/tick.h>
+#include <linux/cpu.h>
 
 #include "cpuidle.h"
 
@@ -178,8 +179,8 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
 }
 
 #ifdef CONFIG_ARCH_HAS_CPU_RELAX
-static int poll_idle(struct cpuidle_device *dev,
-               struct cpuidle_driver *drv, int index)
+static int __cpuidle poll_idle(struct cpuidle_device *dev,
+                              struct cpuidle_driver *drv, int index)
 {
        local_irq_enable();
        if (!current_set_polling_and_test()) {
index 67ec58f9ef99f7c2fbc2115a8d6e8804a37327e1..4466a2f969d7ba5d5cafee13b7359047955cd07a 100644 (file)
@@ -863,8 +863,8 @@ static struct cpuidle_state dnv_cstates[] = {
  *
  * Must be called under local_irq_disable().
  */
-static int intel_idle(struct cpuidle_device *dev,
-               struct cpuidle_driver *drv, int index)
+static __cpuidle int intel_idle(struct cpuidle_device *dev,
+                               struct cpuidle_driver *drv, int index)
 {
        unsigned long ecx = 1; /* break on interrupt flag */
        struct cpuidle_state *state = &drv->states[index];
index 8dcf5a960951805b09d650b2cc243ceaeff6a5bb..4a7e8d306174e46bdcbe98b1a4f9d4dc589e970f 100644 (file)
@@ -503,7 +503,8 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
                goto out;
 
        ret = BLK_MQ_RQ_QUEUE_BUSY;
-       if (!dma_map_sg(dev->dev, iod->sg, iod->nents, dma_dir))
+       if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
+                               DMA_ATTR_NO_WARN))
                goto out;
 
        if (!nvme_setup_prps(dev, req, size))
index 3ce69536a7b3c3832f652ff7b2c43bd9f0db506e..ff37f6d6e70625f14ee9253baa038587cdd885ce 100644 (file)
@@ -2010,6 +2010,8 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align))
                        name = of_get_property(of_aliases, "stdout", NULL);
                if (name)
                        of_stdout = of_find_node_opts_by_path(name, &of_stdout_options);
+               if (of_stdout)
+                       console_set_by_of();
        }
 
        if (!of_aliases)
index 3fa17ac8df5492f4d2e4681d1a4f07f2f8defffa..3226983c2a1152a44372969d30e986bb97332d81 100644 (file)
@@ -1841,24 +1841,19 @@ static int cm_chan_msg_send(void __user *arg)
 {
        struct rio_cm_msg msg;
        void *buf;
-       int ret = 0;
+       int ret;
 
        if (copy_from_user(&msg, arg, sizeof(msg)))
                return -EFAULT;
        if (msg.size > RIO_MAX_MSG_SIZE)
                return -EINVAL;
 
-       buf = kmalloc(msg.size, GFP_KERNEL);
-       if (!buf)
-               return -ENOMEM;
-
-       if (copy_from_user(buf, (void __user *)(uintptr_t)msg.msg, msg.size)) {
-               ret = -EFAULT;
-               goto out;
-       }
+       buf = memdup_user((void __user *)(uintptr_t)msg.msg, msg.size);
+       if (IS_ERR(buf))
+               return PTR_ERR(buf);
 
        ret = riocm_ch_send(msg.ch_num, buf, msg.size);
-out:
+
        kfree(buf);
        return ret;
 }
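For reference, memdup_user() bundles exactly the allocate-and-copy pair removed above; its semantics are roughly the sketch below (the name memdup_user_sketch is hypothetical, and the real implementation differs in detail):

	static void *memdup_user_sketch(const void __user *src, size_t len)
	{
		void *p = kmalloc(len, GFP_KERNEL);

		if (!p)
			return ERR_PTR(-ENOMEM);
		if (copy_from_user(p, src, len)) {
			kfree(p);
			return ERR_PTR(-EFAULT);
		}
		return p;
	}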
index dbd819fa6b755ddeadd83a4e81e6a6342f5eea0a..6547c9483f4368503646aedf023b1b75fdd66b2e 100644 (file)
@@ -2204,7 +2204,7 @@ int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
        task_lock(current);
        if (pud->pud_ngroups > current_ngroups)
                pud->pud_ngroups = current_ngroups;
-       memcpy(pud->pud_groups, current_cred()->group_info->blocks[0],
+       memcpy(pud->pud_groups, current_cred()->group_info->gid,
               pud->pud_ngroups * sizeof(__u32));
        task_unlock(current);
 
index a439548de785dde9497ed13b2063b696ed773d7f..a1fba4285277dde3af647d778b8aefd4c36d2261 100644 (file)
@@ -20,7 +20,8 @@
 #define AUTOFS_IOC_COUNT     32
 
 #define AUTOFS_DEV_IOCTL_IOC_FIRST     (AUTOFS_DEV_IOCTL_VERSION)
-#define AUTOFS_DEV_IOCTL_IOC_COUNT     (AUTOFS_IOC_COUNT - 11)
+#define AUTOFS_DEV_IOCTL_IOC_COUNT \
+       (AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD - AUTOFS_DEV_IOCTL_VERSION_CMD)
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -33,8 +34,6 @@
 #include <asm/current.h>
 #include <linux/uaccess.h>
 
-/* #define DEBUG */
-
 #ifdef pr_fmt
 #undef pr_fmt
 #endif
@@ -111,8 +110,6 @@ struct autofs_sb_info {
        int max_proto;
        unsigned long exp_timeout;
        unsigned int type;
-       int reghost_enabled;
-       int needs_reghost;
        struct super_block *sb;
        struct mutex wq_mutex;
        struct mutex pipe_mutex;
@@ -271,4 +268,4 @@ static inline void autofs4_del_expiring(struct dentry *dentry)
        }
 }
 
-extern void autofs4_kill_sb(struct super_block *);
+void autofs4_kill_sb(struct super_block *);
index c7fcc743884374cf695f530036701610d78d79a7..fc09eb77ddf37a4ae27af402553220e4287af92e 100644 (file)
@@ -75,7 +75,7 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
        if ((param->ver_major != AUTOFS_DEV_IOCTL_VERSION_MAJOR) ||
            (param->ver_minor > AUTOFS_DEV_IOCTL_VERSION_MINOR)) {
                pr_warn("ioctl control interface version mismatch: "
-                       "kernel(%u.%u), user(%u.%u), cmd(%d)\n",
+                       "kernel(%u.%u), user(%u.%u), cmd(0x%08x)\n",
                        AUTOFS_DEV_IOCTL_VERSION_MAJOR,
                        AUTOFS_DEV_IOCTL_VERSION_MINOR,
                        param->ver_major, param->ver_minor, cmd);
@@ -172,6 +172,17 @@ static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f)
        return sbi;
 }
 
+/* Return autofs dev ioctl version */
+static int autofs_dev_ioctl_version(struct file *fp,
+                                   struct autofs_sb_info *sbi,
+                                   struct autofs_dev_ioctl *param)
+{
+       /* This should have already been set. */
+       param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
+       param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
+       return 0;
+}
+
 /* Return autofs module protocol version */
 static int autofs_dev_ioctl_protover(struct file *fp,
                                     struct autofs_sb_info *sbi,
@@ -586,41 +597,25 @@ out:
 
 static ioctl_fn lookup_dev_ioctl(unsigned int cmd)
 {
-       static struct {
-               int cmd;
-               ioctl_fn fn;
-       } _ioctls[] = {
-               {cmd_idx(AUTOFS_DEV_IOCTL_VERSION_CMD), NULL},
-               {cmd_idx(AUTOFS_DEV_IOCTL_PROTOVER_CMD),
-                        autofs_dev_ioctl_protover},
-               {cmd_idx(AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD),
-                        autofs_dev_ioctl_protosubver},
-               {cmd_idx(AUTOFS_DEV_IOCTL_OPENMOUNT_CMD),
-                        autofs_dev_ioctl_openmount},
-               {cmd_idx(AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD),
-                        autofs_dev_ioctl_closemount},
-               {cmd_idx(AUTOFS_DEV_IOCTL_READY_CMD),
-                        autofs_dev_ioctl_ready},
-               {cmd_idx(AUTOFS_DEV_IOCTL_FAIL_CMD),
-                        autofs_dev_ioctl_fail},
-               {cmd_idx(AUTOFS_DEV_IOCTL_SETPIPEFD_CMD),
-                        autofs_dev_ioctl_setpipefd},
-               {cmd_idx(AUTOFS_DEV_IOCTL_CATATONIC_CMD),
-                        autofs_dev_ioctl_catatonic},
-               {cmd_idx(AUTOFS_DEV_IOCTL_TIMEOUT_CMD),
-                        autofs_dev_ioctl_timeout},
-               {cmd_idx(AUTOFS_DEV_IOCTL_REQUESTER_CMD),
-                        autofs_dev_ioctl_requester},
-               {cmd_idx(AUTOFS_DEV_IOCTL_EXPIRE_CMD),
-                        autofs_dev_ioctl_expire},
-               {cmd_idx(AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD),
-                        autofs_dev_ioctl_askumount},
-               {cmd_idx(AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD),
-                        autofs_dev_ioctl_ismountpoint}
+       static ioctl_fn _ioctls[] = {
+               autofs_dev_ioctl_version,
+               autofs_dev_ioctl_protover,
+               autofs_dev_ioctl_protosubver,
+               autofs_dev_ioctl_openmount,
+               autofs_dev_ioctl_closemount,
+               autofs_dev_ioctl_ready,
+               autofs_dev_ioctl_fail,
+               autofs_dev_ioctl_setpipefd,
+               autofs_dev_ioctl_catatonic,
+               autofs_dev_ioctl_timeout,
+               autofs_dev_ioctl_requester,
+               autofs_dev_ioctl_expire,
+               autofs_dev_ioctl_askumount,
+               autofs_dev_ioctl_ismountpoint,
        };
        unsigned int idx = cmd_idx(cmd);
 
-       return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx].fn;
+       return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx];
 }
 
 /* ioctl dispatcher */
@@ -642,7 +637,7 @@ static int _autofs_dev_ioctl(unsigned int command,
        cmd = _IOC_NR(command);
 
        if (_IOC_TYPE(command) != _IOC_TYPE(AUTOFS_DEV_IOCTL_IOC_FIRST) ||
-           cmd - cmd_first >= AUTOFS_DEV_IOCTL_IOC_COUNT) {
+           cmd - cmd_first > AUTOFS_DEV_IOCTL_IOC_COUNT) {
                return -ENOTTY;
        }
 
@@ -655,14 +650,11 @@ static int _autofs_dev_ioctl(unsigned int command,
        if (err)
                goto out;
 
-       /* The validate routine above always sets the version */
-       if (cmd == AUTOFS_DEV_IOCTL_VERSION_CMD)
-               goto done;
-
        fn = lookup_dev_ioctl(cmd);
        if (!fn) {
                pr_warn("unknown command 0x%08x\n", command);
-               return -ENOTTY;
+               err = -ENOTTY;
+               goto out;
        }
 
        fp = NULL;
@@ -671,9 +663,11 @@ static int _autofs_dev_ioctl(unsigned int command,
        /*
         * For obvious reasons the openmount can't have a file
         * descriptor yet. We don't take a reference to the
-        * file during close to allow for immediate release.
+        * file during close to allow for immediate release,
+        * and the same for retrieving ioctl version.
         */
-       if (cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD &&
+       if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD &&
+           cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD &&
            cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) {
                fp = fget(param->ioctlfd);
                if (!fp) {
@@ -706,7 +700,6 @@ cont:
 
        if (fp)
                fput(fp);
-done:
        if (err >= 0 && copy_to_user(user, param, AUTOFS_DEV_IOCTL_SIZE))
                err = -EFAULT;
 out:
index 61b21051bd5ad9322a58ef1caa7cccc09d25ba2b..f35ead78ba067556fcc89564258b3801da071fa4 100644 (file)
@@ -274,6 +274,23 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
                goto fail_dput;
        }
 
+       /* Test versions first */
+       if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION ||
+           sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) {
+               pr_err("kernel does not match daemon version "
+                      "daemon (%d, %d) kernel (%d, %d)\n",
+                      sbi->min_proto, sbi->max_proto,
+                      AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION);
+               goto fail_dput;
+       }
+
+       /* Establish highest kernel protocol version */
+       if (sbi->max_proto > AUTOFS_MAX_PROTO_VERSION)
+               sbi->version = AUTOFS_MAX_PROTO_VERSION;
+       else
+               sbi->version = sbi->max_proto;
+       sbi->sub_version = AUTOFS_PROTO_SUBVERSION;
+
        if (pgrp_set) {
                sbi->oz_pgrp = find_get_pid(pgrp);
                if (!sbi->oz_pgrp) {
@@ -291,29 +308,12 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
        root_inode->i_fop = &autofs4_root_operations;
        root_inode->i_op = &autofs4_dir_inode_operations;
 
-       /* Couldn't this be tested earlier? */
-       if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION ||
-           sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) {
-               pr_err("kernel does not match daemon version "
-                      "daemon (%d, %d) kernel (%d, %d)\n",
-                      sbi->min_proto, sbi->max_proto,
-                      AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION);
-               goto fail_dput;
-       }
-
-       /* Establish highest kernel protocol version */
-       if (sbi->max_proto > AUTOFS_MAX_PROTO_VERSION)
-               sbi->version = AUTOFS_MAX_PROTO_VERSION;
-       else
-               sbi->version = sbi->max_proto;
-       sbi->sub_version = AUTOFS_PROTO_SUBVERSION;
-
        pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp));
        pipe = fget(pipefd);
 
        if (!pipe) {
                pr_err("could not open pipe file descriptor\n");
-               goto fail_dput;
+               goto fail_put_pid;
        }
        ret = autofs_prepare_pipe(pipe);
        if (ret < 0)
@@ -334,14 +334,14 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 fail_fput:
        pr_err("pipe file descriptor does not contain proper ops\n");
        fput(pipe);
-       /* fall through */
+fail_put_pid:
+       put_pid(sbi->oz_pgrp);
 fail_dput:
        dput(root);
        goto fail_free;
 fail_ino:
-       kfree(ino);
+       autofs4_free_ino(ino);
 fail_free:
-       put_pid(sbi->oz_pgrp);
        kfree(sbi);
        s->s_fs_info = NULL;
        return ret;
@@ -368,7 +368,8 @@ struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
                inode->i_fop = &autofs4_dir_operations;
        } else if (S_ISLNK(mode)) {
                inode->i_op = &autofs4_symlink_inode_operations;
-       }
+       } else
+               WARN_ON(1);
 
        return inode;
 }
index fa84bb8832e0baefc79c07759e9f7f518e023988..d25c55f78173c6512fc9ec966beac2157b70b0fc 100644 (file)
@@ -577,8 +577,6 @@ static int autofs4_dir_symlink(struct inode *dir,
        inode = autofs4_get_inode(dir->i_sb, S_IFLNK | 0555);
        if (!inode) {
                kfree(cp);
-               if (!dentry->d_fsdata)
-                       kfree(ino);
                return -ENOMEM;
        }
        inode->i_private = cp;
@@ -842,7 +840,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
        if (may_umount(mnt))
                status = 1;
 
-       pr_debug("returning %d\n", status);
+       pr_debug("may umount %d\n", status);
 
        status = put_user(status, p);
 
index be6e48b0a46c269d55b4befcdedaf1750e1e233f..e079689743918e39d4895c5bb4419c46428449ba 100644 (file)
 #include <asm/ioctls.h>
 #include "internal.h"
 
-int compat_log = 1;
-
-int compat_printk(const char *fmt, ...)
-{
-       va_list ap;
-       int ret;
-       if (!compat_log)
-               return 0;
-       va_start(ap, fmt);
-       ret = vprintk(fmt, ap);
-       va_end(ap);
-       return ret;
-}
-
 /*
  * Not all architectures have sys_utime, so implement this in terms
  * of sys_utimes.
index 993dc6fe0416e17e8a0ca5c8a432b8daf574df86..226c0d5eedaca1a14c133fb909b502c83f0d1c82 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1034,7 +1034,7 @@ int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        if (!write && !buffer_mapped(&bh)) {
                spinlock_t *ptl;
                pmd_t entry;
-               struct page *zero_page = get_huge_zero_page();
+               struct page *zero_page = mm_get_huge_zero_page(vma->vm_mm);
 
                if (unlikely(!zero_page)) {
                        dax_pmd_dbg(&bh, address, "no zero page");
index 5efeefe17abb4bd9c885e35c3e93cff83cad0a7f..d86831c5985a3195581bf58c965a7f111bb93abf 100644 (file)
@@ -172,6 +172,7 @@ const struct file_operations ext2_file_operations = {
        .open           = dquot_file_open,
        .release        = ext2_release_file,
        .fsync          = ext2_fsync,
+       .get_unmapped_area = thp_get_unmapped_area,
        .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
 };
index 261ac3734c580a9b555f0e8f5e24fe56ce7ec047..28f542bb0bda4fc8f9f1bfbc076bb1c1efa109a0 100644 (file)
@@ -703,6 +703,7 @@ const struct file_operations ext4_file_operations = {
        .open           = ext4_file_open,
        .release        = ext4_release_file,
        .fsync          = ext4_sync_file,
+       .get_unmapped_area = thp_get_unmapped_area,
        .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .fallocate      = ext4_fallocate,
index 48d1adfe25d69c6675a3c6a2a1da2930dae9f62b..bed73db108c5cdc928f3b07a3c81cbfdcdcb25ee 100644 (file)
@@ -682,11 +682,11 @@ unsigned int nfs_page_length(struct page *page)
        loff_t i_size = i_size_read(page_file_mapping(page)->host);
 
        if (i_size > 0) {
-               pgoff_t page_index = page_file_index(page);
+               pgoff_t index = page_index(page);
                pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT;
-               if (page_index < end_index)
+               if (index < end_index)
                        return PAGE_SIZE;
-               if (page_index == end_index)
+               if (index == end_index)
                        return ((i_size - 1) & ~PAGE_MASK) + 1;
        }
        return 0;
index 174dd4cf5747f50afaa4d7217fdee16a1e6de11a..965db474f4b0d11d7a59fe839703c7505d714ce9 100644 (file)
@@ -342,7 +342,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
         * update_nfs_request below if the region is not locked. */
        req->wb_page    = page;
        if (page) {
-               req->wb_index = page_file_index(page);
+               req->wb_index = page_index(page);
                get_page(page);
        }
        req->wb_offset  = offset;
index 572e5b3b06f1566f40e7df7be33b8f14aafd2e16..defc9233e9858c43a9438b2cf918a899b7f56afa 100644 (file)
@@ -295,7 +295,7 @@ int nfs_readpage(struct file *file, struct page *page)
        int             error;
 
        dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
-               page, PAGE_SIZE, page_file_index(page));
+               page, PAGE_SIZE, page_index(page));
        nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
        nfs_add_stats(inode, NFSIOS_READPAGES, 1);
 
index 3a6724c6eb5ffbd6e83e45354cb2d4d068577527..53211838f72aac44c416b13ac926f3533d7e2553 100644 (file)
@@ -151,7 +151,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
        spin_lock(&inode->i_lock);
        i_size = i_size_read(inode);
        end_index = (i_size - 1) >> PAGE_SHIFT;
-       if (i_size > 0 && page_file_index(page) < end_index)
+       if (i_size > 0 && page_index(page) < end_index)
                goto out;
        end = page_file_offset(page) + ((loff_t)offset+count);
        if (i_size >= end)
@@ -603,7 +603,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
 {
        int ret;
 
-       nfs_pageio_cond_complete(pgio, page_file_index(page));
+       nfs_pageio_cond_complete(pgio, page_index(page));
        ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE,
                                   launder);
        if (ret == -EAGAIN) {
index 9d46a0bdd9f9aea4d882a77ac3c22454d0f2b634..62469c60be23263f21903fadc2217488c7160ed6 100644 (file)
@@ -55,10 +55,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
                        goto oom;
 
                for (i = 0; i < rqgi->ngroups; i++) {
-                       if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i)))
-                               GROUP_AT(gi, i) = exp->ex_anon_gid;
+                       if (gid_eq(GLOBAL_ROOT_GID, rqgi->gid[i]))
+                               gi->gid[i] = exp->ex_anon_gid;
                        else
-                               GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
+                               gi->gid[i] = rqgi->gid[i];
                }
        } else {
                gi = get_group_info(rqgi);
index a204d7e109d4d63a76d01a31198b00f3f1cd09be..39bfaba9c99c932a3ea8cf7005cb014d4dcd175f 100644 (file)
@@ -1903,7 +1903,7 @@ static bool groups_equal(struct group_info *g1, struct group_info *g2)
        if (g1->ngroups != g2->ngroups)
                return false;
        for (i=0; i<g1->ngroups; i++)
-               if (!gid_eq(GROUP_AT(g1, i), GROUP_AT(g2, i)))
+               if (!gid_eq(g1->gid[i], g2->gid[i]))
                        return false;
        return true;
 }
index 1d67fcbf7160647aed150da9f793b7d7ac0fd8e2..8abab16b4602b0348e8307ddb28623c43251f887 100644 (file)
@@ -2104,7 +2104,7 @@ int o2net_start_listening(struct o2nm_node *node)
        BUG_ON(o2net_listen_sock != NULL);
 
        mlog(ML_KTHREAD, "starting o2net thread...\n");
-       o2net_wq = create_singlethread_workqueue("o2net");
+       o2net_wq = alloc_ordered_workqueue("o2net", WQ_MEM_RECLAIM);
        if (o2net_wq == NULL) {
                mlog(ML_ERROR, "unable to launch o2net thread\n");
                return -ENOMEM; /* ? */
index cdeafb4e7ed60d496200cd8a78806c874e9d8c62..0bb128659d4b0b3b0be717776b4ab6bdb06b6a39 100644 (file)
@@ -268,7 +268,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
                                  struct dlm_lock *lock, int flags, int type)
 {
        enum dlm_status status;
-       u8 old_owner = res->owner;
 
        mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
             lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
@@ -335,7 +334,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 
        spin_lock(&res->spinlock);
        res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
-       lock->convert_pending = 0;
        /* if it failed, move it back to granted queue.
         * if master returns DLM_NORMAL and then down before sending ast,
         * it may have already been moved to granted queue, reset to
@@ -344,12 +342,14 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
                if (status != DLM_NOTQUEUED)
                        dlm_error(status);
                dlm_revert_pending_convert(res, lock);
-       } else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
-                       (old_owner != res->owner)) {
-               mlog(0, "res %.*s is in recovering or has been recovered.\n",
-                               res->lockname.len, res->lockname.name);
+       } else if (!lock->convert_pending) {
+               mlog(0, "%s: res %.*s, owner died and lock has been moved back "
+                               "to granted list, retry convert.\n",
+                               dlm->name, res->lockname.len, res->lockname.name);
                status = DLM_RECOVERING;
        }
+
+       lock->convert_pending = 0;
 bail:
        spin_unlock(&res->spinlock);
 
index 533bd524e41eb087ba6775a0f76bab9f95948754..733e4e79c8e25f02e3a3af1ad562049d32742cbc 100644 (file)
@@ -1904,7 +1904,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
        }
 
        snprintf(wq_name, O2NM_MAX_NAME_LEN, "dlm_wq-%s", dlm->name);
-       dlm->dlm_worker = create_singlethread_workqueue(wq_name);
+       dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM, 0);
        if (!dlm->dlm_worker) {
                status = -ENOMEM;
                mlog_errno(status);
index ef474cdd640479d1e9b3b48204f12dc5c6a7704f..354cdf9714aa482e80862ba1fc760a5138fa3ac7 100644 (file)
@@ -646,7 +646,7 @@ static int __init init_dlmfs_fs(void)
        }
        cleanup_inode = 1;
 
-       user_dlm_worker = create_singlethread_workqueue("user_dlm");
+       user_dlm_worker = alloc_workqueue("user_dlm", WQ_MEM_RECLAIM, 0);
        if (!user_dlm_worker) {
                status = -ENOMEM;
                goto bail;
index 603b28d6f0082b7e7a7740f785faeabe281252ef..f56fe39fab04010174c6b300b32f69c52c6ac69c 100644 (file)
@@ -2329,7 +2329,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
        }
        cleancache_init_shared_fs(sb);
 
-       osb->ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
+       osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM);
        if (!osb->ocfs2_wq) {
                status = -ENOMEM;
                mlog_errno(status);
index 4ebe6b2e5217c2e26c7185bcf4254d8af3b55872..10c5ad5635a8d052b0f3367acdf782cf496ea80f 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -604,54 +604,63 @@ pipe_fasync(int fd, struct file *filp, int on)
        return retval;
 }
 
-static void account_pipe_buffers(struct pipe_inode_info *pipe,
+static unsigned long account_pipe_buffers(struct user_struct *user,
                                  unsigned long old, unsigned long new)
 {
-       atomic_long_add(new - old, &pipe->user->pipe_bufs);
+       return atomic_long_add_return(new - old, &user->pipe_bufs);
 }
 
-static bool too_many_pipe_buffers_soft(struct user_struct *user)
+static bool too_many_pipe_buffers_soft(unsigned long user_bufs)
 {
-       return pipe_user_pages_soft &&
-              atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_soft;
+       return pipe_user_pages_soft && user_bufs >= pipe_user_pages_soft;
 }
 
-static bool too_many_pipe_buffers_hard(struct user_struct *user)
+static bool too_many_pipe_buffers_hard(unsigned long user_bufs)
 {
-       return pipe_user_pages_hard &&
-              atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_hard;
+       return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard;
 }
 
 struct pipe_inode_info *alloc_pipe_info(void)
 {
        struct pipe_inode_info *pipe;
+       unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
+       struct user_struct *user = get_current_user();
+       unsigned long user_bufs;
 
        pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
-       if (pipe) {
-               unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
-               struct user_struct *user = get_current_user();
-
-               if (!too_many_pipe_buffers_hard(user)) {
-                       if (too_many_pipe_buffers_soft(user))
-                               pipe_bufs = 1;
-                       pipe->bufs = kcalloc(pipe_bufs,
-                                            sizeof(struct pipe_buffer),
-                                            GFP_KERNEL_ACCOUNT);
-               }
+       if (pipe == NULL)
+               goto out_free_uid;
 
-               if (pipe->bufs) {
-                       init_waitqueue_head(&pipe->wait);
-                       pipe->r_counter = pipe->w_counter = 1;
-                       pipe->buffers = pipe_bufs;
-                       pipe->user = user;
-                       account_pipe_buffers(pipe, 0, pipe_bufs);
-                       mutex_init(&pipe->mutex);
-                       return pipe;
-               }
-               free_uid(user);
-               kfree(pipe);
+       if (pipe_bufs * PAGE_SIZE > pipe_max_size && !capable(CAP_SYS_RESOURCE))
+               pipe_bufs = pipe_max_size >> PAGE_SHIFT;
+
+       user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
+
+       if (too_many_pipe_buffers_soft(user_bufs)) {
+               user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
+               pipe_bufs = 1;
+       }
+
+       if (too_many_pipe_buffers_hard(user_bufs))
+               goto out_revert_acct;
+
+       pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
+                            GFP_KERNEL_ACCOUNT);
+
+       if (pipe->bufs) {
+               init_waitqueue_head(&pipe->wait);
+               pipe->r_counter = pipe->w_counter = 1;
+               pipe->buffers = pipe_bufs;
+               pipe->user = user;
+               mutex_init(&pipe->mutex);
+               return pipe;
        }
 
+out_revert_acct:
+       (void) account_pipe_buffers(user, pipe_bufs, 0);
+       kfree(pipe);
+out_free_uid:
+       free_uid(user);
        return NULL;
 }
 
@@ -659,7 +668,7 @@ void free_pipe_info(struct pipe_inode_info *pipe)
 {
        int i;
 
-       account_pipe_buffers(pipe, pipe->buffers, 0);
+       (void) account_pipe_buffers(pipe->user, pipe->buffers, 0);
        free_uid(pipe->user);
        for (i = 0; i < pipe->buffers; i++) {
                struct pipe_buffer *buf = pipe->bufs + i;
@@ -1010,13 +1019,55 @@ const struct file_operations pipefifo_fops = {
        .fasync         = pipe_fasync,
 };
 
+/*
+ * Currently we rely on the pipe array holding a power-of-2 number
+ * of pages.
+ */
+static inline unsigned int round_pipe_size(unsigned int size)
+{
+       unsigned long nr_pages;
+
+       nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
+}
+
 /*
  * Allocate a new array of pipe buffers and copy the info over. Returns the
  * pipe size if successful, or return -ERROR on error.
  */
-static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
 {
        struct pipe_buffer *bufs;
+       unsigned int size, nr_pages;
+       unsigned long user_bufs;
+       long ret = 0;
+
+       size = round_pipe_size(arg);
+       nr_pages = size >> PAGE_SHIFT;
+
+       if (!nr_pages)
+               return -EINVAL;
+
+       /*
+        * If trying to increase the pipe capacity, check that an
+        * unprivileged user is not trying to exceed various limits
+        * (soft limit check here, hard limit check just below).
+        * Decreasing the pipe capacity is always permitted, even
+        * if the user is currently over a limit.
+        */
+       if (nr_pages > pipe->buffers &&
+                       size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
+               return -EPERM;
+
+       user_bufs = account_pipe_buffers(pipe->user, pipe->buffers, nr_pages);
+
+       if (nr_pages > pipe->buffers &&
+                       (too_many_pipe_buffers_hard(user_bufs) ||
+                        too_many_pipe_buffers_soft(user_bufs)) &&
+                       !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+               ret = -EPERM;
+               goto out_revert_acct;
+       }
 
        /*
         * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
@@ -1024,13 +1075,17 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
         * again like we would do for growing. If the pipe currently
         * contains more buffers than arg, then return busy.
         */
-       if (nr_pages < pipe->nrbufs)
-               return -EBUSY;
+       if (nr_pages < pipe->nrbufs) {
+               ret = -EBUSY;
+               goto out_revert_acct;
+       }
 
        bufs = kcalloc(nr_pages, sizeof(*bufs),
                       GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
-       if (unlikely(!bufs))
-               return -ENOMEM;
+       if (unlikely(!bufs)) {
+               ret = -ENOMEM;
+               goto out_revert_acct;
+       }
 
        /*
         * The pipe array wraps around, so just start the new one at zero
@@ -1053,24 +1108,15 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
                        memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
        }
 
-       account_pipe_buffers(pipe, pipe->buffers, nr_pages);
        pipe->curbuf = 0;
        kfree(pipe->bufs);
        pipe->bufs = bufs;
        pipe->buffers = nr_pages;
        return nr_pages * PAGE_SIZE;
-}
-
-/*
- * Currently we rely on the pipe array holding a power-of-2 number
- * of pages.
- */
-static inline unsigned int round_pipe_size(unsigned int size)
-{
-       unsigned long nr_pages;
 
-       nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
+out_revert_acct:
+       (void) account_pipe_buffers(pipe->user, nr_pages, pipe->buffers);
+       return ret;
 }
 
 /*
@@ -1112,28 +1158,9 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
        __pipe_lock(pipe);
 
        switch (cmd) {
-       case F_SETPIPE_SZ: {
-               unsigned int size, nr_pages;
-
-               size = round_pipe_size(arg);
-               nr_pages = size >> PAGE_SHIFT;
-
-               ret = -EINVAL;
-               if (!nr_pages)
-                       goto out;
-
-               if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
-                       ret = -EPERM;
-                       goto out;
-               } else if ((too_many_pipe_buffers_hard(pipe->user) ||
-                           too_many_pipe_buffers_soft(pipe->user)) &&
-                          !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
-                       ret = -EPERM;
-                       goto out;
-               }
-               ret = pipe_set_size(pipe, nr_pages);
+       case F_SETPIPE_SZ:
+               ret = pipe_set_size(pipe, arg);
                break;
-               }
        case F_GETPIPE_SZ:
                ret = pipe->buffers * PAGE_SIZE;
                break;
@@ -1142,7 +1169,6 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
                break;
        }
 
-out:
        __pipe_unlock(pipe);
        return ret;
 }
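Both alloc_pipe_info() and the reworked pipe_set_size() above follow the same charge-first idiom: the user's pipe_bufs counter is bumped before the allocation is attempted, and the charge is reverted on every failure path. Reduced to its core (old_nr, new_nr and the failure condition are placeholders):

	user_bufs = account_pipe_buffers(pipe->user, old_nr, new_nr);
	if (limit_exceeded_or_allocation_failed) {
		/* revert the provisional charge before bailing out */
		(void) account_pipe_buffers(pipe->user, new_nr, old_nr);
		return ret;
	}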
index 88c7de12197bd763c452b7770fcd19d15dcb3df1..89600fd5963d46d5a5bd0915bde36850f7e71110 100644 (file)
@@ -186,51 +186,45 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
        task_unlock(p);
        rcu_read_unlock();
 
-       seq_printf(m,
-               "State:\t%s\n"
-               "Tgid:\t%d\n"
-               "Ngid:\t%d\n"
-               "Pid:\t%d\n"
-               "PPid:\t%d\n"
-               "TracerPid:\t%d\n"
-               "Uid:\t%d\t%d\t%d\t%d\n"
-               "Gid:\t%d\t%d\t%d\t%d\n"
-               "FDSize:\t%d\nGroups:\t",
-               get_task_state(p),
-               tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid,
-               from_kuid_munged(user_ns, cred->uid),
-               from_kuid_munged(user_ns, cred->euid),
-               from_kuid_munged(user_ns, cred->suid),
-               from_kuid_munged(user_ns, cred->fsuid),
-               from_kgid_munged(user_ns, cred->gid),
-               from_kgid_munged(user_ns, cred->egid),
-               from_kgid_munged(user_ns, cred->sgid),
-               from_kgid_munged(user_ns, cred->fsgid),
-               max_fds);
-
+       seq_printf(m, "State:\t%s", get_task_state(p));
+
+       seq_put_decimal_ull(m, "\nTgid:\t", tgid);
+       seq_put_decimal_ull(m, "\nNgid:\t", ngid);
+       seq_put_decimal_ull(m, "\nPid:\t", pid_nr_ns(pid, ns));
+       seq_put_decimal_ull(m, "\nPPid:\t", ppid);
+       seq_put_decimal_ull(m, "\nTracerPid:\t", tpid);
+       seq_put_decimal_ull(m, "\nUid:\t", from_kuid_munged(user_ns, cred->uid));
+       seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->euid));
+       seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->suid));
+       seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->fsuid));
+       seq_put_decimal_ull(m, "\nGid:\t", from_kgid_munged(user_ns, cred->gid));
+       seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->egid));
+       seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->sgid));
+       seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->fsgid));
+       seq_put_decimal_ull(m, "\nFDSize:\t", max_fds);
+
+       seq_puts(m, "\nGroups:\t");
        group_info = cred->group_info;
        for (g = 0; g < group_info->ngroups; g++)
-               seq_printf(m, "%d ",
-                          from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
+               seq_put_decimal_ull(m, g ? " " : "",
+                               from_kgid_munged(user_ns, group_info->gid[g]));
        put_cred(cred);
+       /* Trailing space shouldn't have been added in the first place. */
+       seq_putc(m, ' ');
 
 #ifdef CONFIG_PID_NS
        seq_puts(m, "\nNStgid:");
        for (g = ns->level; g <= pid->level; g++)
-               seq_printf(m, "\t%d",
-                       task_tgid_nr_ns(p, pid->numbers[g].ns));
+               seq_put_decimal_ull(m, "\t", task_tgid_nr_ns(p, pid->numbers[g].ns));
        seq_puts(m, "\nNSpid:");
        for (g = ns->level; g <= pid->level; g++)
-               seq_printf(m, "\t%d",
-                       task_pid_nr_ns(p, pid->numbers[g].ns));
+               seq_put_decimal_ull(m, "\t", task_pid_nr_ns(p, pid->numbers[g].ns));
        seq_puts(m, "\nNSpgid:");
        for (g = ns->level; g <= pid->level; g++)
-               seq_printf(m, "\t%d",
-                       task_pgrp_nr_ns(p, pid->numbers[g].ns));
+               seq_put_decimal_ull(m, "\t", task_pgrp_nr_ns(p, pid->numbers[g].ns));
        seq_puts(m, "\nNSsid:");
        for (g = ns->level; g <= pid->level; g++)
-               seq_printf(m, "\t%d",
-                       task_session_nr_ns(p, pid->numbers[g].ns));
+               seq_put_decimal_ull(m, "\t", task_session_nr_ns(p, pid->numbers[g].ns));
 #endif
        seq_putc(m, '\n');
 }
@@ -299,11 +293,12 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
                unlock_task_sighand(p, &flags);
        }
 
-       seq_printf(m, "Threads:\t%d\n", num_threads);
-       seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim);
+       seq_put_decimal_ull(m, "Threads:\t", num_threads);
+       seq_put_decimal_ull(m, "\nSigQ:\t", qsize);
+       seq_put_decimal_ull(m, "/", qlim);
 
        /* render them all */
-       render_sigset_t(m, "SigPnd:\t", &pending);
+       render_sigset_t(m, "\nSigPnd:\t", &pending);
        render_sigset_t(m, "ShdPnd:\t", &shpending);
        render_sigset_t(m, "SigBlk:\t", &blocked);
        render_sigset_t(m, "SigIgn:\t", &ignored);
@@ -348,17 +343,17 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
 static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
 {
 #ifdef CONFIG_SECCOMP
-       seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
+       seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode);
+       seq_putc(m, '\n');
 #endif
 }
 
 static inline void task_context_switch_counts(struct seq_file *m,
                                                struct task_struct *p)
 {
-       seq_printf(m,   "voluntary_ctxt_switches:\t%lu\n"
-                       "nonvoluntary_ctxt_switches:\t%lu\n",
-                       p->nvcsw,
-                       p->nivcsw);
+       seq_put_decimal_ull(m, "voluntary_ctxt_switches:\t", p->nvcsw);
+       seq_put_decimal_ull(m, "\nnonvoluntary_ctxt_switches:\t", p->nivcsw);
+       seq_putc(m, '\n');
 }
 
 static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
@@ -490,41 +485,41 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
        start_time = nsec_to_clock_t(task->real_start_time);
 
        seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
-       seq_put_decimal_ll(m, ' ', ppid);
-       seq_put_decimal_ll(m, ' ', pgid);
-       seq_put_decimal_ll(m, ' ', sid);
-       seq_put_decimal_ll(m, ' ', tty_nr);
-       seq_put_decimal_ll(m, ' ', tty_pgrp);
-       seq_put_decimal_ull(m, ' ', task->flags);
-       seq_put_decimal_ull(m, ' ', min_flt);
-       seq_put_decimal_ull(m, ' ', cmin_flt);
-       seq_put_decimal_ull(m, ' ', maj_flt);
-       seq_put_decimal_ull(m, ' ', cmaj_flt);
-       seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime));
-       seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime));
-       seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime));
-       seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime));
-       seq_put_decimal_ll(m, ' ', priority);
-       seq_put_decimal_ll(m, ' ', nice);
-       seq_put_decimal_ll(m, ' ', num_threads);
-       seq_put_decimal_ull(m, ' ', 0);
-       seq_put_decimal_ull(m, ' ', start_time);
-       seq_put_decimal_ull(m, ' ', vsize);
-       seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0);
-       seq_put_decimal_ull(m, ' ', rsslim);
-       seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0);
-       seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0);
-       seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0);
-       seq_put_decimal_ull(m, ' ', esp);
-       seq_put_decimal_ull(m, ' ', eip);
+       seq_put_decimal_ll(m, " ", ppid);
+       seq_put_decimal_ll(m, " ", pgid);
+       seq_put_decimal_ll(m, " ", sid);
+       seq_put_decimal_ll(m, " ", tty_nr);
+       seq_put_decimal_ll(m, " ", tty_pgrp);
+       seq_put_decimal_ull(m, " ", task->flags);
+       seq_put_decimal_ull(m, " ", min_flt);
+       seq_put_decimal_ull(m, " ", cmin_flt);
+       seq_put_decimal_ull(m, " ", maj_flt);
+       seq_put_decimal_ull(m, " ", cmaj_flt);
+       seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime));
+       seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime));
+       seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime));
+       seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime));
+       seq_put_decimal_ll(m, " ", priority);
+       seq_put_decimal_ll(m, " ", nice);
+       seq_put_decimal_ll(m, " ", num_threads);
+       seq_put_decimal_ull(m, " ", 0);
+       seq_put_decimal_ull(m, " ", start_time);
+       seq_put_decimal_ull(m, " ", vsize);
+       seq_put_decimal_ull(m, " ", mm ? get_mm_rss(mm) : 0);
+       seq_put_decimal_ull(m, " ", rsslim);
+       seq_put_decimal_ull(m, " ", mm ? (permitted ? mm->start_code : 1) : 0);
+       seq_put_decimal_ull(m, " ", mm ? (permitted ? mm->end_code : 1) : 0);
+       seq_put_decimal_ull(m, " ", (permitted && mm) ? mm->start_stack : 0);
+       seq_put_decimal_ull(m, " ", esp);
+       seq_put_decimal_ull(m, " ", eip);
        /* The signal information here is obsolete.
         * It must be decimal for Linux 2.0 compatibility.
         * Use /proc/#/status for real-time signals.
         */
-       seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL);
-       seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL);
-       seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL);
-       seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL);
+       seq_put_decimal_ull(m, " ", task->pending.signal.sig[0] & 0x7fffffffUL);
+       seq_put_decimal_ull(m, " ", task->blocked.sig[0] & 0x7fffffffUL);
+       seq_put_decimal_ull(m, " ", sigign.sig[0] & 0x7fffffffUL);
+       seq_put_decimal_ull(m, " ", sigcatch.sig[0] & 0x7fffffffUL);
 
        /*
         * We used to output the absolute kernel address, but that's an
@@ -538,31 +533,31 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
        else
                seq_puts(m, " 0");
 
-       seq_put_decimal_ull(m, ' ', 0);
-       seq_put_decimal_ull(m, ' ', 0);
-       seq_put_decimal_ll(m, ' ', task->exit_signal);
-       seq_put_decimal_ll(m, ' ', task_cpu(task));
-       seq_put_decimal_ull(m, ' ', task->rt_priority);
-       seq_put_decimal_ull(m, ' ', task->policy);
-       seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task));
-       seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime));
-       seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime));
+       seq_put_decimal_ull(m, " ", 0);
+       seq_put_decimal_ull(m, " ", 0);
+       seq_put_decimal_ll(m, " ", task->exit_signal);
+       seq_put_decimal_ll(m, " ", task_cpu(task));
+       seq_put_decimal_ull(m, " ", task->rt_priority);
+       seq_put_decimal_ull(m, " ", task->policy);
+       seq_put_decimal_ull(m, " ", delayacct_blkio_ticks(task));
+       seq_put_decimal_ull(m, " ", cputime_to_clock_t(gtime));
+       seq_put_decimal_ll(m, " ", cputime_to_clock_t(cgtime));
 
        if (mm && permitted) {
-               seq_put_decimal_ull(m, ' ', mm->start_data);
-               seq_put_decimal_ull(m, ' ', mm->end_data);
-               seq_put_decimal_ull(m, ' ', mm->start_brk);
-               seq_put_decimal_ull(m, ' ', mm->arg_start);
-               seq_put_decimal_ull(m, ' ', mm->arg_end);
-               seq_put_decimal_ull(m, ' ', mm->env_start);
-               seq_put_decimal_ull(m, ' ', mm->env_end);
+               seq_put_decimal_ull(m, " ", mm->start_data);
+               seq_put_decimal_ull(m, " ", mm->end_data);
+               seq_put_decimal_ull(m, " ", mm->start_brk);
+               seq_put_decimal_ull(m, " ", mm->arg_start);
+               seq_put_decimal_ull(m, " ", mm->arg_end);
+               seq_put_decimal_ull(m, " ", mm->env_start);
+               seq_put_decimal_ull(m, " ", mm->env_end);
        } else
-               seq_printf(m, " 0 0 0 0 0 0 0");
+               seq_puts(m, " 0 0 0 0 0 0 0");
 
        if (permitted)
-               seq_put_decimal_ll(m, ' ', task->exit_code);
+               seq_put_decimal_ll(m, " ", task->exit_code);
        else
-               seq_put_decimal_ll(m, ' ', 0);
+               seq_puts(m, " 0");
 
        seq_putc(m, '\n');
        if (mm)
@@ -598,13 +593,13 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
         * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
         *               size, resident, shared, text, data);
         */
-       seq_put_decimal_ull(m, 0, size);
-       seq_put_decimal_ull(m, ' ', resident);
-       seq_put_decimal_ull(m, ' ', shared);
-       seq_put_decimal_ull(m, ' ', text);
-       seq_put_decimal_ull(m, ' ', 0);
-       seq_put_decimal_ull(m, ' ', data);
-       seq_put_decimal_ull(m, ' ', 0);
+       seq_put_decimal_ull(m, "", size);
+       seq_put_decimal_ull(m, " ", resident);
+       seq_put_decimal_ull(m, " ", shared);
+       seq_put_decimal_ull(m, " ", text);
+       seq_put_decimal_ull(m, " ", 0);
+       seq_put_decimal_ull(m, " ", data);
+       seq_put_decimal_ull(m, " ", 0);
        seq_putc(m, '\n');
 
        return 0;
index 3b792ab3c0dc17d47b42643ec9a9807f663a229c..dc7fe5f3a53c5e97a2b1df4b89906c12ca364434 100644 (file)
@@ -2280,16 +2280,27 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
        if (!p)
                return -ESRCH;
 
-       if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) {
-               task_lock(p);
-               if (slack_ns == 0)
-                       p->timer_slack_ns = p->default_timer_slack_ns;
-               else
-                       p->timer_slack_ns = slack_ns;
-               task_unlock(p);
-       } else
-               count = -EPERM;
+       if (p != current) {
+               if (!capable(CAP_SYS_NICE)) {
+                       count = -EPERM;
+                       goto out;
+               }
+
+               err = security_task_setscheduler(p);
+               if (err) {
+                       count = err;
+                       goto out;
+               }
+       }
+
+       task_lock(p);
+       if (slack_ns == 0)
+               p->timer_slack_ns = p->default_timer_slack_ns;
+       else
+               p->timer_slack_ns = slack_ns;
+       task_unlock(p);
 
+out:
        put_task_struct(p);
 
        return count;
@@ -2299,19 +2310,28 @@ static int timerslack_ns_show(struct seq_file *m, void *v)
 {
        struct inode *inode = m->private;
        struct task_struct *p;
-       int err =  0;
+       int err = 0;
 
        p = get_proc_task(inode);
        if (!p)
                return -ESRCH;
 
-       if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) {
-               task_lock(p);
-               seq_printf(m, "%llu\n", p->timer_slack_ns);
-               task_unlock(p);
-       } else
-               err = -EPERM;
+       if (p != current) {
+
+               if (!capable(CAP_SYS_NICE)) {
+                       err = -EPERM;
+                       goto out;
+               }
+               err = security_task_getscheduler(p);
+               if (err)
+                       goto out;
+       }
 
+       task_lock(p);
+       seq_printf(m, "%llu\n", p->timer_slack_ns);
+       task_unlock(p);
+
+out:
        put_task_struct(p);
 
        return err;
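
With the two hunks above, the timerslack file no longer goes through ptrace_may_access(): acting on another task now requires CAP_SYS_NICE plus approval from security_task_setscheduler()/security_task_getscheduler(), while a task may always read or set its own slack. A hedged userspace sketch of the write path; set_timerslack() and the 50 us value are illustrative, not part of the patch:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static int set_timerslack(pid_t pid, unsigned long long slack_ns)
{
        char path[64];
        FILE *f;

        snprintf(path, sizeof(path), "/proc/%d/timerslack_ns", (int)pid);
        f = fopen(path, "w");
        if (!f)
                return -1;
        fprintf(f, "%llu\n", slack_ns);
        return fclose(f);
}

int main(int argc, char **argv)
{
        pid_t pid = argc > 1 ? atoi(argv[1]) : getpid();

        /* needs CAP_SYS_NICE (plus LSM approval) if pid is not the caller */
        return set_timerslack(pid, 50000) ? 1 : 0;      /* 50 us */
}
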
index 01df23cc81f62d4a6d1c3aadcb1b48495b0d7917..d21dafef31029deb345e98b36fe133dc73599e00 100644 (file)
@@ -31,7 +31,7 @@ static int seq_show(struct seq_file *m, void *v)
        put_task_struct(task);
 
        if (files) {
-               int fd = proc_fd(m->private);
+               unsigned int fd = proc_fd(m->private);
 
                spin_lock(&files->file_lock);
                file = fcheck_files(files, fd);
@@ -86,7 +86,7 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
        struct task_struct *task;
        const struct cred *cred;
        struct inode *inode;
-       int fd;
+       unsigned int fd;
 
        if (flags & LOOKUP_RCU)
                return -ECHILD;
@@ -158,7 +158,7 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
        }
 
        if (files) {
-               int fd = proc_fd(d_inode(dentry));
+               unsigned int fd = proc_fd(d_inode(dentry));
                struct file *fd_file;
 
                spin_lock(&files->file_lock);
@@ -253,7 +253,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
                        continue;
                rcu_read_unlock();
 
-               len = snprintf(name, sizeof(name), "%d", fd);
+               len = snprintf(name, sizeof(name), "%u", fd);
                if (!proc_fill_cache(file, ctx,
                                     name, len, instantiate, p,
                                     (void *)(unsigned long)fd))
index 7c047f256ae2ccb945aab10d8688d6305b5a0eb7..46dafadd0083a5b59fe7c7a5bc6c8d33282a74be 100644 (file)
@@ -11,7 +11,7 @@ extern const struct inode_operations proc_fdinfo_inode_operations;
 
 extern int proc_fd_permission(struct inode *inode, int mask);
 
-static inline int proc_fd(struct inode *inode)
+static inline unsigned int proc_fd(struct inode *inode)
 {
        return PROC_I(inode)->fd;
 }
index 7931c558c19250ab87fd911b6c09ff359d4d05e1..5378441ec1b7bccb0c98a6a5ce077be9b4da921f 100644 (file)
@@ -60,7 +60,7 @@ union proc_op {
 
 struct proc_inode {
        struct pid *pid;
-       int fd;
+       unsigned int fd;
        union proc_op op;
        struct proc_dir_entry *pde;
        struct ctl_table_header *sysctl;
index b9a8c813e5e66b5e751080e1bd7b11b7e8d87634..8a428498d6b21f08c8c26ef184ff9f4332b5cdd0 100644 (file)
@@ -23,6 +23,25 @@ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
 {
 }
 
+static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
+{
+       char v[32];
+       static const char blanks[7] = {' ', ' ', ' ', ' ', ' ', ' ', ' '};

+       int len;
+
+       len = num_to_str(v, sizeof(v), num << (PAGE_SHIFT - 10));
+
+       seq_write(m, s, 16);
+
+       if (len > 0) {
+               if (len < 8)
+                       seq_write(m, blanks, 8 - len);
+
+               seq_write(m, v, len);
+       }
+       seq_write(m, " kB\n", 4);
+}
+
 static int meminfo_proc_show(struct seq_file *m, void *v)
 {
        struct sysinfo i;
@@ -32,10 +51,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
        unsigned long pages[NR_LRU_LISTS];
        int lru;
 
-/*
- * display in kilobytes.
- */
-#define K(x) ((x) << (PAGE_SHIFT - 10))
        si_meminfo(&i);
        si_swapinfo(&i);
        committed = percpu_counter_read_positive(&vm_committed_as);
@@ -50,136 +65,100 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 
        available = si_mem_available();
 
-       /*
-        * Tagged format, for easy grepping and expansion.
-        */
-       seq_printf(m,
-               "MemTotal:       %8lu kB\n"
-               "MemFree:        %8lu kB\n"
-               "MemAvailable:   %8lu kB\n"
-               "Buffers:        %8lu kB\n"
-               "Cached:         %8lu kB\n"
-               "SwapCached:     %8lu kB\n"
-               "Active:         %8lu kB\n"
-               "Inactive:       %8lu kB\n"
-               "Active(anon):   %8lu kB\n"
-               "Inactive(anon): %8lu kB\n"
-               "Active(file):   %8lu kB\n"
-               "Inactive(file): %8lu kB\n"
-               "Unevictable:    %8lu kB\n"
-               "Mlocked:        %8lu kB\n"
-#ifdef CONFIG_HIGHMEM
-               "HighTotal:      %8lu kB\n"
-               "HighFree:       %8lu kB\n"
-               "LowTotal:       %8lu kB\n"
-               "LowFree:        %8lu kB\n"
-#endif
-#ifndef CONFIG_MMU
-               "MmapCopy:       %8lu kB\n"
-#endif
-               "SwapTotal:      %8lu kB\n"
-               "SwapFree:       %8lu kB\n"
-               "Dirty:          %8lu kB\n"
-               "Writeback:      %8lu kB\n"
-               "AnonPages:      %8lu kB\n"
-               "Mapped:         %8lu kB\n"
-               "Shmem:          %8lu kB\n"
-               "Slab:           %8lu kB\n"
-               "SReclaimable:   %8lu kB\n"
-               "SUnreclaim:     %8lu kB\n"
-               "KernelStack:    %8lu kB\n"
-               "PageTables:     %8lu kB\n"
-#ifdef CONFIG_QUICKLIST
-               "Quicklists:     %8lu kB\n"
-#endif
-               "NFS_Unstable:   %8lu kB\n"
-               "Bounce:         %8lu kB\n"
-               "WritebackTmp:   %8lu kB\n"
-               "CommitLimit:    %8lu kB\n"
-               "Committed_AS:   %8lu kB\n"
-               "VmallocTotal:   %8lu kB\n"
-               "VmallocUsed:    %8lu kB\n"
-               "VmallocChunk:   %8lu kB\n"
-#ifdef CONFIG_MEMORY_FAILURE
-               "HardwareCorrupted: %5lu kB\n"
-#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-               "AnonHugePages:  %8lu kB\n"
-               "ShmemHugePages: %8lu kB\n"
-               "ShmemPmdMapped: %8lu kB\n"
-#endif
-#ifdef CONFIG_CMA
-               "CmaTotal:       %8lu kB\n"
-               "CmaFree:        %8lu kB\n"
-#endif
-               ,
-               K(i.totalram),
-               K(i.freeram),
-               K(available),
-               K(i.bufferram),
-               K(cached),
-               K(total_swapcache_pages()),
-               K(pages[LRU_ACTIVE_ANON]   + pages[LRU_ACTIVE_FILE]),
-               K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
-               K(pages[LRU_ACTIVE_ANON]),
-               K(pages[LRU_INACTIVE_ANON]),
-               K(pages[LRU_ACTIVE_FILE]),
-               K(pages[LRU_INACTIVE_FILE]),
-               K(pages[LRU_UNEVICTABLE]),
-               K(global_page_state(NR_MLOCK)),
+       show_val_kb(m, "MemTotal:       ", i.totalram);
+       show_val_kb(m, "MemFree:        ", i.freeram);
+       show_val_kb(m, "MemAvailable:   ", available);
+       show_val_kb(m, "Buffers:        ", i.bufferram);
+       show_val_kb(m, "Cached:         ", cached);
+       show_val_kb(m, "SwapCached:     ", total_swapcache_pages());
+       show_val_kb(m, "Active:         ", pages[LRU_ACTIVE_ANON] +
+                                          pages[LRU_ACTIVE_FILE]);
+       show_val_kb(m, "Inactive:       ", pages[LRU_INACTIVE_ANON] +
+                                          pages[LRU_INACTIVE_FILE]);
+       show_val_kb(m, "Active(anon):   ", pages[LRU_ACTIVE_ANON]);
+       show_val_kb(m, "Inactive(anon): ", pages[LRU_INACTIVE_ANON]);
+       show_val_kb(m, "Active(file):   ", pages[LRU_ACTIVE_FILE]);
+       show_val_kb(m, "Inactive(file): ", pages[LRU_INACTIVE_FILE]);
+       show_val_kb(m, "Unevictable:    ", pages[LRU_UNEVICTABLE]);
+       show_val_kb(m, "Mlocked:        ", global_page_state(NR_MLOCK));
+
 #ifdef CONFIG_HIGHMEM
-               K(i.totalhigh),
-               K(i.freehigh),
-               K(i.totalram-i.totalhigh),
-               K(i.freeram-i.freehigh),
+       show_val_kb(m, "HighTotal:      ", i.totalhigh);
+       show_val_kb(m, "HighFree:       ", i.freehigh);
+       show_val_kb(m, "LowTotal:       ", i.totalram - i.totalhigh);
+       show_val_kb(m, "LowFree:        ", i.freeram - i.freehigh);
 #endif
+
 #ifndef CONFIG_MMU
-               K((unsigned long) atomic_long_read(&mmap_pages_allocated)),
+       show_val_kb(m, "MmapCopy:       ",
+                   (unsigned long)atomic_long_read(&mmap_pages_allocated));
 #endif
-               K(i.totalswap),
-               K(i.freeswap),
-               K(global_node_page_state(NR_FILE_DIRTY)),
-               K(global_node_page_state(NR_WRITEBACK)),
-               K(global_node_page_state(NR_ANON_MAPPED)),
-               K(global_node_page_state(NR_FILE_MAPPED)),
-               K(i.sharedram),
-               K(global_page_state(NR_SLAB_RECLAIMABLE) +
-                               global_page_state(NR_SLAB_UNRECLAIMABLE)),
-               K(global_page_state(NR_SLAB_RECLAIMABLE)),
-               K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
-               global_page_state(NR_KERNEL_STACK_KB),
-               K(global_page_state(NR_PAGETABLE)),
+
+       show_val_kb(m, "SwapTotal:      ", i.totalswap);
+       show_val_kb(m, "SwapFree:       ", i.freeswap);
+       show_val_kb(m, "Dirty:          ",
+                   global_node_page_state(NR_FILE_DIRTY));
+       show_val_kb(m, "Writeback:      ",
+                   global_node_page_state(NR_WRITEBACK));
+       show_val_kb(m, "AnonPages:      ",
+                   global_node_page_state(NR_ANON_MAPPED));
+       show_val_kb(m, "Mapped:         ",
+                   global_node_page_state(NR_FILE_MAPPED));
+       show_val_kb(m, "Shmem:          ", i.sharedram);
+       show_val_kb(m, "Slab:           ",
+                   global_page_state(NR_SLAB_RECLAIMABLE) +
+                   global_page_state(NR_SLAB_UNRECLAIMABLE));
+
+       show_val_kb(m, "SReclaimable:   ",
+                   global_page_state(NR_SLAB_RECLAIMABLE));
+       show_val_kb(m, "SUnreclaim:     ",
+                   global_page_state(NR_SLAB_UNRECLAIMABLE));
+       seq_printf(m, "KernelStack:    %8lu kB\n",
+                  global_page_state(NR_KERNEL_STACK_KB));
+       show_val_kb(m, "PageTables:     ",
+                   global_page_state(NR_PAGETABLE));
 #ifdef CONFIG_QUICKLIST
-               K(quicklist_total_size()),
+       show_val_kb(m, "Quicklists:     ", quicklist_total_size());
 #endif
-               K(global_node_page_state(NR_UNSTABLE_NFS)),
-               K(global_page_state(NR_BOUNCE)),
-               K(global_node_page_state(NR_WRITEBACK_TEMP)),
-               K(vm_commit_limit()),
-               K(committed),
-               (unsigned long)VMALLOC_TOTAL >> 10,
-               0ul, // used to be vmalloc 'used'
-               0ul  // used to be vmalloc 'largest_chunk'
+
+       show_val_kb(m, "NFS_Unstable:   ",
+                   global_node_page_state(NR_UNSTABLE_NFS));
+       show_val_kb(m, "Bounce:         ",
+                   global_page_state(NR_BOUNCE));
+       show_val_kb(m, "WritebackTmp:   ",
+                   global_node_page_state(NR_WRITEBACK_TEMP));
+       show_val_kb(m, "CommitLimit:    ", vm_commit_limit());
+       show_val_kb(m, "Committed_AS:   ", committed);
+       seq_printf(m, "VmallocTotal:   %8lu kB\n",
+                  (unsigned long)VMALLOC_TOTAL >> 10);
+       show_val_kb(m, "VmallocUsed:    ", 0ul);
+       show_val_kb(m, "VmallocChunk:   ", 0ul);
+
 #ifdef CONFIG_MEMORY_FAILURE
-               , atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
+       seq_printf(m, "HardwareCorrupted: %5lu kB\n",
+                  atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10));
 #endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-               , K(global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR)
-               , K(global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR)
-               , K(global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR)
+       show_val_kb(m, "AnonHugePages:  ",
+                   global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR);
+       show_val_kb(m, "ShmemHugePages: ",
+                   global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
+       show_val_kb(m, "ShmemPmdMapped: ",
+                   global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
 #endif
+
 #ifdef CONFIG_CMA
-               , K(totalcma_pages)
-               , K(global_page_state(NR_FREE_CMA_PAGES))
+       show_val_kb(m, "CmaTotal:       ", totalcma_pages);
+       show_val_kb(m, "CmaFree:        ",
+                   global_page_state(NR_FREE_CMA_PAGES));
 #endif
-               );
 
        hugetlb_report_meminfo(m);
 
        arch_report_meminfo(m);
 
        return 0;
-#undef K
 }
 
 static int meminfo_proc_open(struct inode *inode, struct file *file)
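
show_val_kb() reproduces the old printf layout by hand: a 16-character label, the value right-justified in an 8-character field, then " kB\n". A small userspace replica of the same layout, illustrative only (printf does the padding, so it stays short):

#include <stdio.h>

static void show_val_kb(const char *label16, unsigned long kb)
{
        /* label16 is assumed to be padded to 16 characters, as in the patch */
        printf("%s%8lu kB\n", label16, kb);
}

int main(void)
{
        show_val_kb("MemTotal:       ", 16314444);
        show_val_kb("MemFree:        ", 374120);
        return 0;
}
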
index 7907e456ac4f8e8c42091ba74c5415e5397a14db..d700c42b357263b8e5106a7aed6fe1e301a7892c 100644 (file)
@@ -115,17 +115,16 @@ static int show_stat(struct seq_file *p, void *v)
        }
        sum += arch_irq_stat();
 
-       seq_puts(p, "cpu ");
-       seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
-       seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
-       seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
-       seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
-       seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
-       seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
-       seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
-       seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
-       seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
-       seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
+       seq_put_decimal_ull(p, "cpu  ", cputime64_to_clock_t(user));
+       seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
+       seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
+       seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
+       seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
+       seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
+       seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
+       seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
+       seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
+       seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
        seq_putc(p, '\n');
 
        for_each_online_cpu(i) {
@@ -141,23 +140,23 @@ static int show_stat(struct seq_file *p, void *v)
                guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
                guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
                seq_printf(p, "cpu%d", i);
-               seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
-               seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
-               seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
-               seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
-               seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
-               seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
-               seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
-               seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
-               seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
-               seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
+               seq_put_decimal_ull(p, " ", cputime64_to_clock_t(user));
+               seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
+               seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
+               seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
+               seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
+               seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
+               seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
+               seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
+               seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
+               seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
                seq_putc(p, '\n');
        }
-       seq_printf(p, "intr %llu", (unsigned long long)sum);
+       seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
 
        /* sum again ? it could be updated? */
        for_each_irq_nr(j)
-               seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
+               seq_put_decimal_ull(p, " ", kstat_irqs_usr(j));
 
        seq_printf(p,
                "\nctxt %llu\n"
@@ -171,10 +170,10 @@ static int show_stat(struct seq_file *p, void *v)
                nr_running(),
                nr_iowait());
 
-       seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq);
+       seq_put_decimal_ull(p, "softirq ", (unsigned long long)sum_softirq);
 
        for (i = 0; i < NR_SOFTIRQS; i++)
-               seq_put_decimal_ull(p, ' ', per_softirq_sums[i]);
+               seq_put_decimal_ull(p, " ", per_softirq_sums[i]);
        seq_putc(p, '\n');
 
        return 0;
index f6fa99eca5158f36d3fe38a23baac5b7c54ca6bb..d2a70cf2154e68958b57c3e884b7614522a925c4 100644 (file)
@@ -1070,7 +1070,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                        }
                        mmu_notifier_invalidate_range_start(mm, 0, -1);
                }
-               walk_page_range(0, ~0UL, &clear_refs_walk);
+               walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
                if (type == CLEAR_REFS_SOFT_DIRTY)
                        mmu_notifier_invalidate_range_end(mm, 0, -1);
                flush_tlb_mm(mm);
index 6dc4296eed62c5d2a493c5ba3f649c890aa37981..368bfb92b115c0e99ce4c654f6fdecb6ec5a2763 100644 (file)
@@ -679,11 +679,11 @@ EXPORT_SYMBOL(seq_puts);
 /*
  * A helper routine for putting decimal numbers without rich format of printf().
  * only 'unsigned long long' is supported.
- * This routine will put one byte delimiter + number into seq_file.
+ * This routine will put the delimiter string, then the number, into the seq_file.
  * This routine is very quick when you show lots of numbers.
  * In usual cases, it will be better to use seq_printf(). It's easier to read.
  */
-void seq_put_decimal_ull(struct seq_file *m, char delimiter,
+void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
                         unsigned long long num)
 {
        int len;
@@ -691,8 +691,15 @@ void seq_put_decimal_ull(struct seq_file *m, char delimiter,
        if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
                goto overflow;
 
-       if (delimiter)
-               m->buf[m->count++] = delimiter;
+       len = strlen(delimiter);
+       if (m->count + len >= m->size)
+               goto overflow;
+
+       memcpy(m->buf + m->count, delimiter, len);
+       m->count += len;
+
+       if (m->count + 1 >= m->size)
+               goto overflow;
 
        if (num < 10) {
                m->buf[m->count++] = num + '0';
@@ -702,6 +709,7 @@ void seq_put_decimal_ull(struct seq_file *m, char delimiter,
        len = num_to_str(m->buf + m->count, m->size - m->count, num);
        if (!len)
                goto overflow;
+
        m->count += len;
        return;
 
@@ -710,19 +718,42 @@ overflow:
 }
 EXPORT_SYMBOL(seq_put_decimal_ull);
 
-void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num)
+void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num)
 {
+       int len;
+
+       if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */
+               goto overflow;
+
+       len = strlen(delimiter);
+       if (m->count + len >= m->size)
+               goto overflow;
+
+       memcpy(m->buf + m->count, delimiter, len);
+       m->count += len;
+
+       if (m->count + 2 >= m->size)
+               goto overflow;
+
        if (num < 0) {
-               if (m->count + 3 >= m->size) {
-                       seq_set_overflow(m);
-                       return;
-               }
-               if (delimiter)
-                       m->buf[m->count++] = delimiter;
+               m->buf[m->count++] = '-';
                num = -num;
-               delimiter = '-';
        }
-       seq_put_decimal_ull(m, delimiter, num);
+
+       if (num < 10) {
+               m->buf[m->count++] = num + '0';
+               return;
+       }
+
+       len = num_to_str(m->buf + m->count, m->size - m->count, num);
+       if (!len)
+               goto overflow;
+
+       m->count += len;
+       return;
+
+overflow:
+       seq_set_overflow(m);
 }
 EXPORT_SYMBOL(seq_put_decimal_ll);
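
After this change both helpers take a string delimiter, so a literal prefix can be folded into the first call instead of going through seq_printf()/seq_puts(). A sketch of a show() callback using the new signatures; example_show() and the values are illustrative, and the fragment assumes <linux/seq_file.h>:

#include <linux/seq_file.h>

static int example_show(struct seq_file *m, void *v)
{
        unsigned long long a = 1, b = 2, c = 3;

        seq_put_decimal_ull(m, "vals ", a);     /* prefix folded into the first call */
        seq_put_decimal_ull(m, " ", b);
        seq_put_decimal_ull(m, " ", c);
        seq_putc(m, '\n');
        return 0;
}
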
 
index e612a0233710850f34eae93540b098c3ae7cda8c..7f71bb8851602f380d2f55ff40c92d90b389f872 100644 (file)
@@ -1662,6 +1662,7 @@ const struct file_operations xfs_file_operations = {
        .open           = xfs_file_open,
        .release        = xfs_file_release,
        .fsync          = xfs_file_fsync,
+       .get_unmapped_area = thp_get_unmapped_area,
        .fallocate      = xfs_file_fallocate,
 };
 
index ad9d8f94dc7a949cf9309d61afe4cd5f2d17fcf9..c842c10d735e69c700a3b9892b795444bd0b711e 100644 (file)
                *(.spinlock.text)                                       \
                VMLINUX_SYMBOL(__lock_text_end) = .;
 
+#define CPUIDLE_TEXT                                                   \
+               ALIGN_FUNCTION();                                       \
+               VMLINUX_SYMBOL(__cpuidle_text_start) = .;               \
+               *(.cpuidle.text)                                        \
+               VMLINUX_SYMBOL(__cpuidle_text_end) = .;
+
 #define KPROBES_TEXT                                                   \
                ALIGN_FUNCTION();                                       \
                VMLINUX_SYMBOL(__kprobes_text_start) = .;               \
index 7caaf298f5399c90616838ccaf1f904066cb29e4..28c15050ebe606e3c6105ee994264de759082e49 100644 (file)
 #ifndef _LINUX_AUTO_DEV_IOCTL_H
 #define _LINUX_AUTO_DEV_IOCTL_H
 
-#include <linux/auto_fs.h>
-#include <linux/string.h>
-
-#define AUTOFS_DEVICE_NAME             "autofs"
-
-#define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1
-#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0
-
-#define AUTOFS_DEVID_LEN               16
-
-#define AUTOFS_DEV_IOCTL_SIZE          sizeof(struct autofs_dev_ioctl)
-
-/*
- * An ioctl interface for autofs mount point control.
- */
-
-struct args_protover {
-       __u32   version;
-};
-
-struct args_protosubver {
-       __u32   sub_version;
-};
-
-struct args_openmount {
-       __u32   devid;
-};
-
-struct args_ready {
-       __u32   token;
-};
-
-struct args_fail {
-       __u32   token;
-       __s32   status;
-};
-
-struct args_setpipefd {
-       __s32   pipefd;
-};
-
-struct args_timeout {
-       __u64   timeout;
-};
-
-struct args_requester {
-       __u32   uid;
-       __u32   gid;
-};
-
-struct args_expire {
-       __u32   how;
-};
-
-struct args_askumount {
-       __u32   may_umount;
-};
-
-struct args_ismountpoint {
-       union {
-               struct args_in {
-                       __u32   type;
-               } in;
-               struct args_out {
-                       __u32   devid;
-                       __u32   magic;
-               } out;
-       };
-};
-
-/*
- * All the ioctls use this structure.
- * When sending a path size must account for the total length
- * of the chunk of memory otherwise is is the size of the
- * structure.
- */
-
-struct autofs_dev_ioctl {
-       __u32 ver_major;
-       __u32 ver_minor;
-       __u32 size;             /* total size of data passed in
-                                * including this struct */
-       __s32 ioctlfd;          /* automount command fd */
-
-       /* Command parameters */
-
-       union {
-               struct args_protover            protover;
-               struct args_protosubver         protosubver;
-               struct args_openmount           openmount;
-               struct args_ready               ready;
-               struct args_fail                fail;
-               struct args_setpipefd           setpipefd;
-               struct args_timeout             timeout;
-               struct args_requester           requester;
-               struct args_expire              expire;
-               struct args_askumount           askumount;
-               struct args_ismountpoint        ismountpoint;
-       };
-
-       char path[0];
-};
-
-static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
-{
-       memset(in, 0, sizeof(struct autofs_dev_ioctl));
-       in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
-       in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
-       in->size = sizeof(struct autofs_dev_ioctl);
-       in->ioctlfd = -1;
-}
-
-/*
- * If you change this make sure you make the corresponding change
- * to autofs-dev-ioctl.c:lookup_ioctl()
- */
-enum {
-       /* Get various version info */
-       AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
-       AUTOFS_DEV_IOCTL_PROTOVER_CMD,
-       AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD,
-
-       /* Open mount ioctl fd */
-       AUTOFS_DEV_IOCTL_OPENMOUNT_CMD,
-
-       /* Close mount ioctl fd */
-       AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD,
-
-       /* Mount/expire status returns */
-       AUTOFS_DEV_IOCTL_READY_CMD,
-       AUTOFS_DEV_IOCTL_FAIL_CMD,
-
-       /* Activate/deactivate autofs mount */
-       AUTOFS_DEV_IOCTL_SETPIPEFD_CMD,
-       AUTOFS_DEV_IOCTL_CATATONIC_CMD,
-
-       /* Expiry timeout */
-       AUTOFS_DEV_IOCTL_TIMEOUT_CMD,
-
-       /* Get mount last requesting uid and gid */
-       AUTOFS_DEV_IOCTL_REQUESTER_CMD,
-
-       /* Check for eligible expire candidates */
-       AUTOFS_DEV_IOCTL_EXPIRE_CMD,
-
-       /* Request busy status */
-       AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD,
-
-       /* Check if path is a mountpoint */
-       AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
-};
-
-#define AUTOFS_IOCTL 0x93
-
-#define AUTOFS_DEV_IOCTL_VERSION \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_PROTOVER \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_PROTOSUBVER \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_OPENMOUNT \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_READY \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_FAIL \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_SETPIPEFD \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_CATATONIC \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_TIMEOUT \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_REQUESTER \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_EXPIRE \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_ASKUMOUNT \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \
-       _IOWR(AUTOFS_IOCTL, \
-             AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl)
-
+#include <uapi/linux/auto_dev-ioctl.h>
 #endif /* _LINUX_AUTO_DEV_IOCTL_H */
index b4066bb890836c88be9b41f30de83f3d809114db..b8f814c95cf5108df246902b86e4d06072c1c70e 100644 (file)
@@ -10,7 +10,6 @@
 #define _LINUX_AUTO_FS_H
 
 #include <linux/fs.h>
-#include <linux/limits.h>
 #include <linux/ioctl.h>
 #include <uapi/linux/auto_fs.h>
 #endif /* _LINUX_AUTO_FS_H */
index 299e76b59fe9b0330254f608863f7237ed97c802..a83c822c35c2424095b512dd04f95609cadfdaa0 100644 (file)
@@ -65,16 +65,6 @@ static inline int get_bitmask_order(unsigned int count)
        return order;   /* We could be slightly more clever with -1 here... */
 }
 
-static inline int get_count_order(unsigned int count)
-{
-       int order;
-
-       order = fls(count) - 1;
-       if (count & (count - 1))
-               order++;
-       return order;
-}
-
 static __always_inline unsigned long hweight_long(unsigned long w)
 {
        return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
@@ -191,6 +181,32 @@ static inline unsigned fls_long(unsigned long l)
        return fls64(l);
 }
 
+static inline int get_count_order(unsigned int count)
+{
+       int order;
+
+       order = fls(count) - 1;
+       if (count & (count - 1))
+               order++;
+       return order;
+}
+
+/**
+ * get_count_order_long - get order after rounding @l up to power of 2
+ * @l: count to take the order of
+ *
+ * Same as get_count_order(), but with a long type parameter.
+ */
+static inline int get_count_order_long(unsigned long l)
+{
+       if (l == 0UL)
+               return -1;
+       else if (l & (l - 1UL))
+               return (int)fls_long(l);
+       else
+               return (int)fls_long(l) - 1;
+}
+
 /**
  * __ffs64 - find first set bit in a 64 bit word
  * @word: The 64 bit word
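
For reference, the new helper rounds up to a power of two before taking the order: get_count_order_long(0) is -1, (1) is 0, (16) is 4, and (17) is 5 (rounded up to 32). A minimal caller sketch, assuming <linux/bitops.h>; table_slots() is illustrative:

#include <linux/bitops.h>

/* size a power-of-two table from an arbitrary entry count (nr_entries > 0) */
static unsigned long table_slots(unsigned long nr_entries)
{
        return 1UL << get_count_order_long(nr_entries);
}
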
index f9be3269171801524f20e56fe5ea8e7879149132..962164d3650675923e7099da44ba6dc91332c689 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/mmzone.h>
 #include <linux/mm_types.h>
 #include <asm/dma.h>
+#include <asm/processor.h>
 
 /*
  *  simple boot-time physical memory area allocator.
@@ -119,6 +120,10 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 #define BOOTMEM_LOW_LIMIT __pa(MAX_DMA_ADDRESS)
 #endif
 
+#ifndef ARCH_LOW_ADDRESS_LIMIT
+#define ARCH_LOW_ADDRESS_LIMIT  0xffffffffUL
+#endif
+
 #define alloc_bootmem(x) \
        __alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_align(x, align) \
@@ -180,10 +185,6 @@ static inline void * __init memblock_virt_alloc_nopanic(
                                                    NUMA_NO_NODE);
 }
 
-#ifndef ARCH_LOW_ADDRESS_LIMIT
-#define ARCH_LOW_ADDRESS_LIMIT  0xffffffffUL
-#endif
-
 static inline void * __init memblock_virt_alloc_low(
                                        phys_addr_t size, phys_addr_t align)
 {
index d4e106b5dc277a387f159b40770b5f8f2c348038..0d8415820fc35409fbabcbef3bfe05c4cfa99546 100644 (file)
@@ -6,8 +6,10 @@
  * Lower value means higher priority, analogically to reclaim priority.
  */
 enum compact_priority {
+       COMPACT_PRIO_SYNC_FULL,
+       MIN_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_FULL,
        COMPACT_PRIO_SYNC_LIGHT,
-       MIN_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT,
+       MIN_COMPACT_COSTLY_PRIORITY = COMPACT_PRIO_SYNC_LIGHT,
        DEF_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT,
        COMPACT_PRIO_ASYNC,
        INIT_COMPACT_PRIORITY = COMPACT_PRIO_ASYNC
@@ -49,14 +51,37 @@ enum compact_result {
        COMPACT_CONTENDED,
 
        /*
-        * direct compaction partially compacted a zone and there might be
-        * suitable pages
+        * direct compaction terminated after concluding that the allocation
+        * should now succeed
         */
-       COMPACT_PARTIAL,
+       COMPACT_SUCCESS,
 };
 
 struct alloc_context; /* in mm/internal.h */
 
+/*
+ * Number of free order-0 pages that should be available above given watermark
+ * to make sure compaction has reasonable chance of not running out of free
+ * pages that it needs to isolate as migration target during its work.
+ */
+static inline unsigned long compact_gap(unsigned int order)
+{
+       /*
+        * Although all the isolations for migration are temporary, compaction
+        * free scanner may have up to 1 << order pages on its list and then
+        * try to split an (order - 1) free page. At that point, a gap of
+        * 1 << order might not be enough, so it's safer to require twice that
+        * amount. Note that the number of pages on the list is also
+        * effectively limited by COMPACT_CLUSTER_MAX, as that's the maximum
+        * that the migrate scanner can have isolated on migrate list, and free
+        * scanner is only invoked when the number of isolated free pages is
+        * lower than that. But it's not worth complicating the formula here,
+        * as a bigger gap for higher orders than strictly necessary can also
+        * improve the chances of compaction success.
+        */
+       return 2UL << order;
+}
+
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -70,7 +95,6 @@ extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
                unsigned int order, unsigned int alloc_flags,
                const struct alloc_context *ac, enum compact_priority prio);
-extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern enum compact_result compaction_suitable(struct zone *zone, int order,
                unsigned int alloc_flags, int classzone_idx);
@@ -89,7 +113,7 @@ static inline bool compaction_made_progress(enum compact_result result)
         * that the compaction successfully isolated and migrated some
         * pageblocks.
         */
-       if (result == COMPACT_PARTIAL)
+       if (result == COMPACT_SUCCESS)
                return true;
 
        return false;
@@ -154,10 +178,6 @@ extern void kcompactd_stop(int nid);
 extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx);
 
 #else
-static inline void compact_pgdat(pg_data_t *pgdat, int order)
-{
-}
-
 static inline void reset_isolation_suitable(pg_data_t *pgdat)
 {
 }
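
As a worked example of compact_gap() above: for an order-3 request (32 KiB with 4 KiB pages), compact_gap(3) = 2UL << 3 = 16 free order-0 pages are required above the watermark before compaction is considered to have enough headroom.
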
index f964ef79e0adfc387c46026e05ba52c829c3baa1..63609398ef9f21bca890619d29be4329df4c079e 100644 (file)
@@ -432,7 +432,6 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
 
 asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
 
-extern __printf(1, 2) int compat_printk(const char *fmt, ...);
 extern void sigset_from_compat(sigset_t *set, const compat_sigset_t *compat);
 extern void sigset_to_compat(compat_sigset_t *compat, const sigset_t *set);
 
index d530c4627e54ef1091be820a78d3616166ca8906..3672809234a728ea9e7779b0456cbd57647d7150 100644 (file)
@@ -173,6 +173,12 @@ static inline void console_sysfs_notify(void)
 #endif
 extern bool console_suspend_enabled;
 
+#ifdef CONFIG_OF
+extern void console_set_by_of(void);
+#else
+static inline void console_set_by_of(void) {}
+#endif
+
 /* Suspend and resume console messages over PM events */
 extern void suspend_console(void);
 extern void resume_console(void);
index 7572d9e9dced921e1732226a2c00f5802f58c735..b886dc17f2f3457db43a2523aacb35e74e49e75b 100644 (file)
@@ -231,6 +231,11 @@ void cpu_startup_entry(enum cpuhp_state state);
 
 void cpu_idle_poll_ctrl(bool enable);
 
+/* Attach to any functions which should be considered cpuidle. */
+#define __cpuidle      __attribute__((__section__(".cpuidle.text")))
+
+bool cpu_in_idle(unsigned long pc);
+
 void arch_cpu_idle(void);
 void arch_cpu_idle_prepare(void);
 void arch_cpu_idle_enter(void);
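
Functions tagged __cpuidle land in the new .cpuidle.text section delimited by CPUIDLE_TEXT in vmlinux.lds.h, which is what cpu_in_idle() tests a PC against. A hedged sketch of opting a polling idle routine in; my_poll_idle() is illustrative and assumes <linux/cpu.h> and <linux/sched.h>:

#include <linux/cpu.h>
#include <linux/sched.h>

/* placed in .cpuidle.text, so cpu_in_idle() reports true for PCs inside it */
static void __cpuidle my_poll_idle(void)
{
        while (!need_resched())
                cpu_relax();
}
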
diff --git a/include/linux/crc64_ecma.h b/include/linux/crc64_ecma.h
new file mode 100644 (file)
index 0000000..bba7a4d
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CRC64_ECMA_H_
+#define __CRC64_ECMA_H_
+
+#include <linux/types.h>
+
+
+#define CRC64_DEFAULT_INITVAL           0xFFFFFFFFFFFFFFFFULL
+
+
+/*
+ * crc64_ecma_seed - Initializes the CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * @pdata:     pointer to the data to compute checksum for.
+ * @nbytes:    number of bytes in data buffer.
+ * @seed:      CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed);
+
+#endif /* __CRC64_ECMA_H_ */
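
A sketch of how a caller would use the new interface; checksum_buffer() is illustrative and assumes the matching lib/ implementation from this series is built in:

#include <linux/crc64_ecma.h>

static u64 checksum_buffer(const u8 *buf, u32 len)
{
        u64 seed = crc64_ecma_seed();   /* CRC64_DEFAULT_INITVAL */

        return crc64_ecma(buf, len, seed);
}
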
index 257db64562e54092adec7bc506b6db7325f86c3e..f0e70a1bb3acfe784148ba468a3577d23065a9a7 100644 (file)
@@ -26,15 +26,10 @@ struct inode;
 /*
  * COW Supplementary groups list
  */
-#define NGROUPS_SMALL          32
-#define NGROUPS_PER_BLOCK      ((unsigned int)(PAGE_SIZE / sizeof(kgid_t)))
-
 struct group_info {
        atomic_t        usage;
        int             ngroups;
-       int             nblocks;
-       kgid_t          small_block[NGROUPS_SMALL];
-       kgid_t          *blocks[0];
+       kgid_t          gid[0];
 };
 
 /**
@@ -88,10 +83,6 @@ extern void set_groups(struct cred *, struct group_info *);
 extern int groups_search(const struct group_info *, kgid_t);
 extern bool may_setgroups(void);
 
-/* access the groups "array" with this macro */
-#define GROUP_AT(gi, i) \
-       ((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK])
-
 /*
  * The security context of a task
  *
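
With the block indirection gone, call sites index the flat gid[] array directly instead of using the removed GROUP_AT() macro. A sketch of the new access pattern; group_list_contains() is illustrative and assumes <linux/cred.h> and <linux/uidgid.h>:

#include <linux/cred.h>
#include <linux/uidgid.h>

static bool group_list_contains(const struct group_info *gi, kgid_t needle)
{
        int i;

        for (i = 0; i < gi->ngroups; i++)
                if (gid_eq(gi->gid[i], needle))
                        return true;
        return false;
}
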
index 653589e3e30e8b03dd1844011bae7ca19e41136b..f13e4ff6835aed2e746792340590995a53f010b8 100644 (file)
@@ -22,7 +22,10 @@ extern const unsigned char _ctype[];
 #define isalnum(c)     ((__ismask(c)&(_U|_L|_D)) != 0)
 #define isalpha(c)     ((__ismask(c)&(_U|_L)) != 0)
 #define iscntrl(c)     ((__ismask(c)&(_C)) != 0)
-#define isdigit(c)     ((__ismask(c)&(_D)) != 0)
+static inline int isdigit(int c)
+{
+       return '0' <= c && c <= '9';
+}
 #define isgraph(c)     ((__ismask(c)&(_P|_U|_L|_D)) != 0)
 #define islower(c)     ((__ismask(c)&(_L)) != 0)
 #define isprint(c)     ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0)
index 66533e18276cf00e86de1fa8f9251c30f921fcf7..6efbd273a152059ded12cdc00cfc0094dd2e8fec 100644 (file)
  * that gives better TLB efficiency.
  */
 #define DMA_ATTR_ALLOC_SINGLE_PAGES    (1UL << 7)
+/*
+ * DMA_ATTR_NO_WARN: This tells the DMA-mapping subsystem to suppress
+ * allocation failure reports (similarly to __GFP_NOWARN).
+ */
+#define DMA_ATTR_NO_WARN       (1UL << 8)
 
 /*
  * A dma_addr_t can hold any valid DMA or bus address for the platform.
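
A sketch of a driver passing the new attribute on a streaming mapping so a failed allocation stays quiet; map_rings_quietly() and the direction are illustrative, and the fragment assumes <linux/dma-mapping.h>:

#include <linux/dma-mapping.h>

static int map_rings_quietly(struct device *dev, struct scatterlist *sg, int nents)
{
        unsigned long attrs = DMA_ATTR_NO_WARN;

        return dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE, attrs);
}
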
index 6f14de45b5ce0807e01f7c5093d46ab5c9a546bb..9b9f65d9987393d456911f41eacb4bdfa9fe0284 100644 (file)
@@ -87,6 +87,10 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 
 extern unsigned long transparent_hugepage_flags;
 
+extern unsigned long thp_get_unmapped_area(struct file *filp,
+               unsigned long addr, unsigned long len, unsigned long pgoff,
+               unsigned long flags);
+
 extern void prep_transhuge_page(struct page *page);
 extern void free_transhuge_page(struct page *page);
 
@@ -152,8 +156,8 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
        return is_huge_zero_page(pmd_page(pmd));
 }
 
-struct page *get_huge_zero_page(void);
-void put_huge_zero_page(void);
+struct page *mm_get_huge_zero_page(struct mm_struct *mm);
+void mm_put_huge_zero_page(struct mm_struct *mm);
 
 #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot))
 
@@ -169,6 +173,9 @@ void put_huge_zero_page(void);
 static inline void prep_transhuge_page(struct page *page) {}
 
 #define transparent_hugepage_flags 0UL
+
+#define thp_get_unmapped_area  NULL
+
 static inline int
 split_huge_page_to_list(struct page *page, struct list_head *list)
 {
@@ -213,9 +220,9 @@ static inline bool is_huge_zero_page(struct page *page)
        return false;
 }
 
-static inline void put_huge_zero_page(void)
+static inline void mm_put_huge_zero_page(struct mm_struct *mm)
 {
-       BUILD_BUG();
+       return;
 }
 
 static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
index 22a72198c14b7e338f4f10ed334f7c73be8cfcae..d9015dea55cdf6e4d66cf64a9dc00ee0864771d6 100644 (file)
@@ -2,7 +2,7 @@
 #define __LINUX_KBUILD_H
 
 #define DEFINE(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+       asm volatile ("#define " #sym " %0 /*" #val :: "i" (val))
 
 #define BLANK() asm volatile("\n->" : : )
 
index d96a6118d26a91b470a0fba9b7ad8b6b8b1d0087..853c4bf027ea674543d76946d7aa973ef91b3e71 100644 (file)
@@ -736,17 +736,25 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  * strict type-checking.. See the
  * "unnecessary" pointer comparison.
  */
-#define min(x, y) ({                           \
-       typeof(x) _min1 = (x);                  \
-       typeof(y) _min2 = (y);                  \
-       (void) (&_min1 == &_min2);              \
-       _min1 < _min2 ? _min1 : _min2; })
-
-#define max(x, y) ({                           \
-       typeof(x) _max1 = (x);                  \
-       typeof(y) _max2 = (y);                  \
-       (void) (&_max1 == &_max2);              \
-       _max1 > _max2 ? _max1 : _max2; })
+#define __min(t1, t2, min1, min2, x, y) ({             \
+       t1 min1 = (x);                                  \
+       t2 min2 = (y);                                  \
+       (void) (&min1 == &min2);                        \
+       min1 < min2 ? min1 : min2; })
+#define min(x, y)                                      \
+       __min(typeof(x), typeof(y),                     \
+             __UNIQUE_ID(min1_), __UNIQUE_ID(min2_),   \
+             x, y)
+
+#define __max(t1, t2, max1, max2, x, y) ({             \
+       t1 max1 = (x);                                  \
+       t2 max2 = (y);                                  \
+       (void) (&max1 == &max2);                        \
+       max1 > max2 ? max1 : max2; })
+#define max(x, y)                                      \
+       __max(typeof(x), typeof(y),                     \
+             __UNIQUE_ID(max1_), __UNIQUE_ID(max2_),   \
+             x, y)
 
 #define min3(x, y, z) min((typeof(x))min(x, y), z)
 #define max3(x, y, z) max((typeof(x))max(x, y), z)
@@ -778,15 +786,15 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  *
  * Or not use min/max/clamp at all, of course.
  */
-#define min_t(type, x, y) ({                   \
-       type __min1 = (x);                      \
-       type __min2 = (y);                      \
-       __min1 < __min2 ? __min1: __min2; })
-
-#define max_t(type, x, y) ({                   \
-       type __max1 = (x);                      \
-       type __max2 = (y);                      \
-       __max1 > __max2 ? __max1: __max2; })
+#define min_t(type, x, y)                              \
+       __min(type, type,                               \
+             __UNIQUE_ID(min1_), __UNIQUE_ID(min2_),   \
+             x, y)
+
+#define max_t(type, x, y)                              \
+       __max(type, type,                               \
+             __UNIQUE_ID(max1_), __UNIQUE_ID(max2_),   \
+             x, y)
 
 /**
  * clamp_t - return a value clamped to a given range using a given type
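
The point of routing min()/max()/min_t()/max_t() through __UNIQUE_ID() is that nested uses no longer declare identically named temporaries, so -Wshadow stays quiet and each level compares its own copies. A one-line sketch, assuming <linux/kernel.h>; smallest3() is illustrative:

#include <linux/kernel.h>

static int smallest3(int a, int b, int c)
{
        return min(a, min(b, c));       /* inner and outer temporaries get distinct names */
}
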
index d7437777baaafe382458df9115b005f764a064f5..d3ae4292931b790415b449d0fad19c017823f4ca 100644 (file)
@@ -259,6 +259,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
        vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
 #define VMCOREINFO_CONFIG(name) \
        vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
+#define VMCOREINFO_PHYS_BASE(value) \
+       vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
 
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
index 2925da23505d1dd7939060b471e4efc266cacaef..5b759c9acf97b479e2c9438a19bc61ed4cd5b9a0 100644 (file)
@@ -328,6 +328,7 @@ phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
                                  phys_addr_t max_addr);
 phys_addr_t memblock_phys_mem_size(void);
+phys_addr_t memblock_reserved_size(void);
 phys_addr_t memblock_mem_size(unsigned long limit_pfn);
 phys_addr_t memblock_start_of_DRAM(void);
 phys_addr_t memblock_end_of_DRAM(void);
index 5d8ca6e02e396bd1eb5a00118ecf6141c43900ac..0710143723bce194e7b77d80ef4314adb5e9e1d6 100644 (file)
@@ -366,6 +366,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
                                   struct mem_cgroup *,
                                   struct mem_cgroup_reclaim_cookie *);
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
+int mem_cgroup_scan_tasks(struct mem_cgroup *,
+                         int (*)(struct task_struct *, void *), void *);
 
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
@@ -446,6 +448,8 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 
 void mem_cgroup_handle_over_high(void);
 
+unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg);
+
 void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
                                struct task_struct *p);
 
@@ -639,6 +643,12 @@ static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
 {
 }
 
+static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
+               int (*fn)(struct task_struct *, void *), void *arg)
+{
+       return 0;
+}
+
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
        return 0;
@@ -669,6 +679,11 @@ mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
        return 0;
 }
 
+static inline unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
+{
+       return 0;
+}
+
 static inline void
 mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
index ef815b9cd42696bc70db1e9f35e39a9c295afacd..8bb9747c35957895b4171c78b5977df5e31cce8f 100644 (file)
@@ -1048,28 +1048,16 @@ struct address_space *page_file_mapping(struct page *page)
        return page->mapping;
 }
 
-/*
- * Return the pagecache index of the passed page.  Regular pagecache pages
- * use ->index whereas swapcache pages use ->private
- */
-static inline pgoff_t page_index(struct page *page)
-{
-       if (unlikely(PageSwapCache(page)))
-               return page_private(page);
-       return page->index;
-}
-
 extern pgoff_t __page_file_index(struct page *page);
 
 /*
- * Return the file index of the page. Regular pagecache pages use ->index
- * whereas swapcache pages use swp_offset(->private)
+ * Return the pagecache index of the passed page.  Regular pagecache pages
+ * use ->index whereas swapcache pages use swp_offset(->private)
  */
-static inline pgoff_t page_file_index(struct page *page)
+static inline pgoff_t page_index(struct page *page)
 {
        if (unlikely(PageSwapCache(page)))
                return __page_file_index(page);
-
        return page->index;
 }
 
@@ -1197,10 +1185,10 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
  * @pte_hole: if set, called for each hole at all levels
  * @hugetlb_entry: if set, called for each hugetlb entry
  * @test_walk: caller specific callback function to determine whether
- *             we walk over the current vma or not. A positive returned
+ *             we walk over the current vma or not. A returned 0
  *             value means "do page table walk over the current vma,"
  *             and a negative one means "abort current page table walk
- *             right now." 0 means "skip the current vma."
+ *             right now." 1 means "skip the current vma."
  * @mm:        mm_struct representing the target process of page table walk
  * @vma:       vma currently walked (NULL if walking outside vmas)
  * @private:   private data for callbacks' usage
@@ -1913,7 +1901,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
                                        struct mminit_pfnnid_cache *state);
 #endif
 
-extern void set_dma_reserve(unsigned long new_dma_reserve);
+extern void set_memory_reserve(unsigned long nr_reserve, bool inc);
 extern void memmap_init_zone(unsigned long, int, unsigned long,
                                unsigned long, enum memmap_context);
 extern void setup_per_zone_wmarks(void);
@@ -1924,6 +1912,9 @@ extern void show_mem(unsigned int flags);
 extern long si_mem_available(void);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
+#ifdef __HAVE_ARCH_RESERVED_KERNEL_PAGES
+extern unsigned long arch_reserved_kernel_pages(void);
+#endif
 
 extern __printf(3, 4)
 void warn_alloc_failed(gfp_t gfp_mask, unsigned int order,
index 903200f4ec41ce03c50c15bfa079d5a01f903afa..4a8acedf4b7d914b8474773328d09adb3d6bc2c4 100644 (file)
@@ -515,9 +515,7 @@ struct mm_struct {
 #ifdef CONFIG_HUGETLB_PAGE
        atomic_long_t hugetlb_usage;
 #endif
-#ifdef CONFIG_MMU
        struct work_struct async_put_work;
-#endif
 };
 
 static inline void mm_init_cpumask(struct mm_struct *mm)
index 4630eeae18e08df43be6567ca5fbcb2027643c65..a78c35cff1ae34c9c80f0f4d57895df870528aba 100644 (file)
@@ -35,21 +35,34 @@ static inline void hardlockup_detector_disable(void) {}
  * base function. Return whether such support was available,
  * to allow calling code to fall back to some other mechanism:
  */
-#ifdef arch_trigger_all_cpu_backtrace
+#ifdef arch_trigger_cpumask_backtrace
 static inline bool trigger_all_cpu_backtrace(void)
 {
-       arch_trigger_all_cpu_backtrace(true);
-
+       arch_trigger_cpumask_backtrace(cpu_online_mask, false);
        return true;
 }
+
 static inline bool trigger_allbutself_cpu_backtrace(void)
 {
-       arch_trigger_all_cpu_backtrace(false);
+       arch_trigger_cpumask_backtrace(cpu_online_mask, true);
+       return true;
+}
+
+static inline bool trigger_cpumask_backtrace(struct cpumask *mask)
+{
+       arch_trigger_cpumask_backtrace(mask, false);
+       return true;
+}
+
+static inline bool trigger_single_cpu_backtrace(int cpu)
+{
+       arch_trigger_cpumask_backtrace(cpumask_of(cpu), false);
        return true;
 }
 
 /* generic implementation */
-void nmi_trigger_all_cpu_backtrace(bool include_self,
+void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
+                                  bool exclude_self,
                                   void (*raise)(cpumask_t *mask));
 bool nmi_cpu_backtrace(struct pt_regs *regs);
 
@@ -62,6 +75,14 @@ static inline bool trigger_allbutself_cpu_backtrace(void)
 {
        return false;
 }
+static inline bool trigger_cpumask_backtrace(struct cpumask *mask)
+{
+       return false;
+}
+static inline bool trigger_single_cpu_backtrace(int cpu)
+{
+       return false;
+}
 #endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
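
The cpumask-based entry points let callers target a subset of CPUs instead of all or all-but-self. A sketch of dumping a single stuck CPU; report_stuck_cpu() is illustrative and assumes <linux/nmi.h> and <linux/printk.h>:

#include <linux/nmi.h>
#include <linux/printk.h>

static void report_stuck_cpu(int cpu)
{
        if (!trigger_single_cpu_backtrace(cpu))
                pr_warn("CPU%d: NMI backtrace not supported on this arch\n", cpu);
}
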
index 5bc0457ee3a88955f64b750a06858d45cb74b5da..b4e36e92bc878f94d013b72eec7c44a16d8096e9 100644 (file)
@@ -34,23 +34,11 @@ struct oom_control {
         * for display purposes.
         */
        const int order;
-};
-
-/*
- * Types of limitations to the nodes from which allocations may occur
- */
-enum oom_constraint {
-       CONSTRAINT_NONE,
-       CONSTRAINT_CPUSET,
-       CONSTRAINT_MEMORY_POLICY,
-       CONSTRAINT_MEMCG,
-};
 
-enum oom_scan_t {
-       OOM_SCAN_OK,            /* scan thread and find its badness */
-       OOM_SCAN_CONTINUE,      /* do not consider thread for oom kill */
-       OOM_SCAN_ABORT,         /* abort the iteration and return */
-       OOM_SCAN_SELECT,        /* always select this thread first */
+       /* Used by oom implementation, do not set */
+       unsigned long totalpages;
+       struct task_struct *chosen;
+       unsigned long chosen_points;
 };
 
 extern struct mutex oom_lock;
@@ -70,45 +58,27 @@ static inline bool oom_task_origin(const struct task_struct *p)
        return p->signal->oom_flag_origin;
 }
 
-extern void mark_oom_victim(struct task_struct *tsk);
-
-#ifdef CONFIG_MMU
-extern void wake_oom_reaper(struct task_struct *tsk);
-#else
-static inline void wake_oom_reaper(struct task_struct *tsk)
+static inline bool tsk_is_oom_victim(struct task_struct *tsk)
 {
+       return tsk->signal->oom_mm;
 }
-#endif
 
 extern unsigned long oom_badness(struct task_struct *p,
                struct mem_cgroup *memcg, const nodemask_t *nodemask,
                unsigned long totalpages);
 
-extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
-                            unsigned int points, unsigned long totalpages,
-                            const char *message);
-
-extern void check_panic_on_oom(struct oom_control *oc,
-                              enum oom_constraint constraint);
-
-extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
-                                              struct task_struct *task);
-
 extern bool out_of_memory(struct oom_control *oc);
 
-extern void exit_oom_victim(struct task_struct *tsk);
+extern void exit_oom_victim(void);
 
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
 
-extern bool oom_killer_disabled;
-extern bool oom_killer_disable(void);
+extern bool oom_killer_disable(signed long timeout);
 extern void oom_killer_enable(void);
 
 extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 
-bool task_will_free_mem(struct task_struct *task);
-
 /* sysctls */
 extern int sysctl_oom_dump_tasks;
 extern int sysctl_oom_kill_allocating_task;
index 03f2a3e7d76d40ee457b8ab6f2d140d1ee9475b1..9298c393ddaa24a63c849a6c811d5d4110c1a410 100644 (file)
@@ -7,6 +7,8 @@
 
 struct pglist_data;
 struct page_ext_operations {
+       size_t offset;
+       size_t size;
        bool (*need)(void);
        void (*init)(void);
 };
@@ -42,12 +44,6 @@ enum page_ext_flags {
  */
 struct page_ext {
        unsigned long flags;
-#ifdef CONFIG_PAGE_OWNER
-       unsigned int order;
-       gfp_t gfp_mask;
-       int last_migrate_reason;
-       depot_stack_handle_t handle;
-#endif
 };
 
 extern void pgdat_page_ext_init(struct pglist_data *pgdat);
index 30583ab0ffb1f8da50d6a3a5cd8e0277af78a304..2be728d156b5555a6331a89798bd4b0bfc2a9562 100644 (file)
@@ -14,6 +14,8 @@ extern void __split_page_owner(struct page *page, unsigned int order);
 extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
 extern void __set_page_owner_migrate_reason(struct page *page, int reason);
 extern void __dump_page_owner(struct page *page);
+extern void pagetypeinfo_showmixedcount_print(struct seq_file *m,
+                                       pg_data_t *pgdat, struct zone *zone);
 
 static inline void reset_page_owner(struct page *page, unsigned int order)
 {
index 66a1260b33de9a48fe0dcb2c1d4460c44950c457..6799d109c49ea19b775a226b87ccc42e7d8a0cf5 100644 (file)
@@ -25,6 +25,10 @@ enum mapping_flags {
        AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */
        AS_UNEVICTABLE  = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */
        AS_EXITING      = __GFP_BITS_SHIFT + 4, /* final truncate in progress */
+       /* writeback related tags are not used */
+       AS_NO_WRITEBACK_TAGS = __GFP_BITS_SHIFT + 5,
+
+       AS_LAST_FLAG,
 };
 
 static inline void mapping_set_error(struct address_space *mapping, int error)
@@ -64,6 +68,16 @@ static inline int mapping_exiting(struct address_space *mapping)
        return test_bit(AS_EXITING, &mapping->flags);
 }
 
+static inline void mapping_set_no_writeback_tags(struct address_space *mapping)
+{
+       set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
+}
+
+static inline int mapping_use_writeback_tags(struct address_space *mapping)
+{
+       return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
+}
+
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
        return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
@@ -396,7 +410,7 @@ static inline loff_t page_offset(struct page *page)
 
 static inline loff_t page_file_offset(struct page *page)
 {
-       return ((loff_t)page_file_index(page)) << PAGE_SHIFT;
+       return ((loff_t)page_index(page)) << PAGE_SHIFT;
 }
 
 extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
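
A hedged sketch of how the new AS_NO_WRITEBACK_TAGS flag is meant to be used: a filesystem whose mappings never participate in writeback tagging opts out once at setup time, and writeback paths consult mapping_use_writeback_tags() before touching radix-tree tags. The hook names are illustrative:

static void example_setup_mapping(struct inode *inode)
{
        /* e.g. a DAX-backed inode that never dirties page cache pages */
        mapping_set_no_writeback_tags(inode->i_mapping);
}

static bool example_should_tag(struct address_space *mapping)
{
        return mapping_use_writeback_tags(mapping);
}
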
index 4c45105dece3879f943219276a2db81c4656ce9f..4613bf35c311ec4a59abec3945138597169e99db 100644 (file)
@@ -461,6 +461,14 @@ static inline struct radix_tree_node *entry_to_node(void *ptr)
  *
  * This function updates @iter->index in the case of a successful lookup.
  * For tagged lookup it also eats @iter->tags.
+ *
+ * There are several cases where 'slot' can be passed in as NULL to this
+ * function.  These cases result from the use of radix_tree_iter_next() or
+ * radix_tree_iter_retry().  In these cases we don't end up dereferencing
+ * 'slot' because either:
+ * a) we are doing tagged iteration and iter->tags has been set to 0, or
+ * b) we are doing non-tagged iteration, and iter->index and iter->next_index
+ *    have been set up so that radix_tree_chunk_size() returns 1 or 0.
  */
 static __always_inline void **
 radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
index d80a4388a4fd793b0113675c1f8b42cb86b5506e..7bd2403e4fef1ad7fb0a5f03b4e104e96234d26b 100644 (file)
@@ -45,7 +45,7 @@ extern const struct file_operations random_fops, urandom_fops;
 
 unsigned int get_random_int(void);
 unsigned long get_random_long(void);
-unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
+unsigned long randomize_page(unsigned long start, unsigned long range);
 
 u32 prandom_u32(void);
 void prandom_bytes(void *buf, size_t nbytes);
index ecbb34a382b898cabf40d04237fbfae5534c5bc0..ab0c59e947104e053996fb4141306f79744e92b6 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/poll.h>
 #include <linux/kref.h>
 #include <linux/percpu.h>
+#include <linux/irq_work.h>
 
 /*
  * Tracks changes to rchan/rchan_buf structs
@@ -38,7 +39,7 @@ struct rchan_buf
        size_t subbufs_consumed;        /* count of sub-buffers consumed */
        struct rchan *chan;             /* associated channel */
        wait_queue_head_t read_wait;    /* reader wait queue */
-       struct timer_list timer;        /* reader wake-up timer */
+       struct irq_work wakeup_work;    /* reader wakeup */
        struct dentry *dentry;          /* channel file dentry */
        struct kref kref;               /* channel buffer refcount */
        struct page **page_array;       /* array of current buffer pages */
index 06bd6ab542313770f01251b1af0400086f51c97f..af39baf764ddf68a6aa833c05ad278ad61f28ab9 100644 (file)
@@ -522,8 +522,9 @@ static inline int get_dumpable(struct mm_struct *mm)
 
 #define MMF_HAS_UPROBES                19      /* has uprobes */
 #define MMF_RECALC_UPROBES     20      /* MMF_HAS_UPROBES can be wrong */
-#define MMF_OOM_REAPED         21      /* mm has been already reaped */
-#define MMF_OOM_NOT_REAPABLE   22      /* mm couldn't be reaped */
+#define MMF_OOM_SKIP           21      /* mm is of no interest for the OOM killer */
+#define MMF_UNSTABLE           22      /* mm is unstable for copy_from_user */
+#define MMF_HUGE_ZERO_PAGE     23      /* mm has ever used the global huge zero page */
 
 #define MMF_INIT_MASK          (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 
@@ -671,7 +672,6 @@ struct signal_struct {
        atomic_t                sigcnt;
        atomic_t                live;
        int                     nr_threads;
-       atomic_t oom_victims; /* # of TIF_MEDIE threads in this thread group */
        struct list_head        thread_head;
 
        wait_queue_head_t       wait_chldexit;  /* for wait4() */
@@ -804,6 +804,8 @@ struct signal_struct {
        short oom_score_adj;            /* OOM kill score adjustment */
        short oom_score_adj_min;        /* OOM kill score adjustment min value.
                                         * Only settable by CAP_SYS_RESOURCE. */
+       struct mm_struct *oom_mm;       /* recorded mm when the thread group got
+                                        * killed by the oom killer */
 
        struct mutex cred_guard_mutex;  /* guard against foreign influences on
                                         * credential calculations
@@ -2848,6 +2850,20 @@ static inline void mmdrop(struct mm_struct *mm)
                __mmdrop(mm);
 }
 
+static inline void mmdrop_async_fn(struct work_struct *work)
+{
+       struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
+       __mmdrop(mm);
+}
+
+static inline void mmdrop_async(struct mm_struct *mm)
+{
+       if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
+               INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
+               schedule_work(&mm->async_put_work);
+       }
+}
+
 static inline bool mmget_not_zero(struct mm_struct *mm)
 {
        return atomic_inc_not_zero(&mm->mm_users);
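
The signal->oom_mm field and mmdrop_async() imply a reference protocol roughly as sketched below; the OOM-killer side is not part of this hunk, so the placement and helper name are assumptions:

/* Hedged sketch: pin the victim's mm into signal->oom_mm so it stays
 * valid for the OOM reaper even after exit_mm(); the reference is
 * dropped from free_signal_struct() via mmdrop_async(). */
static void example_record_oom_mm(struct task_struct *tsk, struct mm_struct *mm)
{
        if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm))
                atomic_inc(&mm->mm_count);
}
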
index 976ce3a19f1b23646c4494029929e538f5e0204b..d0efd6e6c20a6a6a39273639dbd33f3d77c2e156 100644 (file)
@@ -21,6 +21,7 @@ struct sem_array {
        struct list_head        list_id;        /* undo requests on this array */
        int                     sem_nsems;      /* no. of semaphores in array */
        int                     complex_count;  /* pending complex operations */
+       bool                    complex_mode;   /* no parallel simple ops */
 };
 
 #ifdef CONFIG_SYSVIPC
index f3d45dd42695e1f813d67103a118dc03d27cdcbc..e305b66a9fb994904cdd87b046cca7822e62cef1 100644 (file)
@@ -117,9 +117,9 @@ __printf(2, 3)
 void seq_printf(struct seq_file *m, const char *fmt, ...);
 void seq_putc(struct seq_file *m, char c);
 void seq_puts(struct seq_file *m, const char *s);
-void seq_put_decimal_ull(struct seq_file *m, char delimiter,
+void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
                         unsigned long long num);
-void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num);
+void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num);
 void seq_escape(struct seq_file *m, const char *s, const char *esc);
 
 void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
index b17cc4830fa670512abdc3987f58fc55e583f0bc..ed41bec3d12893822bf88753b6e4f0c4bfb8bdb5 100644 (file)
@@ -191,6 +191,11 @@ struct percpu_cluster {
        unsigned int next; /* Likely next allocation offset */
 };
 
+struct swap_cluster_list {
+       struct swap_cluster_info head;
+       struct swap_cluster_info tail;
+};
+
 /*
  * The in-memory structure used to track swap areas.
  */
@@ -203,8 +208,7 @@ struct swap_info_struct {
        unsigned int    max;            /* extent of the swap_map */
        unsigned char *swap_map;        /* vmalloc'ed array of usage counts */
        struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
-       struct swap_cluster_info free_cluster_head; /* free cluster list head */
-       struct swap_cluster_info free_cluster_tail; /* free cluster list tail */
+       struct swap_cluster_list free_clusters; /* free clusters list */
        unsigned int lowest_bit;        /* index of first free in swap_map */
        unsigned int highest_bit;       /* index of last free in swap_map */
        unsigned int pages;             /* total of usable pages of swap */
@@ -235,8 +239,7 @@ struct swap_info_struct {
                                         * first.
                                         */
        struct work_struct discard_work; /* discard worker */
-       struct swap_cluster_info discard_cluster_head; /* list head of discard clusters */
-       struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */
+       struct swap_cluster_list discard_clusters; /* discard clusters list */
 };
 
 /* linux/mm/workingset.c */
index fc1e16c25a296877e85d37d267bd953929ee93cd..797100e100109756f68a4248390b7ad4dfab0617 100644 (file)
@@ -319,7 +319,6 @@ void laptop_mode_timer_fn(unsigned long data);
 #else
 static inline void laptop_sync_completion(void) { }
 #endif
-void throttle_vm_writeout(gfp_t gfp_mask);
 bool node_dirty_ok(struct pglist_data *pgdat);
 int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
 #ifdef CONFIG_CGROUP_WRITEBACK
index c2ba402ab25651009c5b67e90145b81124fd110e..cbdb90b6b30847670041434cfe546bb62223b77c 100644 (file)
@@ -13,7 +13,7 @@
        EM( COMPACT_SKIPPED,            "skipped")              \
        EM( COMPACT_DEFERRED,           "deferred")             \
        EM( COMPACT_CONTINUE,           "continue")             \
-       EM( COMPACT_PARTIAL,            "partial")              \
+       EM( COMPACT_SUCCESS,            "success")              \
        EM( COMPACT_PARTIAL_SKIPPED,    "partial_skipped")      \
        EM( COMPACT_COMPLETE,           "complete")             \
        EM( COMPACT_NO_SUITABLE_PAGE,   "no_suitable_page")     \
diff --git a/include/trace/events/zsmalloc.h b/include/trace/events/zsmalloc.h
new file mode 100644 (file)
index 0000000..772cf65
--- /dev/null
@@ -0,0 +1,76 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM zsmalloc
+
+#if !defined(_TRACE_ZSMALLOC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ZSMALLOC_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(zs_compact_start,
+
+       TP_PROTO(const char *pool_name),
+
+       TP_ARGS(pool_name),
+
+       TP_STRUCT__entry(
+               __field(const char *, pool_name)
+       ),
+
+       TP_fast_assign(
+               __entry->pool_name = pool_name;
+       ),
+
+       TP_printk("pool %s",
+                 __entry->pool_name)
+);
+
+TRACE_EVENT(zs_compact_end,
+
+       TP_PROTO(const char *pool_name, unsigned long pages_compacted),
+
+       TP_ARGS(pool_name, pages_compacted),
+
+       TP_STRUCT__entry(
+               __field(const char *, pool_name)
+               __field(unsigned long, pages_compacted)
+       ),
+
+       TP_fast_assign(
+               __entry->pool_name = pool_name;
+               __entry->pages_compacted = pages_compacted;
+       ),
+
+       TP_printk("pool %s: %ld pages compacted",
+                 __entry->pool_name,
+                 __entry->pages_compacted)
+);
+
+TRACE_EVENT(zs_compact,
+
+       TP_PROTO(int class, unsigned long nr_migrated_obj, unsigned long nr_freed_pages),
+
+       TP_ARGS(class, nr_migrated_obj, nr_freed_pages),
+
+       TP_STRUCT__entry(
+               __field(int, class)
+               __field(unsigned long, nr_migrated_obj)
+               __field(unsigned long, nr_freed_pages)
+       ),
+
+       TP_fast_assign(
+               __entry->class = class;
+               __entry->nr_migrated_obj = nr_migrated_obj;
+               __entry->nr_freed_pages = nr_freed_pages;
+       ),
+
+       TP_printk("class %3d: %ld objects migrated, %ld pages freed",
+                 __entry->class,
+                 __entry->nr_migrated_obj,
+                 __entry->nr_freed_pages)
+);
+
+#endif /* _TRACE_ZSMALLOC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
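
A hedged usage sketch for the tracepoints defined above; it would live in mm/zsmalloc.c (the one translation unit that defines CREATE_TRACE_POINTS), and the local variables stand in for zsmalloc's real per-class bookkeeping:

#define CREATE_TRACE_POINTS
#include <trace/events/zsmalloc.h>

static void example_emit_compaction_events(const char *pool_name)
{
        int class_index = 42;                   /* illustrative size class */
        unsigned long migrated = 0, freed = 0;  /* per-class counters */
        unsigned long total_compacted = 0;

        trace_zs_compact_start(pool_name);
        trace_zs_compact(class_index, migrated, freed);
        trace_zs_compact_end(pool_name, total_compacted);
}
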
diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h
new file mode 100644 (file)
index 0000000..021ed33
--- /dev/null
@@ -0,0 +1,221 @@
+/*
+ * Copyright 2008 Red Hat, Inc. All rights reserved.
+ * Copyright 2008 Ian Kent <raven@themaw.net>
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ */
+
+#ifndef _UAPI_LINUX_AUTO_DEV_IOCTL_H
+#define _UAPI_LINUX_AUTO_DEV_IOCTL_H
+
+#include <linux/auto_fs.h>
+#include <linux/string.h>
+
+#define AUTOFS_DEVICE_NAME             "autofs"
+
+#define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1
+#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0
+
+#define AUTOFS_DEV_IOCTL_SIZE          sizeof(struct autofs_dev_ioctl)
+
+/*
+ * An ioctl interface for autofs mount point control.
+ */
+
+struct args_protover {
+       __u32   version;
+};
+
+struct args_protosubver {
+       __u32   sub_version;
+};
+
+struct args_openmount {
+       __u32   devid;
+};
+
+struct args_ready {
+       __u32   token;
+};
+
+struct args_fail {
+       __u32   token;
+       __s32   status;
+};
+
+struct args_setpipefd {
+       __s32   pipefd;
+};
+
+struct args_timeout {
+       __u64   timeout;
+};
+
+struct args_requester {
+       __u32   uid;
+       __u32   gid;
+};
+
+struct args_expire {
+       __u32   how;
+};
+
+struct args_askumount {
+       __u32   may_umount;
+};
+
+struct args_ismountpoint {
+       union {
+               struct args_in {
+                       __u32   type;
+               } in;
+               struct args_out {
+                       __u32   devid;
+                       __u32   magic;
+               } out;
+       };
+};
+
+/*
+ * All the ioctls use this structure.
+ * When sending a path, size must account for the total length
+ * of the chunk of memory; otherwise it is the size of the
+ * structure.
+ */
+
+struct autofs_dev_ioctl {
+       __u32 ver_major;
+       __u32 ver_minor;
+       __u32 size;             /* total size of data passed in
+                                * including this struct */
+       __s32 ioctlfd;          /* automount command fd */
+
+       /* Command parameters */
+
+       union {
+               struct args_protover            protover;
+               struct args_protosubver         protosubver;
+               struct args_openmount           openmount;
+               struct args_ready               ready;
+               struct args_fail                fail;
+               struct args_setpipefd           setpipefd;
+               struct args_timeout             timeout;
+               struct args_requester           requester;
+               struct args_expire              expire;
+               struct args_askumount           askumount;
+               struct args_ismountpoint        ismountpoint;
+       };
+
+       char path[0];
+};
+
+static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
+{
+       memset(in, 0, sizeof(struct autofs_dev_ioctl));
+       in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
+       in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
+       in->size = sizeof(struct autofs_dev_ioctl);
+       in->ioctlfd = -1;
+}
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to autofs-dev-ioctl.c:lookup_ioctl()
+ */
+enum {
+       /* Get various version info */
+       AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
+       AUTOFS_DEV_IOCTL_PROTOVER_CMD,
+       AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD,
+
+       /* Open mount ioctl fd */
+       AUTOFS_DEV_IOCTL_OPENMOUNT_CMD,
+
+       /* Close mount ioctl fd */
+       AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD,
+
+       /* Mount/expire status returns */
+       AUTOFS_DEV_IOCTL_READY_CMD,
+       AUTOFS_DEV_IOCTL_FAIL_CMD,
+
+       /* Activate/deactivate autofs mount */
+       AUTOFS_DEV_IOCTL_SETPIPEFD_CMD,
+       AUTOFS_DEV_IOCTL_CATATONIC_CMD,
+
+       /* Expiry timeout */
+       AUTOFS_DEV_IOCTL_TIMEOUT_CMD,
+
+       /* Get mount last requesting uid and gid */
+       AUTOFS_DEV_IOCTL_REQUESTER_CMD,
+
+       /* Check for eligible expire candidates */
+       AUTOFS_DEV_IOCTL_EXPIRE_CMD,
+
+       /* Request busy status */
+       AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD,
+
+       /* Check if path is a mountpoint */
+       AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
+};
+
+#define AUTOFS_IOCTL 0x93
+
+#define AUTOFS_DEV_IOCTL_VERSION \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOVER \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOSUBVER \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_OPENMOUNT \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_READY \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_FAIL \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_SETPIPEFD \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CATATONIC \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_TIMEOUT \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_REQUESTER \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_EXPIRE \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ASKUMOUNT \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \
+       _IOWR(AUTOFS_IOCTL, \
+             AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl)
+
+#endif /* _UAPI_LINUX_AUTO_DEV_IOCTL_H */
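
A hedged userspace sketch of the ioctl interface declared above: query the protocol version through the conventional /dev/autofs misc device. Error handling is minimal and nothing beyond the header's own definitions is assumed:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/auto_dev-ioctl.h>

int main(void)
{
        struct autofs_dev_ioctl param;
        int fd = open("/dev/" AUTOFS_DEVICE_NAME, O_RDONLY);

        if (fd < 0)
                return 1;

        init_autofs_dev_ioctl(&param);
        if (ioctl(fd, AUTOFS_DEV_IOCTL_VERSION, &param) == 0)
                printf("autofs dev ioctl %u.%u\n",
                       param.ver_major, param.ver_minor);
        close(fd);
        return 0;
}
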
index 9175a1b4dc69a41301f0576939eeb78e75e02887..1bfc3ed8b2841d7a5c099f1a2bd3310720720a6a 100644 (file)
@@ -12,6 +12,7 @@
 #define _UAPI_LINUX_AUTO_FS_H
 
 #include <linux/types.h>
+#include <linux/limits.h>
 #ifndef __KERNEL__
 #include <sys/ioctl.h>
 #endif /* __KERNEL__ */
index 2858be732f6d25dd8431cd994645c8c6af3828c2..20dacff539072856f7ea700505d36393abb75bc2 100644 (file)
@@ -59,6 +59,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/device.h>
 #include <linux/kthread.h>
+#include <linux/pagemap.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/idr.h>
@@ -463,6 +464,9 @@ void __init __weak thread_stack_cache_init(void)
  */
 static void __init mm_init(void)
 {
+       /* Does address_space.flags still fit into a 32-bit ulong? */
+       BUILD_BUG_ON(AS_LAST_FLAG > 32);
+
        /*
         * page_ext requires contiguous pages,
         * bigger than MAX_ORDER unless SPARSEMEM.
index c6521c205cb403a81cc2bf4e6969b2a5d54620c3..1ce3d180c58ffa4f50a078c98f73eb418b81ac6e 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -51,19 +51,14 @@ struct msg_receiver {
        long                    r_msgtype;
        long                    r_maxsize;
 
-       /*
-        * Mark r_msg volatile so that the compiler
-        * does not try to get smart and optimize
-        * it. We rely on this for the lockless
-        * receive algorithm.
-        */
-       struct msg_msg          *volatile r_msg;
+       struct msg_msg          *r_msg;
 };
 
 /* one msg_sender for each sleeping sender */
 struct msg_sender {
        struct list_head        list;
        struct task_struct      *tsk;
+       size_t                  msgsz;
 };
 
 #define SEARCH_ANY             1
@@ -159,45 +154,72 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
        return msq->q_perm.id;
 }
 
-static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
+static inline bool msg_fits_inqueue(struct msg_queue *msq, size_t msgsz)
+{
+       return msgsz + msq->q_cbytes <= msq->q_qbytes &&
+               1 + msq->q_qnum <= msq->q_qbytes;
+}
+
+static inline void ss_add(struct msg_queue *msq,
+                         struct msg_sender *mss, size_t msgsz)
 {
        mss->tsk = current;
+       mss->msgsz = msgsz;
        __set_current_state(TASK_INTERRUPTIBLE);
        list_add_tail(&mss->list, &msq->q_senders);
 }
 
 static inline void ss_del(struct msg_sender *mss)
 {
-       if (mss->list.next != NULL)
+       if (mss->list.next)
                list_del(&mss->list);
 }
 
-static void ss_wakeup(struct list_head *h, int kill)
+static void ss_wakeup(struct msg_queue *msq,
+                     struct wake_q_head *wake_q, bool kill)
 {
        struct msg_sender *mss, *t;
+       struct task_struct *stop_tsk = NULL;
+       struct list_head *h = &msq->q_senders;
 
        list_for_each_entry_safe(mss, t, h, list) {
                if (kill)
                        mss->list.next = NULL;
-               wake_up_process(mss->tsk);
+
+               /*
+                * Stop at the first task we don't wake up;
+                * we've already iterated the original
+                * sender queue.
+                */
+               else if (stop_tsk == mss->tsk)
+                       break;
+               /*
+                * verify that we really need to wake up the task.
+                * verify that we really need to wakeup the task.
+                * To maintain current semantics and wakeup order,
+                * move the sender to the tail on behalf of the
+                * blocked task.
+                */
+               else if (!msg_fits_inqueue(msq, mss->msgsz)) {
+                       if (!stop_tsk)
+                               stop_tsk = mss->tsk;
+
+                       list_move_tail(&mss->list, &msq->q_senders);
+                       continue;
+               }
+
+               wake_q_add(wake_q, mss->tsk);
        }
 }
 
-static void expunge_all(struct msg_queue *msq, int res)
+static void expunge_all(struct msg_queue *msq, int res,
+                       struct wake_q_head *wake_q)
 {
        struct msg_receiver *msr, *t;
 
        list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
-               msr->r_msg = NULL; /* initialize expunge ordering */
-               wake_up_process(msr->r_tsk);
-               /*
-                * Ensure that the wakeup is visible before setting r_msg as
-                * the receiving end depends on it: either spinning on a nil,
-                * or dealing with -EAGAIN cases. See lockless receive part 1
-                * and 2 in do_msgrcv().
-                */
-               smp_wmb(); /* barrier (B) */
-               msr->r_msg = ERR_PTR(res);
+               wake_q_add(wake_q, msr->r_tsk);
+               WRITE_ONCE(msr->r_msg, ERR_PTR(res));
        }
 }
 
@@ -213,11 +235,13 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
        struct msg_msg *msg, *t;
        struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
+       WAKE_Q(wake_q);
 
-       expunge_all(msq, -EIDRM);
-       ss_wakeup(&msq->q_senders, 1);
+       expunge_all(msq, -EIDRM, &wake_q);
+       ss_wakeup(msq, &wake_q, true);
        msg_rmid(ns, msq);
        ipc_unlock_object(&msq->q_perm);
+       wake_up_q(&wake_q);
        rcu_read_unlock();
 
        list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
@@ -372,6 +396,9 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
                freeque(ns, ipcp);
                goto out_up;
        case IPC_SET:
+       {
+               WAKE_Q(wake_q);
+
                if (msqid64.msg_qbytes > ns->msg_ctlmnb &&
                    !capable(CAP_SYS_RESOURCE)) {
                        err = -EPERM;
@@ -386,15 +413,21 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
                msq->q_qbytes = msqid64.msg_qbytes;
 
                msq->q_ctime = get_seconds();
-               /* sleeping receivers might be excluded by
+               /*
+                * Sleeping receivers might be excluded by
                 * stricter permissions.
                 */
-               expunge_all(msq, -EAGAIN);
-               /* sleeping senders might be able to send
+               expunge_all(msq, -EAGAIN, &wake_q);
+               /*
+                * Sleeping senders might be able to send
                 * due to a larger queue size.
                 */
-               ss_wakeup(&msq->q_senders, 0);
-               break;
+               ss_wakeup(msq, &wake_q, false);
+               ipc_unlock_object(&msq->q_perm);
+               wake_up_q(&wake_q);
+
+               goto out_unlock1;
+       }
        default:
                err = -EINVAL;
                goto out_unlock1;
@@ -566,7 +599,8 @@ static int testmsg(struct msg_msg *msg, long type, int mode)
        return 0;
 }
 
-static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
+static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
+                                struct wake_q_head *wake_q)
 {
        struct msg_receiver *msr, *t;
 
@@ -577,27 +611,14 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
 
                        list_del(&msr->r_list);
                        if (msr->r_maxsize < msg->m_ts) {
-                               /* initialize pipelined send ordering */
-                               msr->r_msg = NULL;
-                               wake_up_process(msr->r_tsk);
-                               /* barrier (B) see barrier comment below */
-                               smp_wmb();
-                               msr->r_msg = ERR_PTR(-E2BIG);
+                               wake_q_add(wake_q, msr->r_tsk);
+                               WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
                        } else {
-                               msr->r_msg = NULL;
                                msq->q_lrpid = task_pid_vnr(msr->r_tsk);
                                msq->q_rtime = get_seconds();
-                               wake_up_process(msr->r_tsk);
-                               /*
-                                * Ensure that the wakeup is visible before
-                                * setting r_msg, as the receiving can otherwise
-                                * exit - once r_msg is set, the receiver can
-                                * continue. See lockless receive part 1 and 2
-                                * in do_msgrcv(). Barrier (B).
-                                */
-                               smp_wmb();
-                               msr->r_msg = msg;
 
+                               wake_q_add(wake_q, msr->r_tsk);
+                               WRITE_ONCE(msr->r_msg, msg);
                                return 1;
                        }
                }
@@ -613,6 +634,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
        struct msg_msg *msg;
        int err;
        struct ipc_namespace *ns;
+       WAKE_Q(wake_q);
 
        ns = current->nsproxy->ipc_ns;
 
@@ -635,14 +657,14 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                goto out_unlock1;
        }
 
-       ipc_lock_object(&msq->q_perm);
-
        for (;;) {
                struct msg_sender s;
 
                err = -EACCES;
                if (ipcperms(ns, &msq->q_perm, S_IWUGO))
-                       goto out_unlock0;
+                       goto out_unlock1;
+
+               ipc_lock_object(&msq->q_perm);
 
                /* raced with RMID? */
                if (!ipc_valid_object(&msq->q_perm)) {
@@ -654,10 +676,8 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                if (err)
                        goto out_unlock0;
 
-               if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
-                               1 + msq->q_qnum <= msq->q_qbytes) {
+               if (msg_fits_inqueue(msq, msgsz))
                        break;
-               }
 
                /* queue full, wait: */
                if (msgflg & IPC_NOWAIT) {
@@ -666,7 +686,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                }
 
                /* enqueue the sender and prepare to block */
-               ss_add(msq, &s);
+               ss_add(msq, &s, msgsz);
 
                if (!ipc_rcu_getref(msq)) {
                        err = -EIDRM;
@@ -686,7 +706,6 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                        err = -EIDRM;
                        goto out_unlock0;
                }
-
                ss_del(&s);
 
                if (signal_pending(current)) {
@@ -694,11 +713,13 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                        goto out_unlock0;
                }
 
+               ipc_unlock_object(&msq->q_perm);
        }
+
        msq->q_lspid = task_tgid_vnr(current);
        msq->q_stime = get_seconds();
 
-       if (!pipelined_send(msq, msg)) {
+       if (!pipelined_send(msq, msg, &wake_q)) {
                /* no one is waiting for this message, enqueue it */
                list_add_tail(&msg->m_list, &msq->q_messages);
                msq->q_cbytes += msgsz;
@@ -712,6 +733,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 
 out_unlock0:
        ipc_unlock_object(&msq->q_perm);
+       wake_up_q(&wake_q);
 out_unlock1:
        rcu_read_unlock();
        if (msg != NULL)
@@ -829,6 +851,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
        struct msg_queue *msq;
        struct ipc_namespace *ns;
        struct msg_msg *msg, *copy = NULL;
+       WAKE_Q(wake_q);
 
        ns = current->nsproxy->ipc_ns;
 
@@ -893,7 +916,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
                        msq->q_cbytes -= msg->m_ts;
                        atomic_sub(msg->m_ts, &ns->msg_bytes);
                        atomic_dec(&ns->msg_hdrs);
-                       ss_wakeup(&msq->q_senders, 0);
+                       ss_wakeup(msq, &wake_q, false);
 
                        goto out_unlock0;
                }
@@ -919,71 +942,38 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
                rcu_read_unlock();
                schedule();
 
-               /* Lockless receive, part 1:
-                * Disable preemption.  We don't hold a reference to the queue
-                * and getting a reference would defeat the idea of a lockless
-                * operation, thus the code relies on rcu to guarantee the
-                * existence of msq:
+               /*
+                * Lockless receive, part 1:
+                * We don't hold a reference to the queue and getting a
+                * reference would defeat the idea of a lockless operation,
+                * thus the code relies on rcu to guarantee the existence of
+                * msq:
                 * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
                 * Thus if r_msg is -EAGAIN, then the queue is not yet destroyed.
-                * rcu_read_lock() prevents preemption between reading r_msg
-                * and acquiring the q_perm.lock in ipc_lock_object().
                 */
                rcu_read_lock();
 
-               /* Lockless receive, part 2:
-                * Wait until pipelined_send or expunge_all are outside of
-                * wake_up_process(). There is a race with exit(), see
-                * ipc/mqueue.c for the details. The correct serialization
-                * ensures that a receiver cannot continue without the wakeup
-                * being visibible _before_ setting r_msg:
-                *
-                * CPU 0                             CPU 1
-                * <loop receiver>
-                *   smp_rmb(); (A) <-- pair -.      <waker thread>
-                *   <load ->r_msg>           |        msr->r_msg = NULL;
-                *                            |        wake_up_process();
-                * <continue>                 `------> smp_wmb(); (B)
-                *                                     msr->r_msg = msg;
+               /*
+                * Lockless receive, part 2:
+                * The work in pipelined_send() and expunge_all():
+                * - Set pointer to message
+                * - Queue the receiver task for later wakeup
+                * - Wake up the process after the lock is dropped.
                 *
-                * Where (A) orders the message value read and where (B) orders
-                * the write to the r_msg -- done in both pipelined_send and
-                * expunge_all.
-                */
-               for (;;) {
-                       /*
-                        * Pairs with writer barrier in pipelined_send
-                        * or expunge_all.
-                        */
-                       smp_rmb(); /* barrier (A) */
-                       msg = (struct msg_msg *)msr_d.r_msg;
-                       if (msg)
-                               break;
-
-                       /*
-                        * The cpu_relax() call is a compiler barrier
-                        * which forces everything in this loop to be
-                        * re-loaded.
-                        */
-                       cpu_relax();
-               }
-
-               /* Lockless receive, part 3:
-                * If there is a message or an error then accept it without
-                * locking.
+                * Should the process wake up before this wakeup (due to a
+                * signal) it will either see the message and continue ...
                 */
+               msg = READ_ONCE(msr_d.r_msg);
                if (msg != ERR_PTR(-EAGAIN))
                        goto out_unlock1;
 
-               /* Lockless receive, part 3:
-                * Acquire the queue spinlock.
-                */
+                /*
+                 * ... or see -EAGAIN, acquire the lock to check the message
+                 * again.
+                 */
                ipc_lock_object(&msq->q_perm);
 
-               /* Lockless receive, part 4:
-                * Repeat test after acquiring the spinlock.
-                */
-               msg = (struct msg_msg *)msr_d.r_msg;
+               msg = msr_d.r_msg;
                if (msg != ERR_PTR(-EAGAIN))
                        goto out_unlock0;
 
@@ -998,6 +988,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
 
 out_unlock0:
        ipc_unlock_object(&msq->q_perm);
+       wake_up_q(&wake_q);
 out_unlock1:
        rcu_read_unlock();
        if (IS_ERR(msg)) {
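
The msg.c rewrite above consistently applies the wake_q pattern: record wakeups while holding the ipc object lock, then perform them only after the lock is dropped so a woken task never spins on a lock its waker still holds. A hedged, condensed sketch of that pattern using the helpers from this hunk:

static void example_wakeup_pattern(struct msg_queue *msq, int error)
{
        WAKE_Q(wake_q);

        ipc_lock_object(&msq->q_perm);
        expunge_all(msq, error, &wake_q);       /* only records receivers */
        ss_wakeup(msq, &wake_q, false);         /* only records senders */
        ipc_unlock_object(&msq->q_perm);

        wake_up_q(&wake_q);                     /* actual wakeups, lock-free */
}
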
index 7c9d4f7683c073de736a0723a0c0d7a1e3178ad2..5e318c5f749d1ed8e1b4b04d35109faeae5943cf 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -162,14 +162,21 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 
 /*
  * Locking:
+ * a) global sem_lock() for read/write
  *     sem_undo.id_next,
  *     sem_array.complex_count,
- *     sem_array.pending{_alter,_cont},
- *     sem_array.sem_undo: global sem_lock() for read/write
- *     sem_undo.proc_next: only "current" is allowed to read/write that field.
+ *     sem_array.complex_mode
+ *     sem_array.pending{_alter,_const},
+ *     sem_array.sem_undo
  *
+ * b) global or semaphore sem_lock() for read/write:
  *     sem_array.sem_base[i].pending_{const,alter}:
- *             global or semaphore sem_lock() for read/write
+ *     sem_array.complex_mode (for read)
+ *
+ * c) special:
+ *     sem_undo_list.list_proc:
+ *     * undo_list->lock for write
+ *     * rcu for read
  */
 
 #define sc_semmsl      sem_ctls[0]
@@ -260,30 +267,61 @@ static void sem_rcu_free(struct rcu_head *head)
 }
 
 /*
- * Wait until all currently ongoing simple ops have completed.
+ * Enter the mode suitable for non-simple operations:
  * Caller must own sem_perm.lock.
- * New simple ops cannot start, because simple ops first check
- * that sem_perm.lock is free.
- * that a) sem_perm.lock is free and b) complex_count is 0.
  */
-static void sem_wait_array(struct sem_array *sma)
+static void complexmode_enter(struct sem_array *sma)
 {
        int i;
        struct sem *sem;
 
-       if (sma->complex_count)  {
-               /* The thread that increased sma->complex_count waited on
-                * all sem->lock locks. Thus we don't need to wait again.
-                */
+       if (sma->complex_mode)  {
+               /* We are already in complex_mode. Nothing to do */
                return;
        }
 
+       /* We need a full barrier after setting complex_mode:
+        * The write to complex_mode must be visible
+        * before we read the first sem->lock spinlock state.
+        */
+       smp_store_mb(sma->complex_mode, true);
+
        for (i = 0; i < sma->sem_nsems; i++) {
                sem = sma->sem_base + i;
                spin_unlock_wait(&sem->lock);
        }
+       /*
+        * spin_unlock_wait() is not a memory barrier, it is only a
+        * control barrier. The code must pair with spin_unlock(&sem->lock),
+        * thus just the control barrier is insufficient.
+        *
+        * smp_rmb() is sufficient, as writes cannot pass the control barrier.
+        */
+       smp_rmb();
+}
+
+/*
+ * Try to leave the mode that disallows simple operations:
+ * Caller must own sem_perm.lock.
+ */
+static void complexmode_tryleave(struct sem_array *sma)
+{
+       if (sma->complex_count)  {
+               /* Complex ops are sleeping.
+                * We must stay in complex mode
+                */
+               return;
+       }
+       /*
+        * Immediately after setting complex_mode to false,
+        * a simple op can start. Thus: all memory writes
+        * performed by the current operation must be visible
+        * before we set complex_mode to false.
+        */
+       smp_store_release(&sma->complex_mode, false);
 }
 
+#define SEM_GLOBAL_LOCK        (-1)
 /*
  * If the request contains only one semaphore operation, and there are
  * no complex transactions pending, lock only the semaphore involved.
@@ -300,56 +338,42 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
                /* Complex operation - acquire a full lock */
                ipc_lock_object(&sma->sem_perm);
 
-               /* And wait until all simple ops that are processed
-                * right now have dropped their locks.
-                */
-               sem_wait_array(sma);
-               return -1;
+               /* Prevent parallel simple ops */
+               complexmode_enter(sma);
+               return SEM_GLOBAL_LOCK;
        }
 
        /*
         * Only one semaphore affected - try to optimize locking.
-        * The rules are:
-        * - optimized locking is possible if no complex operation
-        *   is either enqueued or processed right now.
-        * - The test for enqueued complex ops is simple:
-        *      sma->complex_count != 0
-        * - Testing for complex ops that are processed right now is
-        *   a bit more difficult. Complex ops acquire the full lock
-        *   and first wait that the running simple ops have completed.
-        *   (see above)
-        *   Thus: If we own a simple lock and the global lock is free
-        *      and complex_count is now 0, then it will stay 0 and
-        *      thus just locking sem->lock is sufficient.
+        * Optimized locking is possible if no complex operation
+        * is either enqueued or processed right now.
+        *
+        * Both facts are tracked by complex_mode.
         */
        sem = sma->sem_base + sops->sem_num;
 
-       if (sma->complex_count == 0) {
+       /*
+        * Initial check for complex_mode. Just an optimization,
+        * no locking, no memory barrier.
+        */
+       if (!sma->complex_mode) {
                /*
                 * It appears that no complex operation is around.
                 * Acquire the per-semaphore lock.
                 */
                spin_lock(&sem->lock);
 
-               /* Then check that the global lock is free */
-               if (!spin_is_locked(&sma->sem_perm.lock)) {
-                       /*
-                        * We need a memory barrier with acquire semantics,
-                        * otherwise we can race with another thread that does:
-                        *      complex_count++;
-                        *      spin_unlock(sem_perm.lock);
-                        */
-                       smp_acquire__after_ctrl_dep();
+               /*
+                * See 51d7d5205d33
+                * ("powerpc: Add smp_mb() to arch_spin_is_locked()"):
+                * A full barrier is required: the write of sem->lock
+                * must be visible before the read is executed
+                */
+               smp_mb();
 
-                       /*
-                        * Now repeat the test of complex_count:
-                        * It can't change anymore until we drop sem->lock.
-                        * Thus: if is now 0, then it will stay 0.
-                        */
-                       if (sma->complex_count == 0) {
-                               /* fast path successful! */
-                               return sops->sem_num;
-                       }
+               if (!smp_load_acquire(&sma->complex_mode)) {
+                       /* fast path successful! */
+                       return sops->sem_num;
                }
                spin_unlock(&sem->lock);
        }
@@ -369,15 +393,16 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
                /* Not a false alarm, thus complete the sequence for a
                 * full lock.
                 */
-               sem_wait_array(sma);
-               return -1;
+               complexmode_enter(sma);
+               return SEM_GLOBAL_LOCK;
        }
 }
 
 static inline void sem_unlock(struct sem_array *sma, int locknum)
 {
-       if (locknum == -1) {
+       if (locknum == SEM_GLOBAL_LOCK) {
                unmerge_queues(sma);
+               complexmode_tryleave(sma);
                ipc_unlock_object(&sma->sem_perm);
        } else {
                struct sem *sem = sma->sem_base + locknum;
@@ -529,6 +554,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
        }
 
        sma->complex_count = 0;
+       sma->complex_mode = true; /* dropped by sem_unlock below */
        INIT_LIST_HEAD(&sma->pending_alter);
        INIT_LIST_HEAD(&sma->pending_const);
        INIT_LIST_HEAD(&sma->list_id);
@@ -2184,10 +2210,10 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
        /*
         * The proc interface isn't aware of sem_lock(), it calls
         * ipc_lock_object() directly (in sysvipc_find_ipc).
-        * In order to stay compatible with sem_lock(), we must wait until
-        * all simple semop() calls have left their critical regions.
+        * In order to stay compatible with sem_lock(), we must
+        * enter / leave complex_mode.
         */
-       sem_wait_array(sma);
+       complexmode_enter(sma);
 
        sem_otime = get_semotime(sma);
 
@@ -2204,6 +2230,8 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
                   sem_otime,
                   sma->sem_ctime);
 
+       complexmode_tryleave(sma);
+
        return 0;
 }
 #endif
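
A hedged restatement of the ordering contract behind complex_mode, as a compact fast-path sketch: the simple-op side publishes its per-semaphore lock before reading complex_mode, while complexmode_enter() publishes complex_mode before reading the per-semaphore locks, so at least one side always observes the other. The function name is illustrative; the body mirrors sem_lock() above:

static bool example_simple_op_fastpath(struct sem_array *sma, struct sem *sem)
{
        spin_lock(&sem->lock);
        smp_mb();       /* pairs with smp_store_mb() in complexmode_enter() */
        if (!smp_load_acquire(&sma->complex_mode))
                return true;            /* fast path: keep sem->lock */
        spin_unlock(&sem->lock);
        return false;                   /* fall back to the global lock */
}
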
index 9f748ed7bea8eb83ca37b38289cdb418c6841672..1a8f34f6360112bab3cc122b8027ec479768a7af 100644 (file)
@@ -11,7 +11,6 @@ CONFIG_ANDROID_LOW_MEMORY_KILLER=y
 CONFIG_ARMV8_DEPRECATED=y
 CONFIG_ASHMEM=y
 CONFIG_AUDIT=y
-CONFIG_BLK_DEV_DM=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_CGROUPS=y
 CONFIG_CGROUP_CPUACCT=y
@@ -19,9 +18,7 @@ CONFIG_CGROUP_DEBUG=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_CP15_BARRIER_EMULATION=y
-CONFIG_DM_CRYPT=y
-CONFIG_DM_VERITY=y
-CONFIG_DM_VERITY_FEC=y
+CONFIG_DEFAULT_SECURITY_SELINUX=y
 CONFIG_EMBEDDED=y
 CONFIG_FB=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -41,7 +38,6 @@ CONFIG_IPV6=y
 CONFIG_IPV6_MIP6=y
 CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_IPV6_OPTIMISTIC_DAD=y
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_ROUTE_INFO=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -135,6 +131,7 @@ CONFIG_PREEMPT=y
 CONFIG_QUOTA=y
 CONFIG_RTC_CLASS=y
 CONFIG_RT_GROUP_SCHED=y
+CONFIG_SECCOMP=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
index e3b953e966d26a1cdc47778158b0ab76a0f52ecf..297756be369c68d9e56c3c5823de5530118671f6 100644 (file)
@@ -6,12 +6,16 @@
 # CONFIG_PM_WAKELOCKS_GC is not set
 # CONFIG_VT is not set
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_BLK_DEV_DM=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
 CONFIG_COMPACTION=y
 CONFIG_DEBUG_RODATA=y
+CONFIG_DM_CRYPT=y
 CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
 CONFIG_DRAGONRISE_FF=y
 CONFIG_ENABLE_DEFAULT_TRACERS=y
 CONFIG_EXT4_FS=y
index 091a78be3b09d5669d9c10b98f6300e4171d2413..fb66ec7566d58a60c065f18b68458edaa3b7a650 100644 (file)
@@ -511,7 +511,7 @@ static void exit_mm(struct task_struct *tsk)
        mm_update_next_owner(mm);
        mmput(mm);
        if (test_thread_flag(TIF_MEMDIE))
-               exit_oom_victim(tsk);
+               exit_oom_victim();
 }
 
 static struct task_struct *find_alive_thread(struct task_struct *p)
index c2ecca44406b130e0ec7abc4d2648beff6738814..3584f521e3a63d82b6f1827f5f856fb0832d8077 100644 (file)
@@ -286,6 +286,12 @@ static inline void free_signal_struct(struct signal_struct *sig)
 {
        taskstats_tgid_free(sig);
        sched_autogroup_exit(sig);
+       /*
+        * __mmdrop is not safe to call from softirq context on x86 due to
+        * pgd_dtor, so postpone it to the async context
+        */
+       if (sig->oom_mm)
+               mmdrop_async(sig->oom_mm);
        kmem_cache_free(signal_cachep, sig);
 }
 
@@ -768,6 +774,7 @@ static inline void __mmput(struct mm_struct *mm)
        ksm_exit(mm);
        khugepaged_exit(mm); /* must run before exit_mmap */
        exit_mmap(mm);
+       mm_put_huge_zero_page(mm);
        set_mm_exe_file(mm, NULL);
        if (!list_empty(&mm->mmlist)) {
                spin_lock(&mmlist_lock);
@@ -776,6 +783,7 @@ static inline void __mmput(struct mm_struct *mm)
        }
        if (mm->binfmt)
                module_put(mm->binfmt->module);
+       set_bit(MMF_OOM_SKIP, &mm->flags);
        mmdrop(mm);
 }
 
index 74d431d252515042296bb739cb8aba456635113e..2fcadd66a8fd7cba19e264c6542ec8940a248a73 100644 (file)
@@ -7,55 +7,31 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/user_namespace.h>
+#include <linux/vmalloc.h>
 #include <asm/uaccess.h>
 
 struct group_info *groups_alloc(int gidsetsize)
 {
-       struct group_info *group_info;
-       int nblocks;
-       int i;
-
-       nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
-       /* Make sure we always allocate at least one indirect block pointer */
-       nblocks = nblocks ? : 1;
-       group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
-       if (!group_info)
+       struct group_info *gi;
+       unsigned int len;
+
+       len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize;
+       gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY);
+       if (!gi)
+               gi = __vmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_HIGHMEM, PAGE_KERNEL);
+       if (!gi)
                return NULL;
-       group_info->ngroups = gidsetsize;
-       group_info->nblocks = nblocks;
-       atomic_set(&group_info->usage, 1);
-
-       if (gidsetsize <= NGROUPS_SMALL)
-               group_info->blocks[0] = group_info->small_block;
-       else {
-               for (i = 0; i < nblocks; i++) {
-                       kgid_t *b;
-                       b = (void *)__get_free_page(GFP_USER);
-                       if (!b)
-                               goto out_undo_partial_alloc;
-                       group_info->blocks[i] = b;
-               }
-       }
-       return group_info;
 
-out_undo_partial_alloc:
-       while (--i >= 0) {
-               free_page((unsigned long)group_info->blocks[i]);
-       }
-       kfree(group_info);
-       return NULL;
+       atomic_set(&gi->usage, 1);
+       gi->ngroups = gidsetsize;
+       return gi;
 }
 
 EXPORT_SYMBOL(groups_alloc);
 
 void groups_free(struct group_info *group_info)
 {
-       if (group_info->blocks[0] != group_info->small_block) {
-               int i;
-               for (i = 0; i < group_info->nblocks; i++)
-                       free_page((unsigned long)group_info->blocks[i]);
-       }
-       kfree(group_info);
+       kvfree(group_info);
 }
 
 EXPORT_SYMBOL(groups_free);
@@ -70,7 +46,7 @@ static int groups_to_user(gid_t __user *grouplist,
 
        for (i = 0; i < count; i++) {
                gid_t gid;
-               gid = from_kgid_munged(user_ns, GROUP_AT(group_info, i));
+               gid = from_kgid_munged(user_ns, group_info->gid[i]);
                if (put_user(gid, grouplist+i))
                        return -EFAULT;
        }
@@ -95,7 +71,7 @@ static int groups_from_user(struct group_info *group_info,
                if (!gid_valid(kgid))
                        return -EINVAL;
 
-               GROUP_AT(group_info, i) = kgid;
+               group_info->gid[i] = kgid;
        }
        return 0;
 }
@@ -115,15 +91,14 @@ static void groups_sort(struct group_info *group_info)
                for (base = 0; base < max; base++) {
                        int left = base;
                        int right = left + stride;
-                       kgid_t tmp = GROUP_AT(group_info, right);
+                       kgid_t tmp = group_info->gid[right];
 
-                       while (left >= 0 && gid_gt(GROUP_AT(group_info, left), tmp)) {
-                               GROUP_AT(group_info, right) =
-                                   GROUP_AT(group_info, left);
+                       while (left >= 0 && gid_gt(group_info->gid[left], tmp)) {
+                               group_info->gid[right] = group_info->gid[left];
                                right = left;
                                left -= stride;
                        }
-                       GROUP_AT(group_info, right) = tmp;
+                       group_info->gid[right] = tmp;
                }
                stride /= 3;
        }
@@ -141,9 +116,9 @@ int groups_search(const struct group_info *group_info, kgid_t grp)
        right = group_info->ngroups;
        while (left < right) {
                unsigned int mid = (left+right)/2;
-               if (gid_gt(grp, GROUP_AT(group_info, mid)))
+               if (gid_gt(grp, group_info->gid[mid]))
                        left = mid + 1;
-               else if (gid_lt(grp, GROUP_AT(group_info, mid)))
+               else if (gid_lt(grp, group_info->gid[mid]))
                        right = mid;
                else
                        return 1;
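
With the flat gid[] array, group_info consumers index the groups directly instead of going through the removed GROUP_AT() block indirection. A hedged sketch of a caller-side loop; groups_search() above remains the canonical (sorted, binary-search) lookup:

static bool example_in_group(const struct group_info *gi, kgid_t grp)
{
        int i;

        for (i = 0; i < gi->ngroups; i++)
                if (gid_eq(gi->gid[i], grp))
                        return true;
        return false;
}
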
index ca8cea1ef6737dfe4c16a62f96d3d0569f181c41..e6480e20379e4d0ee4fb51284364d6e7f7f1e25e 100644 (file)
@@ -71,6 +71,32 @@ void __weak nmi_panic_self_stop(struct pt_regs *regs)
        panic_smp_self_stop();
 }
 
+/*
+ * Stop other CPUs in panic.  Architecture dependent code may override this
+ * with more suitable version.  For example, if the architecture supports
+ * crash dump, it should save registers of each stopped CPU and disable
+ * per-CPU features such as virtualization extensions.
+ */
+void __weak crash_smp_send_stop(void)
+{
+       static int cpus_stopped;
+
+       /*
+        * This function can be called twice in panic path, but obviously
+        * we execute this only once.
+        */
+       if (cpus_stopped)
+               return;
+
+       /*
+        * Note smp_send_stop is the usual smp shutdown function, which
+        * unfortunately means it may not be hardened to work in a panic
+        * situation.
+        */
+       smp_send_stop();
+       cpus_stopped = 1;
+}
+
 atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
 
 /*
@@ -164,14 +190,21 @@ void panic(const char *fmt, ...)
        if (!_crash_kexec_post_notifiers) {
                printk_nmi_flush_on_panic();
                __crash_kexec(NULL);
-       }
 
-       /*
-        * Note smp_send_stop is the usual smp shutdown function, which
-        * unfortunately means it may not be hardened to work in a panic
-        * situation.
-        */
-       smp_send_stop();
+               /*
+                * Note smp_send_stop is the usual smp shutdown function, which
+                * unfortunately means it may not be hardened to work in a
+                * panic situation.
+                */
+               smp_send_stop();
+       } else {
+               /*
+                * If we want to do crash dump after notifier calls and
+                * kmsg_dump, we will need architecture dependent extra
+                * works in addition to stopping other CPUs.
+                */
+               crash_smp_send_stop();
+       }
 
        /*
         * Run any panic handlers, including those that might need to
index 8f27d5a8adf6ba509a5d915c6a20f4ed14fe552e..2fba066e125fa960b484b362d0fd6895f2e103a0 100644 (file)
@@ -144,23 +144,12 @@ int freeze_processes(void)
        /*
         * Now that the whole userspace is frozen we need to disable
         * the OOM killer to disallow any further interference with
-        * killable tasks.
+        * killable tasks. There is no guarantee oom victims will
+        * ever reach a point where they go away, so we have to wait
+        * with a timeout.
         */
-       if (!error && !oom_killer_disable())
+       if (!error && !oom_killer_disable(msecs_to_jiffies(freeze_timeout_msecs)))
                error = -EBUSY;
 
-       /*
-        * There is a hard to fix race between oom_reaper kernel thread
-        * and oom_killer_disable. oom_reaper calls exit_oom_victim
-        * before the victim reaches exit_mm so try to freeze all the tasks
-        * again and catch such a left over task.
-        */
-       if (!error) {
-               pr_info("Double checking all user space processes after OOM killer disable... ");
-               error = try_to_freeze_tasks(true);
-               pr_cont("\n");
-       }
-
        if (error)
                thaw_processes();
        return error;
index eea6dbc2d8cf6ffc4d71729eb270a659b0a8c77d..8019cc0d3a730654e0da8320003ba813600cb722 100644 (file)
@@ -253,6 +253,17 @@ static int preferred_console = -1;
 int console_set_on_cmdline;
 EXPORT_SYMBOL(console_set_on_cmdline);
 
+#ifdef CONFIG_OF
+static bool of_specified_console;
+
+void console_set_by_of(void)
+{
+       of_specified_console = true;
+}
+#else
+# define of_specified_console false
+#endif
+
 /* Flag: console code may call schedule() */
 static int console_may_schedule;
 
@@ -2647,7 +2658,7 @@ void register_console(struct console *newcon)
         *      didn't select a console we take the first one
         *      that registers here.
         */
-       if (preferred_console < 0) {
+       if (preferred_console < 0 && !of_specified_console) {
                if (newcon->index < 0)
                        newcon->index = 0;
                if (newcon->setup == NULL ||
index 1d3b7665d0be0223343530bcfc940bfa3e525c28..2a99027312a6af6773027e20029752efddc418e3 100644 (file)
@@ -73,6 +73,8 @@ void __ptrace_unlink(struct task_struct *child)
 {
        BUG_ON(!child->ptrace);
 
+       clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+
        child->parent = child->real_parent;
        list_del_init(&child->ptrace_entry);
 
@@ -489,7 +491,6 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
 
        /* Architecture-specific hardware disable .. */
        ptrace_disable(child);
-       clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 
        write_lock_irq(&tasklist_lock);
        /*
index fc9b4a4af463a61732e185cceda2afffc827fd6e..037be708c24cc0211b2839362c18b0cb04882e40 100644 (file)
@@ -328,13 +328,15 @@ static struct rchan_callbacks default_channel_callbacks = {
 
 /**
  *     wakeup_readers - wake up readers waiting on a channel
- *     @data: contains the channel buffer
+ *     @work: contains the channel buffer
  *
- *     This is the timer function used to defer reader waking.
+ *     This is the function used to defer reader waking
  */
-static void wakeup_readers(unsigned long data)
+static void wakeup_readers(struct irq_work *work)
 {
-       struct rchan_buf *buf = (struct rchan_buf *)data;
+       struct rchan_buf *buf;
+
+       buf = container_of(work, struct rchan_buf, wakeup_work);
        wake_up_interruptible(&buf->read_wait);
 }
 
@@ -352,9 +354,10 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init)
        if (init) {
                init_waitqueue_head(&buf->read_wait);
                kref_init(&buf->kref);
-               setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
-       } else
-               del_timer_sync(&buf->timer);
+               init_irq_work(&buf->wakeup_work, wakeup_readers);
+       } else {
+               irq_work_sync(&buf->wakeup_work);
+       }
 
        buf->subbufs_produced = 0;
        buf->subbufs_consumed = 0;
@@ -487,7 +490,7 @@ free_buf:
 static void relay_close_buf(struct rchan_buf *buf)
 {
        buf->finalized = 1;
-       del_timer_sync(&buf->timer);
+       irq_work_sync(&buf->wakeup_work);
        buf->chan->cb->remove_buf_file(buf->dentry);
        kref_put(&buf->kref, relay_remove_buf);
 }
@@ -754,14 +757,15 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
                        buf->early_bytes += buf->chan->subbuf_size -
                                            buf->padding[old_subbuf];
                smp_mb();
-               if (waitqueue_active(&buf->read_wait))
+               if (waitqueue_active(&buf->read_wait)) {
                        /*
                         * Calling wake_up_interruptible() from here
                         * will deadlock if we happen to be logging
                         * from the scheduler (trying to re-grab
                         * rq->lock), so defer it.
                         */
-                       mod_timer(&buf->timer, jiffies + 1);
+                       irq_work_queue(&buf->wakeup_work);
+               }
        }
 
        old = buf->data;
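
The conversion above follows the usual irq_work deferral pattern; a minimal self-contained sketch of that pattern with illustrative names (not relay.c code):

#include <linux/irq_work.h>
#include <linux/wait.h>

struct my_buf {
	wait_queue_head_t read_wait;
	struct irq_work wakeup_work;
};

static void my_wakeup(struct irq_work *work)
{
	struct my_buf *buf = container_of(work, struct my_buf, wakeup_work);

	/* Runs from hard-irq context, so waking readers cannot deadlock on rq->lock. */
	wake_up_interruptible(&buf->read_wait);
}

static void my_init(struct my_buf *buf)
{
	init_waitqueue_head(&buf->read_wait);
	init_irq_work(&buf->wakeup_work, my_wakeup);
}

/* From a context that must not wake up directly (e.g. while holding rq->lock): */
static void my_defer_wakeup(struct my_buf *buf)
{
	irq_work_queue(&buf->wakeup_work);
}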
index 9fb873cfc75cfb332c59437bd936e1dd9493d92e..1d8718d5300d81b896f66e9c8c3ca9df97b4cbf4 100644 (file)
@@ -16,6 +16,9 @@
 
 #include "sched.h"
 
+/* Linker adds these: start and end of __cpuidle functions */
+extern char __cpuidle_text_start[], __cpuidle_text_end[];
+
 /**
  * sched_idle_set_state - Record idle state for the current CPU.
  * @idle_state: State to record.
@@ -53,7 +56,7 @@ static int __init cpu_idle_nopoll_setup(char *__unused)
 __setup("hlt", cpu_idle_nopoll_setup);
 #endif
 
-static inline int cpu_idle_poll(void)
+static noinline int __cpuidle cpu_idle_poll(void)
 {
        rcu_idle_enter();
        trace_cpu_idle_rcuidle(0, smp_processor_id());
@@ -84,7 +87,7 @@ void __weak arch_cpu_idle(void)
  *
  * To use when the cpuidle framework cannot be used.
  */
-void default_idle_call(void)
+void __cpuidle default_idle_call(void)
 {
        if (current_clr_polling_and_test()) {
                local_irq_enable();
@@ -271,6 +274,12 @@ static void cpu_idle_loop(void)
        }
 }
 
+bool cpu_in_idle(unsigned long pc)
+{
+       return pc >= (unsigned long)__cpuidle_text_start &&
+               pc < (unsigned long)__cpuidle_text_end;
+}
+
 void cpu_startup_entry(enum cpuhp_state state)
 {
        /*
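
For orientation, a hedged sketch of how __cpuidle and cpu_in_idle() fit together; the arch_cpu_idle() body is an x86-style assumption, not code from this series.

/*
 * Arch side: the attribute places the halt path inside .cpuidle.text,
 * i.e. between __cpuidle_text_start and __cpuidle_text_end.
 */
void __cpuidle arch_cpu_idle(void)
{
	safe_halt();	/* illustrative; arch-specific in reality */
}

/* Consumer side, as used by the NMI backtrace hunk later in this merge: */
static bool pc_was_idling(struct pt_regs *regs)
{
	return regs && cpu_in_idle(instruction_pointer(regs));
}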
index a13bbdaab47dc66fec2c0dd3da0b2c9898725bdf..56b42f822e090835e6a001884aaeac6ec8a01e4a 100644 (file)
@@ -106,7 +106,6 @@ extern unsigned int core_pipe_limit;
 extern int pid_max;
 extern int pid_max_min, pid_max_max;
 extern int percpu_pagelist_fraction;
-extern int compat_log;
 extern int latencytop_enabled;
 extern int sysctl_nr_open_min, sysctl_nr_open_max;
 #ifndef CONFIG_MMU
@@ -1084,15 +1083,6 @@ static struct ctl_table kern_table[] = {
                .extra1         = &neg_one,
        },
 #endif
-#ifdef CONFIG_COMPAT
-       {
-               .procname       = "compat-log",
-               .data           = &compat_log,
-               .maxlen         = sizeof (int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-#endif
 #ifdef CONFIG_RT_MUTEXES
        {
                .procname       = "max_lock_depth",
index d58cc4d8f0d1fa95c7ec0120cb408a9b4ad859e5..cc40793464e3ca1959c9960a3cdbc3180d4c468b 100644 (file)
@@ -117,7 +117,7 @@ static int groups16_to_user(old_gid_t __user *grouplist,
        kgid_t kgid;
 
        for (i = 0; i < group_info->ngroups; i++) {
-               kgid = GROUP_AT(group_info, i);
+               kgid = group_info->gid[i];
                group = high2lowgid(from_kgid_munged(user_ns, kgid));
                if (put_user(group, grouplist+i))
                        return -EFAULT;
@@ -142,7 +142,7 @@ static int groups16_from_user(struct group_info *group_info,
                if (!gid_valid(kgid))
                        return -EINVAL;
 
-               GROUP_AT(group_info, i) = kgid;
+               group_info->gid[i] = kgid;
        }
 
        return 0;
index 9acb29f280ec95813612a913307e4dcaec8b7e03..6d1020c03d41439d298d1c219ab6dc34d1b7eda3 100644 (file)
@@ -344,7 +344,6 @@ static void watchdog_overflow_callback(struct perf_event *event,
         */
        if (is_hardlockup()) {
                int this_cpu = smp_processor_id();
-               struct pt_regs *regs = get_irq_regs();
 
                /* only print hardlockups once */
                if (__this_cpu_read(hard_watchdog_warn) == true)
index d79909dc01ec8365ab1cf29b1c7ce6dd4c78b0ab..9d188fbd856a46fea4d153d4ef689a7ec2f3c20b 100644 (file)
@@ -185,6 +185,13 @@ config CRC8
          when they need to do cyclic redundancy check according CRC8
          algorithm. Module will be called crc8.
 
+config CRC64_ECMA
+       tristate "CRC64 ECMA function"
+       help
+         This option provides the CRC64 ECMA function. Drivers may select this
+         when they need to do cyclic redundancy check according to the CRC64
+         ECMA algorithm.
+
 config AUDIT_GENERIC
        bool
        depends on AUDIT && !AUDIT_ARCH
index 5dc77a8ec297ec478c003e894af206956a39fb4c..4d5272c04cf92f4745f0f78f32d456cf0ffac286 100644 (file)
@@ -93,6 +93,7 @@ obj-$(CONFIG_CRC32)   += crc32.o
 obj-$(CONFIG_CRC7)     += crc7.o
 obj-$(CONFIG_LIBCRC32C)        += libcrc32c.o
 obj-$(CONFIG_CRC8)     += crc8.o
+obj-$(CONFIG_CRC64_ECMA)       += crc64_ecma.o
 obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
 
 obj-$(CONFIG_842_COMPRESS) += 842/
diff --git a/lib/crc64_ecma.c b/lib/crc64_ecma.c
new file mode 100644 (file)
index 0000000..41629ea
--- /dev/null
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/crc64_ecma.h>
+
+
+#define CRC64_BYTE_MASK                        0xFF
+#define CRC64_TABLE_SIZE               256
+
+
+struct crc64_table {
+       u64 seed;
+       u64 table[CRC64_TABLE_SIZE];
+};
+
+
+static struct crc64_table CRC64_ECMA_182 = {
+       CRC64_DEFAULT_INITVAL,
+       {
+               0x0000000000000000ULL,
+               0xb32e4cbe03a75f6fULL,
+               0xf4843657a840a05bULL,
+               0x47aa7ae9abe7ff34ULL,
+               0x7bd0c384ff8f5e33ULL,
+               0xc8fe8f3afc28015cULL,
+               0x8f54f5d357cffe68ULL,
+               0x3c7ab96d5468a107ULL,
+               0xf7a18709ff1ebc66ULL,
+               0x448fcbb7fcb9e309ULL,
+               0x0325b15e575e1c3dULL,
+               0xb00bfde054f94352ULL,
+               0x8c71448d0091e255ULL,
+               0x3f5f08330336bd3aULL,
+               0x78f572daa8d1420eULL,
+               0xcbdb3e64ab761d61ULL,
+               0x7d9ba13851336649ULL,
+               0xceb5ed8652943926ULL,
+               0x891f976ff973c612ULL,
+               0x3a31dbd1fad4997dULL,
+               0x064b62bcaebc387aULL,
+               0xb5652e02ad1b6715ULL,
+               0xf2cf54eb06fc9821ULL,
+               0x41e11855055bc74eULL,
+               0x8a3a2631ae2dda2fULL,
+               0x39146a8fad8a8540ULL,
+               0x7ebe1066066d7a74ULL,
+               0xcd905cd805ca251bULL,
+               0xf1eae5b551a2841cULL,
+               0x42c4a90b5205db73ULL,
+               0x056ed3e2f9e22447ULL,
+               0xb6409f5cfa457b28ULL,
+               0xfb374270a266cc92ULL,
+               0x48190ecea1c193fdULL,
+               0x0fb374270a266cc9ULL,
+               0xbc9d3899098133a6ULL,
+               0x80e781f45de992a1ULL,
+               0x33c9cd4a5e4ecdceULL,
+               0x7463b7a3f5a932faULL,
+               0xc74dfb1df60e6d95ULL,
+               0x0c96c5795d7870f4ULL,
+               0xbfb889c75edf2f9bULL,
+               0xf812f32ef538d0afULL,
+               0x4b3cbf90f69f8fc0ULL,
+               0x774606fda2f72ec7ULL,
+               0xc4684a43a15071a8ULL,
+               0x83c230aa0ab78e9cULL,
+               0x30ec7c140910d1f3ULL,
+               0x86ace348f355aadbULL,
+               0x3582aff6f0f2f5b4ULL,
+               0x7228d51f5b150a80ULL,
+               0xc10699a158b255efULL,
+               0xfd7c20cc0cdaf4e8ULL,
+               0x4e526c720f7dab87ULL,
+               0x09f8169ba49a54b3ULL,
+               0xbad65a25a73d0bdcULL,
+               0x710d64410c4b16bdULL,
+               0xc22328ff0fec49d2ULL,
+               0x85895216a40bb6e6ULL,
+               0x36a71ea8a7ace989ULL,
+               0x0adda7c5f3c4488eULL,
+               0xb9f3eb7bf06317e1ULL,
+               0xfe5991925b84e8d5ULL,
+               0x4d77dd2c5823b7baULL,
+               0x64b62bcaebc387a1ULL,
+               0xd7986774e864d8ceULL,
+               0x90321d9d438327faULL,
+               0x231c512340247895ULL,
+               0x1f66e84e144cd992ULL,
+               0xac48a4f017eb86fdULL,
+               0xebe2de19bc0c79c9ULL,
+               0x58cc92a7bfab26a6ULL,
+               0x9317acc314dd3bc7ULL,
+               0x2039e07d177a64a8ULL,
+               0x67939a94bc9d9b9cULL,
+               0xd4bdd62abf3ac4f3ULL,
+               0xe8c76f47eb5265f4ULL,
+               0x5be923f9e8f53a9bULL,
+               0x1c4359104312c5afULL,
+               0xaf6d15ae40b59ac0ULL,
+               0x192d8af2baf0e1e8ULL,
+               0xaa03c64cb957be87ULL,
+               0xeda9bca512b041b3ULL,
+               0x5e87f01b11171edcULL,
+               0x62fd4976457fbfdbULL,
+               0xd1d305c846d8e0b4ULL,
+               0x96797f21ed3f1f80ULL,
+               0x2557339fee9840efULL,
+               0xee8c0dfb45ee5d8eULL,
+               0x5da24145464902e1ULL,
+               0x1a083bacedaefdd5ULL,
+               0xa9267712ee09a2baULL,
+               0x955cce7fba6103bdULL,
+               0x267282c1b9c65cd2ULL,
+               0x61d8f8281221a3e6ULL,
+               0xd2f6b4961186fc89ULL,
+               0x9f8169ba49a54b33ULL,
+               0x2caf25044a02145cULL,
+               0x6b055fede1e5eb68ULL,
+               0xd82b1353e242b407ULL,
+               0xe451aa3eb62a1500ULL,
+               0x577fe680b58d4a6fULL,
+               0x10d59c691e6ab55bULL,
+               0xa3fbd0d71dcdea34ULL,
+               0x6820eeb3b6bbf755ULL,
+               0xdb0ea20db51ca83aULL,
+               0x9ca4d8e41efb570eULL,
+               0x2f8a945a1d5c0861ULL,
+               0x13f02d374934a966ULL,
+               0xa0de61894a93f609ULL,
+               0xe7741b60e174093dULL,
+               0x545a57dee2d35652ULL,
+               0xe21ac88218962d7aULL,
+               0x5134843c1b317215ULL,
+               0x169efed5b0d68d21ULL,
+               0xa5b0b26bb371d24eULL,
+               0x99ca0b06e7197349ULL,
+               0x2ae447b8e4be2c26ULL,
+               0x6d4e3d514f59d312ULL,
+               0xde6071ef4cfe8c7dULL,
+               0x15bb4f8be788911cULL,
+               0xa6950335e42fce73ULL,
+               0xe13f79dc4fc83147ULL,
+               0x521135624c6f6e28ULL,
+               0x6e6b8c0f1807cf2fULL,
+               0xdd45c0b11ba09040ULL,
+               0x9aefba58b0476f74ULL,
+               0x29c1f6e6b3e0301bULL,
+               0xc96c5795d7870f42ULL,
+               0x7a421b2bd420502dULL,
+               0x3de861c27fc7af19ULL,
+               0x8ec62d7c7c60f076ULL,
+               0xb2bc941128085171ULL,
+               0x0192d8af2baf0e1eULL,
+               0x4638a2468048f12aULL,
+               0xf516eef883efae45ULL,
+               0x3ecdd09c2899b324ULL,
+               0x8de39c222b3eec4bULL,
+               0xca49e6cb80d9137fULL,
+               0x7967aa75837e4c10ULL,
+               0x451d1318d716ed17ULL,
+               0xf6335fa6d4b1b278ULL,
+               0xb199254f7f564d4cULL,
+               0x02b769f17cf11223ULL,
+               0xb4f7f6ad86b4690bULL,
+               0x07d9ba1385133664ULL,
+               0x4073c0fa2ef4c950ULL,
+               0xf35d8c442d53963fULL,
+               0xcf273529793b3738ULL,
+               0x7c0979977a9c6857ULL,
+               0x3ba3037ed17b9763ULL,
+               0x888d4fc0d2dcc80cULL,
+               0x435671a479aad56dULL,
+               0xf0783d1a7a0d8a02ULL,
+               0xb7d247f3d1ea7536ULL,
+               0x04fc0b4dd24d2a59ULL,
+               0x3886b22086258b5eULL,
+               0x8ba8fe9e8582d431ULL,
+               0xcc0284772e652b05ULL,
+               0x7f2cc8c92dc2746aULL,
+               0x325b15e575e1c3d0ULL,
+               0x8175595b76469cbfULL,
+               0xc6df23b2dda1638bULL,
+               0x75f16f0cde063ce4ULL,
+               0x498bd6618a6e9de3ULL,
+               0xfaa59adf89c9c28cULL,
+               0xbd0fe036222e3db8ULL,
+               0x0e21ac88218962d7ULL,
+               0xc5fa92ec8aff7fb6ULL,
+               0x76d4de52895820d9ULL,
+               0x317ea4bb22bfdfedULL,
+               0x8250e80521188082ULL,
+               0xbe2a516875702185ULL,
+               0x0d041dd676d77eeaULL,
+               0x4aae673fdd3081deULL,
+               0xf9802b81de97deb1ULL,
+               0x4fc0b4dd24d2a599ULL,
+               0xfceef8632775faf6ULL,
+               0xbb44828a8c9205c2ULL,
+               0x086ace348f355aadULL,
+               0x34107759db5dfbaaULL,
+               0x873e3be7d8faa4c5ULL,
+               0xc094410e731d5bf1ULL,
+               0x73ba0db070ba049eULL,
+               0xb86133d4dbcc19ffULL,
+               0x0b4f7f6ad86b4690ULL,
+               0x4ce50583738cb9a4ULL,
+               0xffcb493d702be6cbULL,
+               0xc3b1f050244347ccULL,
+               0x709fbcee27e418a3ULL,
+               0x3735c6078c03e797ULL,
+               0x841b8ab98fa4b8f8ULL,
+               0xadda7c5f3c4488e3ULL,
+               0x1ef430e13fe3d78cULL,
+               0x595e4a08940428b8ULL,
+               0xea7006b697a377d7ULL,
+               0xd60abfdbc3cbd6d0ULL,
+               0x6524f365c06c89bfULL,
+               0x228e898c6b8b768bULL,
+               0x91a0c532682c29e4ULL,
+               0x5a7bfb56c35a3485ULL,
+               0xe955b7e8c0fd6beaULL,
+               0xaeffcd016b1a94deULL,
+               0x1dd181bf68bdcbb1ULL,
+               0x21ab38d23cd56ab6ULL,
+               0x9285746c3f7235d9ULL,
+               0xd52f0e859495caedULL,
+               0x6601423b97329582ULL,
+               0xd041dd676d77eeaaULL,
+               0x636f91d96ed0b1c5ULL,
+               0x24c5eb30c5374ef1ULL,
+               0x97eba78ec690119eULL,
+               0xab911ee392f8b099ULL,
+               0x18bf525d915feff6ULL,
+               0x5f1528b43ab810c2ULL,
+               0xec3b640a391f4fadULL,
+               0x27e05a6e926952ccULL,
+               0x94ce16d091ce0da3ULL,
+               0xd3646c393a29f297ULL,
+               0x604a2087398eadf8ULL,
+               0x5c3099ea6de60cffULL,
+               0xef1ed5546e415390ULL,
+               0xa8b4afbdc5a6aca4ULL,
+               0x1b9ae303c601f3cbULL,
+               0x56ed3e2f9e224471ULL,
+               0xe5c372919d851b1eULL,
+               0xa26908783662e42aULL,
+               0x114744c635c5bb45ULL,
+               0x2d3dfdab61ad1a42ULL,
+               0x9e13b115620a452dULL,
+               0xd9b9cbfcc9edba19ULL,
+               0x6a978742ca4ae576ULL,
+               0xa14cb926613cf817ULL,
+               0x1262f598629ba778ULL,
+               0x55c88f71c97c584cULL,
+               0xe6e6c3cfcadb0723ULL,
+               0xda9c7aa29eb3a624ULL,
+               0x69b2361c9d14f94bULL,
+               0x2e184cf536f3067fULL,
+               0x9d36004b35545910ULL,
+               0x2b769f17cf112238ULL,
+               0x9858d3a9ccb67d57ULL,
+               0xdff2a94067518263ULL,
+               0x6cdce5fe64f6dd0cULL,
+               0x50a65c93309e7c0bULL,
+               0xe388102d33392364ULL,
+               0xa4226ac498dedc50ULL,
+               0x170c267a9b79833fULL,
+               0xdcd7181e300f9e5eULL,
+               0x6ff954a033a8c131ULL,
+               0x28532e49984f3e05ULL,
+               0x9b7d62f79be8616aULL,
+               0xa707db9acf80c06dULL,
+               0x14299724cc279f02ULL,
+               0x5383edcd67c06036ULL,
+               0xe0ada17364673f59ULL
+       }
+};
+
+
+/*
+ * crc64_ecma_seed - Return the initial CRC64 ECMA seed value.
+ */
+u64 crc64_ecma_seed(void)
+{
+       return CRC64_ECMA_182.seed;
+}
+EXPORT_SYMBOL(crc64_ecma_seed);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * pdata: pointer to the data to compute checksum for.
+ * nbytes: number of bytes in data buffer.
+ * seed: CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed)
+{
+       unsigned int i;
+       u64 crc = seed;
+
+       for (i = 0; i < nbytes; i++)
+               crc = CRC64_ECMA_182.table[(crc ^ pdata[i]) & CRC64_BYTE_MASK] ^
+                       (crc >> 8);
+
+       return crc;
+}
+EXPORT_SYMBOL(crc64_ecma);
+
+MODULE_DESCRIPTION("CRC64 ECMA function");
+MODULE_AUTHOR("Freescale Semiconductor Inc.");
+MODULE_LICENSE("GPL");
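
Based purely on the API introduced above, a usage sketch: seed with crc64_ecma_seed() and feed buffers incrementally, passing the running CRC back in as the seed.

#include <linux/crc64_ecma.h>

static u64 checksum_two_buffers(const u8 *a, u32 alen, const u8 *b, u32 blen)
{
	u64 crc = crc64_ecma_seed();

	crc = crc64_ecma(a, alen, crc);	/* first chunk */
	crc = crc64_ecma(b, blen, crc);	/* continue with the running CRC */
	return crc;
}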
index d8a5cf66c316fe21eaecee8d973deac1eb3bd5d7..b8e2080c1a47a24a14c9618cd0dac5e4da0c0c67 100644 (file)
@@ -48,11 +48,9 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
 {
        unsigned long long res;
        unsigned int rv;
-       int overflow;
 
        res = 0;
        rv = 0;
-       overflow = 0;
        while (*s) {
                unsigned int val;
 
@@ -71,15 +69,13 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
                 */
                if (unlikely(res & (~0ull << 60))) {
                        if (res > div_u64(ULLONG_MAX - val, base))
-                               overflow = 1;
+                               rv |= KSTRTOX_OVERFLOW;
                }
                res = res * base + val;
                rv++;
                s++;
        }
        *p = res;
-       if (overflow)
-               rv |= KSTRTOX_OVERFLOW;
        return rv;
 }
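
Net effect of the hunk above: the overflow flag is set in the return value as soon as it is detected instead of at the end of parsing. A hedged caller-side illustration (kstrtoull() maps KSTRTOX_OVERFLOW to -ERANGE):

#include <linux/kernel.h>

static int overflow_example(void)
{
	unsigned long long val;

	/* More digits than fit in 64 bits, so this returns -ERANGE. */
	return kstrtoull("99999999999999999999999999", 10, &val);
}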
 
index 26caf51cc2383e80bc1902c6c65b66102e8d5af0..75554754eadfeac67e7ab160ad8e6aa45b2392ce 100644 (file)
 #include <linux/delay.h>
 #include <linux/kprobes.h>
 #include <linux/nmi.h>
+#include <linux/cpu.h>
 
-#ifdef arch_trigger_all_cpu_backtrace
+#ifdef arch_trigger_cpumask_backtrace
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
-/* "in progress" flag of arch_trigger_all_cpu_backtrace */
+/* "in progress" flag of arch_trigger_cpumask_backtrace */
 static unsigned long backtrace_flag;
 
 /*
- * When raise() is called it will be is passed a pointer to the
+ * When raise() is called it will be passed a pointer to the
  * backtrace_mask. Architectures that call nmi_cpu_backtrace()
  * directly from their raise() functions may rely on the mask
  * they are passed being updated as a side effect of this call.
  */
-void nmi_trigger_all_cpu_backtrace(bool include_self,
+void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
+                                  bool exclude_self,
                                   void (*raise)(cpumask_t *mask))
 {
        int i, this_cpu = get_cpu();
@@ -44,13 +46,22 @@ void nmi_trigger_all_cpu_backtrace(bool include_self,
                return;
        }
 
-       cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
-       if (!include_self)
+       cpumask_copy(to_cpumask(backtrace_mask), mask);
+       if (exclude_self)
                cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
 
+       /*
+        * Don't try to send an NMI to this cpu; it may work on some
+        * architectures, but on others it may not, and we'll get
+        * information at least as useful just by doing a dump_stack() here.
+        * Note that nmi_cpu_backtrace(NULL) will clear the cpu bit.
+        */
+       if (cpumask_test_cpu(this_cpu, to_cpumask(backtrace_mask)))
+               nmi_cpu_backtrace(NULL);
+
        if (!cpumask_empty(to_cpumask(backtrace_mask))) {
-               pr_info("Sending NMI to %s CPUs:\n",
-                       (include_self ? "all" : "other"));
+               pr_info("Sending NMI from CPU %d to CPUs %*pbl:\n",
+                       this_cpu, nr_cpumask_bits, to_cpumask(backtrace_mask));
                raise(to_cpumask(backtrace_mask));
        }
 
@@ -77,11 +88,16 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
        int cpu = smp_processor_id();
 
        if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-               pr_warn("NMI backtrace for cpu %d\n", cpu);
-               if (regs)
-                       show_regs(regs);
-               else
-                       dump_stack();
+               if (regs && cpu_in_idle(instruction_pointer(regs))) {
+                       pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n",
+                               cpu, instruction_pointer(regs));
+               } else {
+                       pr_warn("NMI backtrace for cpu %d\n", cpu);
+                       if (regs)
+                               show_regs(regs);
+                       else
+                               dump_stack();
+               }
                cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
                return true;
        }
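
The renamed helper takes an explicit cpumask plus an exclude-self flag; an architecture wraps it roughly as below. The raise() body is an x86-flavoured assumption, not code from this hunk.

static void sketch_raise_nmi(cpumask_t *mask)
{
	/* Arch-specific: send an NMI IPI to every CPU still set in @mask. */
	apic->send_IPI_mask(mask, NMI_VECTOR);
}

void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
	nmi_trigger_cpumask_backtrace(mask, exclude_self, sketch_raise_nmi);
}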
index 60f77f1d470a0589ccdeacbb52a9eb45a8a1cec1..da015cda4f4baf68b4007a91a8d40287fe7f48b5 100644 (file)
@@ -243,6 +243,12 @@ depot_stack_handle_t depot_save_stack(struct stack_trace *trace,
                alloc_flags &= ~GFP_ZONEMASK;
                alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
                alloc_flags |= __GFP_NOWARN;
+               /*
+                * Avoid using current->mempolicy which may already have
+                * been freed -- we may be in the process of saving the
+                * stack for exactly that __mpol_put() call.
+                */
+               alloc_flags |= __GFP_THISNODE;
                page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
                if (page)
                        prealloc = page_address(page);
index 9c5fe81104135364bca9f2b0da47f4e2a1ed51fc..7e35fc450c5bb780121cf3e6df0a87abaa740f5b 100644 (file)
@@ -1,6 +1,7 @@
 #include <linux/compiler.h>
 #include <linux/export.h>
 #include <linux/kasan-checks.h>
+#include <linux/thread_info.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -111,6 +112,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
                long retval;
 
                kasan_check_write(dst, count);
+               check_object_size(dst, count, false);
                user_access_begin();
                retval = do_strncpy_from_user(dst, src, count, max);
                user_access_end();
index 0aa7dda5240291b87cc8d43e7795cb1078a4507f..a869f84f44d38a8905619b71052bfadc0de81dea 100644 (file)
 #include <linux/init.h>
 #include <linux/pfn.h>
 #include <linux/slab.h>
-#include <linux/bootmem.h>
 #include <linux/export.h>
 #include <linux/kmemleak.h>
 #include <linux/range.h>
-#include <linux/memblock.h>
 #include <linux/bug.h>
 #include <linux/io.h>
-
-#include <asm/processor.h>
+#include <linux/bootmem.h>
 
 #include "internal.h"
 
@@ -712,7 +709,7 @@ void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
        void *ptr;
 
        if (WARN_ON_ONCE(slab_is_available()))
-               return kzalloc(size, GFP_NOWAIT);
+               return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
 again:
 
        /* do not panic in alloc_bootmem_bdata() */
@@ -738,9 +735,6 @@ again:
 void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
                                   unsigned long align, unsigned long goal)
 {
-       if (WARN_ON_ONCE(slab_is_available()))
-               return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-
        return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
 }
 
@@ -812,10 +806,6 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
 
 }
 
-#ifndef ARCH_LOW_ADDRESS_LIMIT
-#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
-#endif
-
 /**
  * __alloc_bootmem_low - allocate low boot memory
  * @size: size of the request in bytes
index 9affb290830421de71f82eac65d7e4f7ce78cb9d..86d4d0bbfc7c78c10fb40c16a766374212414b45 100644 (file)
@@ -997,8 +997,12 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
 #ifdef CONFIG_COMPACTION
 
 /* Returns true if the page is within a block suitable for migration to */
-static bool suitable_migration_target(struct page *page)
+static bool suitable_migration_target(struct compact_control *cc,
+                                                       struct page *page)
 {
+       if (cc->ignore_block_suitable)
+               return true;
+
        /* If the page is a large free page, then disallow migration */
        if (PageBuddy(page)) {
                /*
@@ -1083,7 +1087,7 @@ static void isolate_freepages(struct compact_control *cc)
                        continue;
 
                /* Check the block is suitable for migration */
-               if (!suitable_migration_target(page))
+               if (!suitable_migration_target(cc, page))
                        continue;
 
                /* If isolation recently failed, do not retry */
@@ -1316,7 +1320,7 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_
                return COMPACT_CONTINUE;
 
        /* Compaction run is not finished if the watermark is not met */
-       watermark = low_wmark_pages(zone);
+       watermark = zone->watermark[cc->alloc_flags & ALLOC_WMARK_MASK];
 
        if (!zone_watermark_ok(zone, cc->order, watermark, cc->classzone_idx,
                                                        cc->alloc_flags))
@@ -1329,13 +1333,13 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_
 
                /* Job done if page is free of the right migratetype */
                if (!list_empty(&area->free_list[migratetype]))
-                       return COMPACT_PARTIAL;
+                       return COMPACT_SUCCESS;
 
 #ifdef CONFIG_CMA
                /* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
                if (migratetype == MIGRATE_MOVABLE &&
                        !list_empty(&area->free_list[MIGRATE_CMA]))
-                       return COMPACT_PARTIAL;
+                       return COMPACT_SUCCESS;
 #endif
                /*
                 * Job done if allocation would steal freepages from
@@ -1343,7 +1347,7 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_
                 */
                if (find_suitable_fallback(area, order, migratetype,
                                                true, &can_steal) != -1)
-                       return COMPACT_PARTIAL;
+                       return COMPACT_SUCCESS;
        }
 
        return COMPACT_NO_SUITABLE_PAGE;
@@ -1367,7 +1371,7 @@ static enum compact_result compact_finished(struct zone *zone,
  * compaction_suitable: Is this suitable to run compaction on this zone now?
  * Returns
  *   COMPACT_SKIPPED  - If there are too few free pages for compaction
- *   COMPACT_PARTIAL  - If the allocation would succeed without compaction
+ *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
  *   COMPACT_CONTINUE - If compaction should run now
  */
 static enum compact_result __compaction_suitable(struct zone *zone, int order,
@@ -1381,23 +1385,34 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
        if (is_via_compact_memory(order))
                return COMPACT_CONTINUE;
 
-       watermark = low_wmark_pages(zone);
+       watermark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
        /*
         * If watermarks for high-order allocation are already met, there
         * should be no need for compaction at all.
         */
        if (zone_watermark_ok(zone, order, watermark, classzone_idx,
                                                                alloc_flags))
-               return COMPACT_PARTIAL;
+               return COMPACT_SUCCESS;
 
        /*
-        * Watermarks for order-0 must be met for compaction. Note the 2UL.
-        * This is because during migration, copies of pages need to be
-        * allocated and for a short time, the footprint is higher
+        * Watermarks for order-0 must be met for compaction to be able to
+        * isolate free pages for migration targets. This means that the
+        * watermark and alloc_flags have to match, or be more pessimistic than
+        * the check in __isolate_free_page(). We don't use the direct
+        * compactor's alloc_flags, as they are not relevant for freepage
+        * isolation. We however do use the direct compactor's classzone_idx to
+        * skip over zones where lowmem reserves would prevent allocation even
+        * if compaction succeeds.
+        * For costly orders, we require the low watermark instead of the min
+        * watermark for compaction to proceed, to increase its chances.
+        * ALLOC_CMA is used, as pages in CMA pageblocks are considered
+        * suitable migration targets.
         */
-       watermark += (2UL << order);
+       watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
+                               low_wmark_pages(zone) : min_wmark_pages(zone);
+       watermark += compact_gap(order);
        if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
-                                alloc_flags, wmark_target))
+                                               ALLOC_CMA, wmark_target))
                return COMPACT_SKIPPED;
 
        /*
@@ -1477,7 +1492,7 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
        ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
                                                        cc->classzone_idx);
        /* Compaction is likely to fail */
-       if (ret == COMPACT_PARTIAL || ret == COMPACT_SKIPPED)
+       if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
                return ret;
 
        /* huh, compaction_suitable is returning something unexpected */
@@ -1492,23 +1507,29 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
 
        /*
         * Setup to move all movable pages to the end of the zone. Used cached
-        * information on where the scanners should start but check that it
-        * is initialised by ensuring the values are within zone boundaries.
+        * information on where the scanners should start (unless we explicitly
+        * want to compact the whole zone), but check that it is initialised
+        * by ensuring the values are within zone boundaries.
         */
-       cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
-       cc->free_pfn = zone->compact_cached_free_pfn;
-       if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
-               cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
-               zone->compact_cached_free_pfn = cc->free_pfn;
-       }
-       if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
+       if (cc->whole_zone) {
                cc->migrate_pfn = start_pfn;
-               zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
-               zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
-       }
+               cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
+       } else {
+               cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
+               cc->free_pfn = zone->compact_cached_free_pfn;
+               if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
+                       cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
+                       zone->compact_cached_free_pfn = cc->free_pfn;
+               }
+               if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
+                       cc->migrate_pfn = start_pfn;
+                       zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
+                       zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
+               }
 
-       if (cc->migrate_pfn == start_pfn)
-               cc->whole_zone = true;
+               if (cc->migrate_pfn == start_pfn)
+                       cc->whole_zone = true;
+       }
 
        cc->last_migrated_pfn = 0;
 
@@ -1638,6 +1659,9 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
                .alloc_flags = alloc_flags,
                .classzone_idx = classzone_idx,
                .direct_compaction = true,
+               .whole_zone = (prio == MIN_COMPACT_PRIORITY),
+               .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
+               .ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
        };
        INIT_LIST_HEAD(&cc.freepages);
        INIT_LIST_HEAD(&cc.migratepages);
@@ -1683,7 +1707,8 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
                                                                ac->nodemask) {
                enum compact_result status;
 
-               if (compaction_deferred(zone, order)) {
+               if (prio > MIN_COMPACT_PRIORITY
+                                       && compaction_deferred(zone, order)) {
                        rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
                        continue;
                }
@@ -1692,9 +1717,8 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
                                        alloc_flags, ac_classzone_idx(ac));
                rc = max(status, rc);
 
-               /* If a normal allocation would succeed, stop compacting */
-               if (zone_watermark_ok(zone, order, low_wmark_pages(zone),
-                                       ac_classzone_idx(ac), alloc_flags)) {
+               /* The allocation should succeed, stop compacting */
+               if (status == COMPACT_SUCCESS) {
                        /*
                         * We think the allocation will succeed in this zone,
                         * but it is not certain, hence the false. The caller
@@ -1730,10 +1754,18 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 
 
 /* Compact all zones within a node */
-static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
+static void compact_node(int nid)
 {
+       pg_data_t *pgdat = NODE_DATA(nid);
        int zoneid;
        struct zone *zone;
+       struct compact_control cc = {
+               .order = -1,
+               .mode = MIGRATE_SYNC,
+               .ignore_skip_hint = true,
+               .whole_zone = true,
+       };
+
 
        for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
 
@@ -1741,60 +1773,19 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
                if (!populated_zone(zone))
                        continue;
 
-               cc->nr_freepages = 0;
-               cc->nr_migratepages = 0;
-               cc->zone = zone;
-               INIT_LIST_HEAD(&cc->freepages);
-               INIT_LIST_HEAD(&cc->migratepages);
-
-               /*
-                * When called via /proc/sys/vm/compact_memory
-                * this makes sure we compact the whole zone regardless of
-                * cached scanner positions.
-                */
-               if (is_via_compact_memory(cc->order))
-                       __reset_isolation_suitable(zone);
-
-               if (is_via_compact_memory(cc->order) ||
-                               !compaction_deferred(zone, cc->order))
-                       compact_zone(zone, cc);
-
-               VM_BUG_ON(!list_empty(&cc->freepages));
-               VM_BUG_ON(!list_empty(&cc->migratepages));
+               cc.nr_freepages = 0;
+               cc.nr_migratepages = 0;
+               cc.zone = zone;
+               INIT_LIST_HEAD(&cc.freepages);
+               INIT_LIST_HEAD(&cc.migratepages);
 
-               if (is_via_compact_memory(cc->order))
-                       continue;
+               compact_zone(zone, &cc);
 
-               if (zone_watermark_ok(zone, cc->order,
-                               low_wmark_pages(zone), 0, 0))
-                       compaction_defer_reset(zone, cc->order, false);
+               VM_BUG_ON(!list_empty(&cc.freepages));
+               VM_BUG_ON(!list_empty(&cc.migratepages));
        }
 }
 
-void compact_pgdat(pg_data_t *pgdat, int order)
-{
-       struct compact_control cc = {
-               .order = order,
-               .mode = MIGRATE_ASYNC,
-       };
-
-       if (!order)
-               return;
-
-       __compact_pgdat(pgdat, &cc);
-}
-
-static void compact_node(int nid)
-{
-       struct compact_control cc = {
-               .order = -1,
-               .mode = MIGRATE_SYNC,
-               .ignore_skip_hint = true,
-       };
-
-       __compact_pgdat(NODE_DATA(nid), &cc);
-}
-
 /* Compact all nodes in the system */
 static void compact_nodes(void)
 {
@@ -1900,8 +1891,6 @@ static void kcompactd_do_work(pg_data_t *pgdat)
                .ignore_skip_hint = true,
 
        };
-       bool success = false;
-
        trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
                                                        cc.classzone_idx);
        count_vm_event(KCOMPACTD_WAKE);
@@ -1930,9 +1919,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
                        return;
                status = compact_zone(zone, &cc);
 
-               if (zone_watermark_ok(zone, cc.order, low_wmark_pages(zone),
-                                               cc.classzone_idx, 0)) {
-                       success = true;
+               if (status == COMPACT_SUCCESS) {
                        compaction_defer_reset(zone, cc.order, false);
                } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
                        /*
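
compact_gap(order), used in the order-0 watermark hunk above, replaces the open-coded 2UL << order slack; presumably it is a trivial helper along these lines (an assumption, since the compaction.h change is not part of this excerpt):

/* Assumed shape of the helper named in the watermark check above. */
static inline unsigned long compact_gap(unsigned int order)
{
	/* Headroom for the migration targets plus the pages being migrated. */
	return 2UL << order;
}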
index 8865bfb41b0b6e514a8f297c335a8682763bad6a..74c7cae4f6837dcb5a599df800e7300ff851519c 100644 (file)
@@ -42,9 +42,11 @@ const struct trace_print_flags vmaflag_names[] = {
 
 void __dump_page(struct page *page, const char *reason)
 {
+       int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
+
        pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx",
-                 page, page_ref_count(page), page_mapcount(page),
-                 page->mapping, page->index);
+                 page, page_ref_count(page), mapcount,
+                 page->mapping, page_to_pgoff(page));
        if (PageCompound(page))
                pr_cont(" compound_mapcount: %d", compound_mapcount(page));
        pr_cont("\n");
index 8a287dfc53722c724fa3ec21992609fa2a8c3c58..cbe57570a03d240817f408e9da43ade2d747b794 100644 (file)
@@ -1708,7 +1708,9 @@ find_page:
                         * wait_on_page_locked is used to avoid unnecessarily
                         * serialisations and why it's safe.
                         */
-                       wait_on_page_locked_killable(page);
+                       error = wait_on_page_locked_killable(page);
+                       if (unlikely(error))
+                               goto readpage_error;
                        if (PageUptodate(page))
                                goto page_ok;
 
index a6abd76baa725d56eb1e9adeb8ee1b6fca86cb80..d76700d280879f68a3457effb4978de19a51d39b 100644 (file)
@@ -59,7 +59,7 @@ static struct shrinker deferred_split_shrinker;
 static atomic_t huge_zero_refcount;
 struct page *huge_zero_page __read_mostly;
 
-struct page *get_huge_zero_page(void)
+static struct page *get_huge_zero_page(void)
 {
        struct page *zero_page;
 retry:
@@ -86,7 +86,7 @@ retry:
        return READ_ONCE(huge_zero_page);
 }
 
-void put_huge_zero_page(void)
+static void put_huge_zero_page(void)
 {
        /*
         * Counter should never go to zero here. Only shrinker can put
@@ -95,6 +95,26 @@ void put_huge_zero_page(void)
        BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
 }
 
+struct page *mm_get_huge_zero_page(struct mm_struct *mm)
+{
+       if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
+               return READ_ONCE(huge_zero_page);
+
+       if (!get_huge_zero_page())
+               return NULL;
+
+       if (test_and_set_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
+               put_huge_zero_page();
+
+       return READ_ONCE(huge_zero_page);
+}
+
+void mm_put_huge_zero_page(struct mm_struct *mm)
+{
+       if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
+               put_huge_zero_page();
+}
+
 static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink,
                                        struct shrink_control *sc)
 {
@@ -469,6 +489,49 @@ void prep_transhuge_page(struct page *page)
        set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
 }
 
+unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len,
+               loff_t off, unsigned long flags, unsigned long size)
+{
+       unsigned long addr;
+       loff_t off_end = off + len;
+       loff_t off_align = round_up(off, size);
+       unsigned long len_pad;
+
+       if (off_end <= off_align || (off_end - off_align) < size)
+               return 0;
+
+       len_pad = len + size;
+       if (len_pad < len || (off + len_pad) < off)
+               return 0;
+
+       addr = current->mm->get_unmapped_area(filp, 0, len_pad,
+                                             off >> PAGE_SHIFT, flags);
+       if (IS_ERR_VALUE(addr))
+               return 0;
+
+       addr += (off - addr) & (size - 1);
+       return addr;
+}
+
+unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
+               unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+       loff_t off = (loff_t)pgoff << PAGE_SHIFT;
+
+       if (addr)
+               goto out;
+       if (!IS_DAX(filp->f_mapping->host) || !IS_ENABLED(CONFIG_FS_DAX_PMD))
+               goto out;
+
+       addr = __thp_get_unmapped_area(filp, len, off, flags, PMD_SIZE);
+       if (addr)
+               return addr;
+
+ out:
+       return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+}
+EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
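
Filesystems that want PMD-aligned DAX mappings are expected to wire the new helper into their file_operations; a hedged sketch (the actual ext2/ext4/xfs hooks live elsewhere in this merge):

static const struct file_operations sketch_dax_fops = {
	.mmap			= generic_file_mmap,
	.get_unmapped_area	= thp_get_unmapped_area,
	/* remaining methods elided */
};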
+
 static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
                gfp_t gfp)
 {
@@ -601,7 +664,7 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
                pgtable = pte_alloc_one(vma->vm_mm, haddr);
                if (unlikely(!pgtable))
                        return VM_FAULT_OOM;
-               zero_page = get_huge_zero_page();
+               zero_page = mm_get_huge_zero_page(vma->vm_mm);
                if (unlikely(!zero_page)) {
                        pte_free(vma->vm_mm, pgtable);
                        count_vm_event(THP_FAULT_FALLBACK);
@@ -623,10 +686,8 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
                        }
                } else
                        spin_unlock(fe->ptl);
-               if (!set) {
+               if (!set)
                        pte_free(vma->vm_mm, pgtable);
-                       put_huge_zero_page();
-               }
                return ret;
        }
        gfp = alloc_hugepage_direct_gfpmask(vma);
@@ -780,7 +841,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 * since we already have a zero page to copy. It just takes a
                 * reference.
                 */
-               zero_page = get_huge_zero_page();
+               zero_page = mm_get_huge_zero_page(dst_mm);
                set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
                                zero_page);
                ret = 0;
@@ -1038,7 +1099,6 @@ alloc:
                update_mmu_cache_pmd(vma, fe->address, fe->pmd);
                if (!page) {
                        add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
-                       put_huge_zero_page();
                } else {
                        VM_BUG_ON_PAGE(!PageHead(page), page);
                        page_remove_rmap(page, true);
@@ -1502,7 +1562,6 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
        }
        smp_wmb(); /* make pte visible before pmd */
        pmd_populate(mm, pmd, pgtable);
-       put_huge_zero_page();
 }
 
 static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
@@ -1525,8 +1584,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 
        if (!vma_is_anonymous(vma)) {
                _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
-               if (is_huge_zero_pmd(_pmd))
-                       put_huge_zero_page();
                if (vma_is_dax(vma))
                        return;
                page = pmd_page(_pmd);
index 1501304f87a41aaa48b0b32d16917a51ba73a1b7..537ac9951f5fa6256975152fc599ff44cc24ac95 100644 (file)
@@ -178,8 +178,9 @@ struct compact_control {
        unsigned long last_migrated_pfn;/* Not yet flushed page being freed */
        enum migrate_mode mode;         /* Async or sync migration mode */
        bool ignore_skip_hint;          /* Scan blocks even if marked skip */
+       bool ignore_block_suitable;     /* Scan blocks considered unsuitable */
        bool direct_compaction;         /* False from kcompactd or /proc/... */
-       bool whole_zone;                /* Whole zone has been scanned */
+       bool whole_zone;                /* Whole zone should/has been scanned */
        int order;                      /* order a direct compactor needs */
        const gfp_t gfp_mask;           /* gfp mask of a direct compactor */
        const unsigned int alloc_flags; /* alloc flags of a direct compactor */
index 79c52d0061af591b0417e5d67462f49fda8ac632..728d7790dc2da27175d403f314a359cec70da5fc 100644 (file)
@@ -838,7 +838,8 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
  * value (scan code).
  */
 
-static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
+static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
+               struct vm_area_struct **vmap)
 {
        struct vm_area_struct *vma;
        unsigned long hstart, hend;
@@ -846,7 +847,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
        if (unlikely(khugepaged_test_exit(mm)))
                return SCAN_ANY_PROCESS;
 
-       vma = find_vma(mm, address);
+       *vmap = vma = find_vma(mm, address);
        if (!vma)
                return SCAN_VMA_NULL;
 
@@ -881,6 +882,11 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
                .pmd = pmd,
        };
 
+       /* we only decide to swap in if there are enough young ptes */
+       if (referenced < HPAGE_PMD_NR/2) {
+               trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
+               return false;
+       }
        fe.pte = pte_offset_map(pmd, address);
        for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE;
                        fe.pte++, fe.address += PAGE_SIZE) {
@@ -888,17 +894,12 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
                if (!is_swap_pte(pteval))
                        continue;
                swapped_in++;
-               /* we only decide to swapin, if there is enough young ptes */
-               if (referenced < HPAGE_PMD_NR/2) {
-                       trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
-                       return false;
-               }
                ret = do_swap_page(&fe, pteval);
 
                /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
                if (ret & VM_FAULT_RETRY) {
                        down_read(&mm->mmap_sem);
-                       if (hugepage_vma_revalidate(mm, address)) {
+                       if (hugepage_vma_revalidate(mm, address, &fe.vma)) {
                                /* vma is no longer available, don't continue to swapin */
                                trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
                                return false;
@@ -923,7 +924,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 static void collapse_huge_page(struct mm_struct *mm,
                                   unsigned long address,
                                   struct page **hpage,
-                                  struct vm_area_struct *vma,
                                   int node, int referenced)
 {
        pmd_t *pmd, _pmd;
@@ -933,6 +933,7 @@ static void collapse_huge_page(struct mm_struct *mm,
        spinlock_t *pmd_ptl, *pte_ptl;
        int isolated = 0, result = 0;
        struct mem_cgroup *memcg;
+       struct vm_area_struct *vma;
        unsigned long mmun_start;       /* For mmu_notifiers */
        unsigned long mmun_end;         /* For mmu_notifiers */
        gfp_t gfp;
@@ -961,7 +962,7 @@ static void collapse_huge_page(struct mm_struct *mm,
        }
 
        down_read(&mm->mmap_sem);
-       result = hugepage_vma_revalidate(mm, address);
+       result = hugepage_vma_revalidate(mm, address, &vma);
        if (result) {
                mem_cgroup_cancel_charge(new_page, memcg, true);
                up_read(&mm->mmap_sem);
@@ -994,7 +995,7 @@ static void collapse_huge_page(struct mm_struct *mm,
         * handled by the anon_vma lock + PG_lock.
         */
        down_write(&mm->mmap_sem);
-       result = hugepage_vma_revalidate(mm, address);
+       result = hugepage_vma_revalidate(mm, address, &vma);
        if (result)
                goto out;
        /* check if the pmd is still valid */
@@ -1202,7 +1203,7 @@ out_unmap:
        if (ret) {
                node = khugepaged_find_target_node();
                /* collapse_huge_page will return with the mmap_sem released */
-               collapse_huge_page(mm, address, hpage, vma, node, referenced);
+               collapse_huge_page(mm, address, hpage, node, referenced);
        }
 out:
        trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
index 483197ef613f258838c40ca4e9869bc126e7e9ea..c8dfa430342be77ad35cea98dfad746c36aa26b3 100644 (file)
@@ -1438,6 +1438,11 @@ phys_addr_t __init_memblock memblock_phys_mem_size(void)
        return memblock.memory.total_size;
 }
 
+phys_addr_t __init_memblock memblock_reserved_size(void)
+{
+       return memblock.reserved.total_size;
+}
+
 phys_addr_t __init memblock_mem_size(unsigned long limit_pfn)
 {
        unsigned long pages = 0;
index 9a6a51a7c416074ccd5d974ac9f17a54c85d14db..7a8d6624758a559489e8a6295be6b99cb2ad5c82 100644 (file)
@@ -920,6 +920,43 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
             iter != NULL;                              \
             iter = mem_cgroup_iter(NULL, iter, NULL))
 
+/**
+ * mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy
+ * @memcg: hierarchy root
+ * @fn: function to call for each task
+ * @arg: argument passed to @fn
+ *
+ * This function iterates over tasks attached to @memcg or to any of its
+ * descendants and calls @fn for each task. If @fn returns a non-zero
+ * value, the function breaks the iteration loop and returns the value.
+ * Otherwise, it will iterate over all tasks and return 0.
+ *
+ * This function must not be called for the root memory cgroup.
+ */
+int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
+                         int (*fn)(struct task_struct *, void *), void *arg)
+{
+       struct mem_cgroup *iter;
+       int ret = 0;
+
+       BUG_ON(memcg == root_mem_cgroup);
+
+       for_each_mem_cgroup_tree(iter, memcg) {
+               struct css_task_iter it;
+               struct task_struct *task;
+
+               css_task_iter_start(&iter->css, &it);
+               while (!ret && (task = css_task_iter_next(&it)))
+                       ret = fn(task, arg);
+               css_task_iter_end(&it);
+               if (ret) {
+                       mem_cgroup_iter_break(memcg, iter);
+                       break;
+               }
+       }
+       return ret;
+}
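
The iterator's contract is spelled out in the kernel-doc above; a minimal hedged example of a callback that counts tasks in a memcg hierarchy:

static int count_one(struct task_struct *task, void *arg)
{
	(*(unsigned long *)arg)++;
	return 0;	/* keep iterating */
}

static unsigned long count_memcg_tasks(struct mem_cgroup *memcg)
{
	unsigned long n = 0;

	mem_cgroup_scan_tasks(memcg, count_one, &n);
	return n;
}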
+
 /**
  * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
  * @page: the page
@@ -1178,7 +1215,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
 /*
  * Return the memory (and swap, if configured) limit for a memcg.
  */
-static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
+unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
        unsigned long limit;
 
@@ -1205,79 +1242,12 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
                .gfp_mask = gfp_mask,
                .order = order,
        };
-       struct mem_cgroup *iter;
-       unsigned long chosen_points = 0;
-       unsigned long totalpages;
-       unsigned int points = 0;
-       struct task_struct *chosen = NULL;
+       bool ret;
 
        mutex_lock(&oom_lock);
-
-       /*
-        * If current has a pending SIGKILL or is exiting, then automatically
-        * select it.  The goal is to allow it to allocate so that it may
-        * quickly exit and free its memory.
-        */
-       if (task_will_free_mem(current)) {
-               mark_oom_victim(current);
-               wake_oom_reaper(current);
-               goto unlock;
-       }
-
-       check_panic_on_oom(&oc, CONSTRAINT_MEMCG);
-       totalpages = mem_cgroup_get_limit(memcg) ? : 1;
-       for_each_mem_cgroup_tree(iter, memcg) {
-               struct css_task_iter it;
-               struct task_struct *task;
-
-               css_task_iter_start(&iter->css, &it);
-               while ((task = css_task_iter_next(&it))) {
-                       switch (oom_scan_process_thread(&oc, task)) {
-                       case OOM_SCAN_SELECT:
-                               if (chosen)
-                                       put_task_struct(chosen);
-                               chosen = task;
-                               chosen_points = ULONG_MAX;
-                               get_task_struct(chosen);
-                               /* fall through */
-                       case OOM_SCAN_CONTINUE:
-                               continue;
-                       case OOM_SCAN_ABORT:
-                               css_task_iter_end(&it);
-                               mem_cgroup_iter_break(memcg, iter);
-                               if (chosen)
-                                       put_task_struct(chosen);
-                               /* Set a dummy value to return "true". */
-                               chosen = (void *) 1;
-                               goto unlock;
-                       case OOM_SCAN_OK:
-                               break;
-                       };
-                       points = oom_badness(task, memcg, NULL, totalpages);
-                       if (!points || points < chosen_points)
-                               continue;
-                       /* Prefer thread group leaders for display purposes */
-                       if (points == chosen_points &&
-                           thread_group_leader(chosen))
-                               continue;
-
-                       if (chosen)
-                               put_task_struct(chosen);
-                       chosen = task;
-                       chosen_points = points;
-                       get_task_struct(chosen);
-               }
-               css_task_iter_end(&it);
-       }
-
-       if (chosen) {
-               points = chosen_points * 1000 / totalpages;
-               oom_kill_process(&oc, chosen, points, totalpages,
-                                "Memory cgroup out of memory");
-       }
-unlock:
+       ret = out_of_memory(&oc);
        mutex_unlock(&oom_lock);
-       return chosen;
+       return ret;
 }
 
 #if MAX_NUMNODES > 1
@@ -1600,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
        if (!memcg)
                return false;
 
-       if (!handle || oom_killer_disabled)
+       if (!handle)
                goto cleanup;
 
        owait.memcg = memcg;
@@ -4079,11 +4049,13 @@ static DEFINE_IDR(mem_cgroup_idr);
 
 static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
 {
+       VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0);
        atomic_add(n, &memcg->id.ref);
 }
 
 static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
+       VM_BUG_ON(atomic_read(&memcg->id.ref) < n);
        if (atomic_sub_and_test(n, &memcg->id.ref)) {
                idr_remove(&mem_cgroup_idr, memcg->id.id);
                memcg->id.id = 0;
@@ -4272,8 +4244,10 @@ fail:
 
 static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
+       struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
        /* Online state pins memcg ID, memcg ID pins CSS */
-       mem_cgroup_id_get(mem_cgroup_from_css(css));
+       atomic_set(&memcg->id.ref, 1);
        css_get(css);
        return 0;
 }
@@ -4421,7 +4395,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
         * Because lookup_swap_cache() updates some statistics counter,
         * we call find_get_page() with swapper_space directly.
         */
-       page = find_get_page(swap_address_space(ent), ent.val);
+       page = find_get_page(swap_address_space(ent), swp_offset(ent));
        if (do_memsw_account())
                entry->val = ent.val;
 
@@ -4459,7 +4433,8 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
                        swp_entry_t swp = radix_to_swp_entry(page);
                        if (do_memsw_account())
                                *entry = swp;
-                       page = find_get_page(swap_address_space(swp), swp.val);
+                       page = find_get_page(swap_address_space(swp),
+                                            swp_offset(swp));
                }
        } else
                page = find_get_page(mapping, pgoff);
@@ -4694,7 +4669,8 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
                .mm = mm,
        };
        down_read(&mm->mmap_sem);
-       walk_page_range(0, ~0UL, &mem_cgroup_count_precharge_walk);
+       walk_page_range(0, mm->highest_vm_end,
+                       &mem_cgroup_count_precharge_walk);
        up_read(&mm->mmap_sem);
 
        precharge = mc.precharge;
@@ -4982,7 +4958,8 @@ retry:
         * When we have consumed all precharges and failed in doing
         * additional charge, the page walk just aborts.
         */
-       walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
+       walk_page_range(0, mc.mm->highest_vm_end, &mem_cgroup_move_charge_walk);
+
        up_read(&mc.mm->mmap_sem);
        atomic_dec(&mc.from->moving_account);
 }
index 83be99d9d8a15006e2cb267a3dd46e10e470f66e..62ff122a7a237a353b08fb9514b104dca521209b 100644 (file)
@@ -1649,10 +1649,14 @@ EXPORT_SYMBOL(vm_insert_pfn_prot);
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
                        pfn_t pfn)
 {
+       pgprot_t pgprot = vma->vm_page_prot;
+
        BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
 
        if (addr < vma->vm_start || addr >= vma->vm_end)
                return -EFAULT;
+       if (track_pfn_insert(vma, &pgprot, pfn))
+               return -EINVAL;
 
        /*
         * If we don't have pte special, then we have to use the pfn_valid()
@@ -1670,9 +1674,9 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
                 * result in pfn_t_has_page() == false.
                 */
                page = pfn_to_page(pfn_t_to_pfn(pfn));
-               return insert_page(vma, addr, page, vma->vm_page_prot);
+               return insert_page(vma, addr, page, pgprot);
        }
-       return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+       return insert_pfn(vma, addr, pfn, pgprot);
 }
 EXPORT_SYMBOL(vm_insert_mixed);
 
@@ -3656,6 +3660,19 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
                         mem_cgroup_oom_synchronize(false);
        }
 
+       /*
+        * This mm has already been reaped by the oom reaper, so a refault
+        * cannot be trusted in general. Anonymous refaults would lose data
+        * and hand back a zero page instead, for example. This is especially
+        * a problem for use_mm(): regular tasks will just die and the
+        * corrupted data will not be visible anywhere, while a kthread
+        * will outlive the oom victim and potentially propagate the data
+        * further.
+        */
+       if (unlikely((current->flags & PF_KTHREAD) && !(ret & VM_FAULT_ERROR)
+                               && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags)))
+               ret = VM_FAULT_SIGBUS;
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(handle_mm_fault);
index 41266dc29f33fb1278d7e4e9d6fd2efab69380a1..b58906b6215cbb3d2b43abbcdb758e0809c42c7f 100644 (file)
@@ -1567,7 +1567,9 @@ static struct page *new_node_page(struct page *page, unsigned long private,
                return alloc_huge_page_node(page_hstate(compound_head(page)),
                                        next_node_in(nid, nmask));
 
-       node_clear(nid, nmask);
+       if (nid != next_node_in(nid, nmask))
+               node_clear(nid, nmask);
+
        if (PageHighMem(page)
            || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
                gfp_mask |= __GFP_HIGHMEM;
index 2da72a5b6ecc04f87bd9168fc31d613360e40f2e..ad1c96ac313c0f442b1635baaa96555d355a74bb 100644 (file)
@@ -1749,7 +1749,7 @@ unsigned int mempolicy_slab_node(void)
                 */
                struct zonelist *zonelist;
                enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL);
-               zonelist = &NODE_DATA(node)->node_zonelists[0];
+               zonelist = &NODE_DATA(node)->node_zonelists[ZONELIST_FALLBACK];
                z = first_zones_zonelist(zonelist, highest_zoneidx,
                                                        &policy->v.nodes);
                return z->zone ? z->zone->node : node;
index c0b5ba965200942741347500c0b6a739434b715d..bfb866435478b33dada2231b5f64553f7208c75e 100644 (file)
@@ -66,7 +66,8 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
                 */
                if (radix_tree_exceptional_entry(page)) {
                        swp_entry_t swp = radix_to_swp_entry(page);
-                       page = find_get_page(swap_address_space(swp), swp.val);
+                       page = find_get_page(swap_address_space(swp),
+                                            swp_offset(swp));
                }
        } else
                page = find_get_page(mapping, pgoff);
@@ -150,7 +151,7 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                        } else {
 #ifdef CONFIG_SWAP
                                *vec = mincore_page(swap_address_space(entry),
-                                       entry.val);
+                                                   swp_offset(entry));
 #else
                                WARN_ON(1);
                                *vec = 1;
index 14645be06e301cfc4a62301e25818365e766874e..145a4258ddbc775d1d2acf133a21b83280a8f282 100644 (file)
@@ -516,6 +516,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
        int nr_pages;
        int ret = 0;
        int lock = !!(newflags & VM_LOCKED);
+       vm_flags_t old_flags = vma->vm_flags;
 
        if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
            is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))
@@ -550,6 +551,8 @@ success:
        nr_pages = (end - start) >> PAGE_SHIFT;
        if (!lock)
                nr_pages = -nr_pages;
+       else if (old_flags & VM_LOCKED)
+               nr_pages = 0;
        mm->locked_vm += nr_pages;
 
        /*
@@ -617,6 +620,45 @@ static int apply_vma_lock_flags(unsigned long start, size_t len,
        return error;
 }
 
+/*
+ * Go through the vma areas and sum the size of the mlocked
+ * vma pages.
+ * Note that the deferred memory locking case (mlock2(,,MLOCK_ONFAULT))
+ * is also counted.
+ * Return value: the number of previously mlocked pages.
+ */
+static int count_mm_mlocked_page_nr(struct mm_struct *mm,
+               unsigned long start, size_t len)
+{
+       struct vm_area_struct *vma;
+       int count = 0;
+
+       if (mm == NULL)
+               mm = current->mm;
+
+       vma = find_vma(mm, start);
+       if (vma == NULL)
+               vma = mm->mmap;
+
+       for (; vma ; vma = vma->vm_next) {
+               if (start >= vma->vm_end)
+                       continue;
+               if (start + len <=  vma->vm_start)
+                       break;
+               if (vma->vm_flags & VM_LOCKED) {
+                       if (start > vma->vm_start)
+                               count -= (start - vma->vm_start);
+                       if (start + len < vma->vm_end) {
+                               count += start + len - vma->vm_start;
+                               break;
+                       }
+                       count += vma->vm_end - vma->vm_start;
+               }
+       }
+
+       return count >> PAGE_SHIFT;
+}
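A worked example of the accounting above (illustrative, assuming 4 KiB pages): for a VM_LOCKED vma spanning [0x1000, 0x9000) and a request with start = 0x3000 and len = 0x4000, the loop first subtracts the locked bytes before start and then adds up to the end of the request before breaking:

	count -= start - vma->vm_start;		/* count = -0x2000        */
	count += start + len - vma->vm_start;	/* count =  0x4000, break */

count >> PAGE_SHIFT is then 4, i.e. the four already-locked pages in [0x3000, 0x7000) that do_mlock() must not charge a second time.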
+
 static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
 {
        unsigned long locked;
@@ -639,6 +681,16 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
                return -EINTR;
 
        locked += current->mm->locked_vm;
+       if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
+               /*
+                * It is possible that the requested region intersects with
+                * previously mlocked areas; that part of "mm->locked_vm"
+                * should not be counted towards the new mlock increment.
+                * So check and adjust the locked count if necessary.
+                */
+               locked -= count_mm_mlocked_page_nr(current->mm,
+                               start, len);
+       }
 
        /* check against resource limits */
        if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
index bd05a70f44b96c81d4d7c119fd4a807f1af15bd6..490d46abddad2732346465befa7a3349bccc9e75 100644 (file)
 #include <linux/init.h>
 #include <linux/pfn.h>
 #include <linux/slab.h>
-#include <linux/bootmem.h>
 #include <linux/export.h>
 #include <linux/kmemleak.h>
 #include <linux/range.h>
 #include <linux/memblock.h>
+#include <linux/bootmem.h>
 
 #include <asm/bug.h>
 #include <asm/io.h>
-#include <asm/processor.h>
 
 #include "internal.h"
 
+#ifndef CONFIG_HAVE_MEMBLOCK
+#error CONFIG_HAVE_MEMBLOCK not defined
+#endif
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 struct pglist_data __refdata contig_page_data;
 EXPORT_SYMBOL(contig_page_data);
@@ -395,9 +398,6 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
        return __alloc_bootmem_node(pgdat, size, align, goal);
 }
 
-#ifndef ARCH_LOW_ADDRESS_LIMIT
-#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
-#endif
 
 /**
  * __alloc_bootmem_low - allocate low boot memory
index d53a9aa00977cbd0f81970e9e8a30b011cc73f31..f284e92a71f07630fb1a326b384d757ef6903cb6 100644 (file)
@@ -132,6 +132,11 @@ static inline bool is_sysrq_oom(struct oom_control *oc)
        return oc->order == -1;
 }
 
+static inline bool is_memcg_oom(struct oom_control *oc)
+{
+       return oc->memcg != NULL;
+}
+
 /* return true if the task is not adequate as candidate victim task. */
 static bool oom_unkillable_task(struct task_struct *p,
                struct mem_cgroup *memcg, const nodemask_t *nodemask)
@@ -181,7 +186,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
         */
        adj = (long)p->signal->oom_score_adj;
        if (adj == OOM_SCORE_ADJ_MIN ||
-                       test_bit(MMF_OOM_REAPED, &p->mm->flags) ||
+                       test_bit(MMF_OOM_SKIP, &p->mm->flags) ||
                        in_vfork(p)) {
                task_unlock(p);
                return 0;
@@ -213,12 +218,17 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
        return points > 0 ? points : 1;
 }
 
+enum oom_constraint {
+       CONSTRAINT_NONE,
+       CONSTRAINT_CPUSET,
+       CONSTRAINT_MEMORY_POLICY,
+       CONSTRAINT_MEMCG,
+};
+
 /*
  * Determine the type of allocation constraint.
  */
-#ifdef CONFIG_NUMA
-static enum oom_constraint constrained_alloc(struct oom_control *oc,
-                                            unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc)
 {
        struct zone *zone;
        struct zoneref *z;
@@ -226,8 +236,16 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc,
        bool cpuset_limited = false;
        int nid;
 
+       if (is_memcg_oom(oc)) {
+               oc->totalpages = mem_cgroup_get_limit(oc->memcg) ?: 1;
+               return CONSTRAINT_MEMCG;
+       }
+
        /* Default to all available memory */
-       *totalpages = totalram_pages + total_swap_pages;
+       oc->totalpages = totalram_pages + total_swap_pages;
+
+       if (!IS_ENABLED(CONFIG_NUMA))
+               return CONSTRAINT_NONE;
 
        if (!oc->zonelist)
                return CONSTRAINT_NONE;
@@ -246,9 +264,9 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc,
         */
        if (oc->nodemask &&
            !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
-               *totalpages = total_swap_pages;
+               oc->totalpages = total_swap_pages;
                for_each_node_mask(nid, *oc->nodemask)
-                       *totalpages += node_spanned_pages(nid);
+                       oc->totalpages += node_spanned_pages(nid);
                return CONSTRAINT_MEMORY_POLICY;
        }
 
@@ -259,98 +277,84 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc,
                        cpuset_limited = true;
 
        if (cpuset_limited) {
-               *totalpages = total_swap_pages;
+               oc->totalpages = total_swap_pages;
                for_each_node_mask(nid, cpuset_current_mems_allowed)
-                       *totalpages += node_spanned_pages(nid);
+                       oc->totalpages += node_spanned_pages(nid);
                return CONSTRAINT_CPUSET;
        }
        return CONSTRAINT_NONE;
 }
-#else
-static enum oom_constraint constrained_alloc(struct oom_control *oc,
-                                            unsigned long *totalpages)
-{
-       *totalpages = totalram_pages + total_swap_pages;
-       return CONSTRAINT_NONE;
-}
-#endif
 
-enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
-                                       struct task_struct *task)
+static int oom_evaluate_task(struct task_struct *task, void *arg)
 {
+       struct oom_control *oc = arg;
+       unsigned long points;
+
        if (oom_unkillable_task(task, NULL, oc->nodemask))
-               return OOM_SCAN_CONTINUE;
+               goto next;
 
        /*
         * This task already has access to memory reserves and is being killed.
         * Don't allow any other task to have access to the reserves unless
-        * the task has MMF_OOM_REAPED because chances that it would release
+        * the task has MMF_OOM_SKIP because chances that it would release
         * any memory is quite low.
         */
-       if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims)) {
-               struct task_struct *p = find_lock_task_mm(task);
-               enum oom_scan_t ret = OOM_SCAN_ABORT;
-
-               if (p) {
-                       if (test_bit(MMF_OOM_REAPED, &p->mm->flags))
-                               ret = OOM_SCAN_CONTINUE;
-                       task_unlock(p);
-               }
-
-               return ret;
+       if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) {
+               if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags))
+                       goto next;
+               goto abort;
        }
 
        /*
         * If task is allocating a lot of memory and has been marked to be
         * killed first if it triggers an oom, then select it.
         */
-       if (oom_task_origin(task))
-               return OOM_SCAN_SELECT;
+       if (oom_task_origin(task)) {
+               points = ULONG_MAX;
+               goto select;
+       }
 
-       return OOM_SCAN_OK;
+       points = oom_badness(task, NULL, oc->nodemask, oc->totalpages);
+       if (!points || points < oc->chosen_points)
+               goto next;
+
+       /* Prefer thread group leaders for display purposes */
+       if (points == oc->chosen_points && thread_group_leader(oc->chosen))
+               goto next;
+select:
+       if (oc->chosen)
+               put_task_struct(oc->chosen);
+       get_task_struct(task);
+       oc->chosen = task;
+       oc->chosen_points = points;
+next:
+       return 0;
+abort:
+       if (oc->chosen)
+               put_task_struct(oc->chosen);
+       oc->chosen = (void *)-1UL;
+       return 1;
 }
 
 /*
- * Simple selection loop. We chose the process with the highest
- * number of 'points'.  Returns -1 on scan abort.
+ * Simple selection loop. We choose the process with the highest number of
+ * 'points'. In case the scan was aborted, oc->chosen is set to -1.
  */
-static struct task_struct *select_bad_process(struct oom_control *oc,
-               unsigned int *ppoints, unsigned long totalpages)
+static void select_bad_process(struct oom_control *oc)
 {
-       struct task_struct *p;
-       struct task_struct *chosen = NULL;
-       unsigned long chosen_points = 0;
-
-       rcu_read_lock();
-       for_each_process(p) {
-               unsigned int points;
-
-               switch (oom_scan_process_thread(oc, p)) {
-               case OOM_SCAN_SELECT:
-                       chosen = p;
-                       chosen_points = ULONG_MAX;
-                       /* fall through */
-               case OOM_SCAN_CONTINUE:
-                       continue;
-               case OOM_SCAN_ABORT:
-                       rcu_read_unlock();
-                       return (struct task_struct *)(-1UL);
-               case OOM_SCAN_OK:
-                       break;
-               };
-               points = oom_badness(p, NULL, oc->nodemask, totalpages);
-               if (!points || points < chosen_points)
-                       continue;
+       if (is_memcg_oom(oc))
+               mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
+       else {
+               struct task_struct *p;
 
-               chosen = p;
-               chosen_points = points;
+               rcu_read_lock();
+               for_each_process(p)
+                       if (oom_evaluate_task(p, oc))
+                               break;
+               rcu_read_unlock();
        }
-       if (chosen)
-               get_task_struct(chosen);
-       rcu_read_unlock();
 
-       *ppoints = chosen_points * 1000 / totalpages;
-       return chosen;
+       oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages;
 }
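The final statement rescales the victim's raw badness, which oom_badness() computes on a page-granular scale, to a 0..1000 range relative to the allowed memory. With made-up numbers for illustration:

	oc->totalpages    = 262144;	/* e.g. a 1 GiB limit with 4 KiB pages */
	oc->chosen_points = 52429;	/* chosen victim's badness, in pages   */
	/* 52429 * 1000 / 262144 == 200, i.e. the victim uses ~20% of the limit */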
 
 /**
@@ -402,6 +406,8 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
        pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
                current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
                current->signal->oom_score_adj);
+       if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order)
+               pr_warn("COMPACTION is disabled!!!\n");
 
        cpuset_print_current_mems_allowed();
        dump_stack();
@@ -419,7 +425,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
 static atomic_t oom_victims = ATOMIC_INIT(0);
 static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
 
-bool oom_killer_disabled __read_mostly;
+static bool oom_killer_disabled __read_mostly;
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
 
@@ -452,12 +458,10 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
 static struct task_struct *oom_reaper_list;
 static DEFINE_SPINLOCK(oom_reaper_lock);
 
-static bool __oom_reap_task(struct task_struct *tsk)
+static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 {
        struct mmu_gather tlb;
        struct vm_area_struct *vma;
-       struct mm_struct *mm = NULL;
-       struct task_struct *p;
        struct zap_details details = {.check_swap_entries = true,
                                      .ignore_dirty = true};
        bool ret = true;
@@ -465,7 +469,7 @@ static bool __oom_reap_task(struct task_struct *tsk)
        /*
         * We have to make sure to not race with the victim exit path
         * and cause premature new oom victim selection:
-        * __oom_reap_task              exit_mm
+        * __oom_reap_task_mm           exit_mm
         *   mmget_not_zero
         *                                mmput
         *                                  atomic_dec_and_test
@@ -478,22 +482,9 @@ static bool __oom_reap_task(struct task_struct *tsk)
         */
        mutex_lock(&oom_lock);
 
-       /*
-        * Make sure we find the associated mm_struct even when the particular
-        * thread has already terminated and cleared its mm.
-        * We might have race with exit path so consider our work done if there
-        * is no mm.
-        */
-       p = find_lock_task_mm(tsk);
-       if (!p)
-               goto unlock_oom;
-       mm = p->mm;
-       atomic_inc(&mm->mm_count);
-       task_unlock(p);
-
        if (!down_read_trylock(&mm->mmap_sem)) {
                ret = false;
-               goto mm_drop;
+               goto unlock_oom;
        }
 
        /*
@@ -503,9 +494,17 @@ static bool __oom_reap_task(struct task_struct *tsk)
         */
        if (!mmget_not_zero(mm)) {
                up_read(&mm->mmap_sem);
-               goto mm_drop;
+               goto unlock_oom;
        }
 
+       /*
+        * Tell all users of get_user/copy_from_user etc. that the content
+        * is no longer stable. No barriers are really needed because unmapping
+        * should imply barriers already and the reader would hit a page fault
+        * if it stumbled over reaped memory.
+        */
+       set_bit(MMF_UNSTABLE, &mm->flags);
+
        tlb_gather_mmu(&tlb, mm, 0, -1);
        for (vma = mm->mmap ; vma; vma = vma->vm_next) {
                if (is_vm_hugetlb_page(vma))
@@ -540,19 +539,12 @@ static bool __oom_reap_task(struct task_struct *tsk)
                        K(get_mm_counter(mm, MM_SHMEMPAGES)));
        up_read(&mm->mmap_sem);
 
-       /*
-        * This task can be safely ignored because we cannot do much more
-        * to release its memory.
-        */
-       set_bit(MMF_OOM_REAPED, &mm->flags);
        /*
         * Drop our reference but make sure the mmput slow path is called from a
         * different context because we shouldn't risk we get stuck there and
         * put the oom_reaper out of the way.
         */
        mmput_async(mm);
-mm_drop:
-       mmdrop(mm);
 unlock_oom:
        mutex_unlock(&oom_lock);
        return ret;
@@ -562,44 +554,28 @@ unlock_oom:
 static void oom_reap_task(struct task_struct *tsk)
 {
        int attempts = 0;
+       struct mm_struct *mm = tsk->signal->oom_mm;
 
        /* Retry the down_read_trylock(mmap_sem) a few times */
-       while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task(tsk))
+       while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
                schedule_timeout_idle(HZ/10);
 
-       if (attempts > MAX_OOM_REAP_RETRIES) {
-               struct task_struct *p;
+       if (attempts <= MAX_OOM_REAP_RETRIES)
+               goto done;
 
-               pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
-                               task_pid_nr(tsk), tsk->comm);
 
-               /*
-                * If we've already tried to reap this task in the past and
-                * failed it probably doesn't make much sense to try yet again
-                * so hide the mm from the oom killer so that it can move on
-                * to another task with a different mm struct.
-                */
-               p = find_lock_task_mm(tsk);
-               if (p) {
-                       if (test_and_set_bit(MMF_OOM_NOT_REAPABLE, &p->mm->flags)) {
-                               pr_info("oom_reaper: giving up pid:%d (%s)\n",
-                                               task_pid_nr(tsk), tsk->comm);
-                               set_bit(MMF_OOM_REAPED, &p->mm->flags);
-                       }
-                       task_unlock(p);
-               }
+       pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
+               task_pid_nr(tsk), tsk->comm);
+       debug_show_all_locks();
 
-               debug_show_all_locks();
-       }
+done:
+       tsk->oom_reaper_list = NULL;
 
        /*
-        * Clear TIF_MEMDIE because the task shouldn't be sitting on a
-        * reasonably reclaimable memory anymore or it is not a good candidate
-        * for the oom victim right now because it cannot release its memory
-        * itself nor by the oom reaper.
+        * Hide this mm from the OOM killer because it has either been reaped
+        * or somebody can't call up_write(mmap_sem).
         */
-       tsk->oom_reaper_list = NULL;
-       exit_oom_victim(tsk);
+       set_bit(MMF_OOM_SKIP, &mm->flags);
 
        /* Drop a reference taken by wake_oom_reaper */
        put_task_struct(tsk);
@@ -607,8 +583,6 @@ static void oom_reap_task(struct task_struct *tsk)
 
 static int oom_reaper(void *unused)
 {
-       set_freezable();
-
        while (true) {
                struct task_struct *tsk = NULL;
 
@@ -627,7 +601,7 @@ static int oom_reaper(void *unused)
        return 0;
 }
 
-void wake_oom_reaper(struct task_struct *tsk)
+static void wake_oom_reaper(struct task_struct *tsk)
 {
        if (!oom_reaper_th)
                return;
@@ -656,7 +630,11 @@ static int __init oom_init(void)
        return 0;
 }
 subsys_initcall(oom_init)
-#endif
+#else
+static inline void wake_oom_reaper(struct task_struct *tsk)
+{
+}
+#endif /* CONFIG_MMU */
 
 /**
  * mark_oom_victim - mark the given task as OOM victim
@@ -664,14 +642,23 @@ subsys_initcall(oom_init)
  *
  * Has to be called with oom_lock held and never after
  * oom has been disabled already.
+ *
+ * tsk->mm has to be non-NULL and the caller has to guarantee it is stable
+ * (either under task_lock or operating on current).
  */
-void mark_oom_victim(struct task_struct *tsk)
+static void mark_oom_victim(struct task_struct *tsk)
 {
+       struct mm_struct *mm = tsk->mm;
+
        WARN_ON(oom_killer_disabled);
        /* OOM killer might race with memcg OOM */
        if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
                return;
-       atomic_inc(&tsk->signal->oom_victims);
+
+       /* oom_mm is bound to the signal struct lifetime. */
+       if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm))
+               atomic_inc(&tsk->signal->oom_mm->mm_count);
+
        /*
         * Make sure that the task is woken up from uninterruptible sleep
         * if it is frozen because OOM killer wouldn't be able to free
@@ -685,21 +672,29 @@ void mark_oom_victim(struct task_struct *tsk)
 /**
  * exit_oom_victim - note the exit of an OOM victim
  */
-void exit_oom_victim(struct task_struct *tsk)
+void exit_oom_victim(void)
 {
-       if (!test_and_clear_tsk_thread_flag(tsk, TIF_MEMDIE))
-               return;
-       atomic_dec(&tsk->signal->oom_victims);
+       clear_thread_flag(TIF_MEMDIE);
 
        if (!atomic_dec_return(&oom_victims))
                wake_up_all(&oom_victims_wait);
 }
 
+/**
+ * oom_killer_enable - enable OOM killer
+ */
+void oom_killer_enable(void)
+{
+       oom_killer_disabled = false;
+}
+
 /**
  * oom_killer_disable - disable OOM killer
+ * @timeout: maximum timeout to wait for oom victims in jiffies
  *
  * Forces all page allocations to fail rather than trigger OOM killer.
- * Will block and wait until all OOM victims are killed.
+ * Will block and wait until all OOM victims are killed or the given
+ * timeout expires.
  *
  * The function cannot be called when there are runnable user tasks because
  * the userspace would see unexpected allocation failures as a result. Any
@@ -708,8 +703,10 @@ void exit_oom_victim(struct task_struct *tsk)
  * Returns true if successful and false if the OOM killer cannot be
  * disabled.
  */
-bool oom_killer_disable(void)
+bool oom_killer_disable(signed long timeout)
 {
+       signed long ret;
+
        /*
         * Make sure to not race with an ongoing OOM killer. Check that the
         * current is not killed (possibly due to sharing the victim's memory).
@@ -719,19 +716,16 @@ bool oom_killer_disable(void)
        oom_killer_disabled = true;
        mutex_unlock(&oom_lock);
 
-       wait_event(oom_victims_wait, !atomic_read(&oom_victims));
+       ret = wait_event_interruptible_timeout(oom_victims_wait,
+                       !atomic_read(&oom_victims), timeout);
+       if (ret <= 0) {
+               oom_killer_enable();
+               return false;
+       }
 
        return true;
 }
 
-/**
- * oom_killer_enable - enable OOM killer
- */
-void oom_killer_enable(void)
-{
-       oom_killer_disabled = false;
-}
-
 static inline bool __task_will_free_mem(struct task_struct *task)
 {
        struct signal_struct *sig = task->signal;
@@ -760,7 +754,7 @@ static inline bool __task_will_free_mem(struct task_struct *task)
  * Caller has to make sure that task->mm is stable (hold task_lock or
  * it operates on the current).
  */
-bool task_will_free_mem(struct task_struct *task)
+static bool task_will_free_mem(struct task_struct *task)
 {
        struct mm_struct *mm = task->mm;
        struct task_struct *p;
@@ -781,15 +775,16 @@ bool task_will_free_mem(struct task_struct *task)
         * This task has already been drained by the oom reaper so there are
         * only small chances it will free some more
         */
-       if (test_bit(MMF_OOM_REAPED, &mm->flags))
+       if (test_bit(MMF_OOM_SKIP, &mm->flags))
                return false;
 
        if (atomic_read(&mm->mm_users) <= 1)
                return true;
 
        /*
-        * This is really pessimistic but we do not have any reliable way
-        * to check that external processes share with our mm
+        * Make sure that all tasks which share the mm with the given task
+        * are dying as well, so that a) nobody pins its mm and
+        * b) the task is also reapable by the oom reaper.
         */
        rcu_read_lock();
        for_each_process(p) {
@@ -806,14 +801,10 @@ bool task_will_free_mem(struct task_struct *task)
        return ret;
 }
 
-/*
- * Must be called while holding a reference to p, which will be released upon
- * returning.
- */
-void oom_kill_process(struct oom_control *oc, struct task_struct *p,
-                     unsigned int points, unsigned long totalpages,
-                     const char *message)
+static void oom_kill_process(struct oom_control *oc, const char *message)
 {
+       struct task_struct *p = oc->chosen;
+       unsigned int points = oc->chosen_points;
        struct task_struct *victim = p;
        struct task_struct *child;
        struct task_struct *t;
@@ -860,7 +851,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
                         * oom_badness() returns 0 if the thread is unkillable
                         */
                        child_points = oom_badness(child,
-                                       oc->memcg, oc->nodemask, totalpages);
+                               oc->memcg, oc->nodemask, oc->totalpages);
                        if (child_points > victim_points) {
                                put_task_struct(victim);
                                victim = child;
@@ -913,20 +904,20 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
                        continue;
                if (same_thread_group(p, victim))
                        continue;
-               if (unlikely(p->flags & PF_KTHREAD) || is_global_init(p)) {
-                       /*
-                        * We cannot use oom_reaper for the mm shared by this
-                        * process because it wouldn't get killed and so the
-                        * memory might be still used. Hide the mm from the oom
-                        * killer to guarantee OOM forward progress.
-                        */
+               if (is_global_init(p)) {
                        can_oom_reap = false;
-                       set_bit(MMF_OOM_REAPED, &mm->flags);
+                       set_bit(MMF_OOM_SKIP, &mm->flags);
                        pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
                                        task_pid_nr(victim), victim->comm,
                                        task_pid_nr(p), p->comm);
                        continue;
                }
+               /*
+                * No use_mm() user needs to read from userspace, so we are
+                * ok to reap it.
+                */
+               if (unlikely(p->flags & PF_KTHREAD))
+                       continue;
                do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
        }
        rcu_read_unlock();
@@ -942,7 +933,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 /*
  * Determines whether the kernel must panic because of the panic_on_oom sysctl.
  */
-void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint)
+static void check_panic_on_oom(struct oom_control *oc,
+                              enum oom_constraint constraint)
 {
        if (likely(!sysctl_panic_on_oom))
                return;
@@ -988,19 +980,18 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
  */
 bool out_of_memory(struct oom_control *oc)
 {
-       struct task_struct *p;
-       unsigned long totalpages;
        unsigned long freed = 0;
-       unsigned int uninitialized_var(points);
        enum oom_constraint constraint = CONSTRAINT_NONE;
 
        if (oom_killer_disabled)
                return false;
 
-       blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
-       if (freed > 0)
-               /* Got some memory back in the last second. */
-               return true;
+       if (!is_memcg_oom(oc)) {
+               blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
+               if (freed > 0)
+                       /* Got some memory back in the last second. */
+                       return true;
+       }
 
        /*
         * If current has a pending SIGKILL or is exiting, then automatically
@@ -1024,37 +1015,38 @@ bool out_of_memory(struct oom_control *oc)
 
        /*
         * Check if there were limitations on the allocation (only relevant for
-        * NUMA) that may require different handling.
+        * NUMA and memcg) that may require different handling.
         */
-       constraint = constrained_alloc(oc, &totalpages);
+       constraint = constrained_alloc(oc);
        if (constraint != CONSTRAINT_MEMORY_POLICY)
                oc->nodemask = NULL;
        check_panic_on_oom(oc, constraint);
 
-       if (sysctl_oom_kill_allocating_task && current->mm &&
-           !oom_unkillable_task(current, NULL, oc->nodemask) &&
+       if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
+           current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
            current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
                get_task_struct(current);
-               oom_kill_process(oc, current, 0, totalpages,
-                                "Out of memory (oom_kill_allocating_task)");
+               oc->chosen = current;
+               oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
                return true;
        }
 
-       p = select_bad_process(oc, &points, totalpages);
+       select_bad_process(oc);
        /* Found nothing?!?! Either we hang forever, or we panic. */
-       if (!p && !is_sysrq_oom(oc)) {
+       if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) {
                dump_header(oc, NULL);
                panic("Out of memory and no killable processes...\n");
        }
-       if (p && p != (void *)-1UL) {
-               oom_kill_process(oc, p, points, totalpages, "Out of memory");
+       if (oc->chosen && oc->chosen != (void *)-1UL) {
+               oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
+                                "Memory cgroup out of memory");
                /*
                 * Give the killed process a good chance to exit before trying
                 * to allocate memory again.
                 */
                schedule_timeout_killable(1);
        }
-       return true;
+       return !!oc->chosen;
 }
 
 /*
@@ -1077,16 +1069,6 @@ void pagefault_out_of_memory(void)
 
        if (!mutex_trylock(&oom_lock))
                return;
-
-       if (!out_of_memory(&oc)) {
-               /*
-                * There shouldn't be any user tasks runnable while the
-                * OOM killer is disabled, so the current task has to
-                * be a racing OOM victim for which oom_killer_disable()
-                * is waiting for.
-                */
-               WARN_ON(test_thread_flag(TIF_MEMDIE));
-       }
-
+       out_of_memory(&oc);
        mutex_unlock(&oom_lock);
 }
index 28d6f36a2d79c26c30e25d52d01a8dc95dbaa4ac..439cc63ad903fb898192188367670efad4a2d1fa 100644 (file)
@@ -1965,36 +1965,6 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
        return false;
 }
 
-void throttle_vm_writeout(gfp_t gfp_mask)
-{
-       unsigned long background_thresh;
-       unsigned long dirty_thresh;
-
-        for ( ; ; ) {
-               global_dirty_limits(&background_thresh, &dirty_thresh);
-               dirty_thresh = hard_dirty_limit(&global_wb_domain, dirty_thresh);
-
-                /*
-                 * Boost the allowable dirty threshold a bit for page
-                 * allocators so they don't get DoS'ed by heavy writers
-                 */
-                dirty_thresh += dirty_thresh / 10;      /* wheeee... */
-
-                if (global_node_page_state(NR_UNSTABLE_NFS) +
-                       global_node_page_state(NR_WRITEBACK) <= dirty_thresh)
-                               break;
-                congestion_wait(BLK_RW_ASYNC, HZ/10);
-
-               /*
-                * The caller might hold locks which can prevent IO completion
-                * or progress in the filesystem.  So we cannot just sit here
-                * waiting for IO to complete.
-                */
-               if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
-                       break;
-        }
-}
-
 /*
  * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
  */
@@ -2746,7 +2716,7 @@ int test_clear_page_writeback(struct page *page)
        int ret;
 
        lock_page_memcg(page);
-       if (mapping) {
+       if (mapping && mapping_use_writeback_tags(mapping)) {
                struct inode *inode = mapping->host;
                struct backing_dev_info *bdi = inode_to_bdi(inode);
                unsigned long flags;
@@ -2789,7 +2759,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
        int ret;
 
        lock_page_memcg(page);
-       if (mapping) {
+       if (mapping && mapping_use_writeback_tags(mapping)) {
                struct inode *inode = mapping->host;
                struct backing_dev_info *bdi = inode_to_bdi(inode);
                unsigned long flags;
index 41940f6e3c1c07eec3467f41de4e5337e5d4b654..721d62c5be69977bc595f9cd8d8d9e5ce618ea26 100644 (file)
@@ -260,7 +260,7 @@ int watermark_scale_factor = 10;
 
 static unsigned long __meminitdata nr_kernel_pages;
 static unsigned long __meminitdata nr_all_pages;
-static unsigned long __meminitdata dma_reserve;
+static unsigned long __meminitdata nr_memory_reserve;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
@@ -613,6 +613,9 @@ static bool need_debug_guardpage(void)
        if (!debug_pagealloc_enabled())
                return false;
 
+       if (!debug_guardpage_minorder())
+               return false;
+
        return true;
 }
 
@@ -621,6 +624,9 @@ static void init_debug_guardpage(void)
        if (!debug_pagealloc_enabled())
                return;
 
+       if (!debug_guardpage_minorder())
+               return;
+
        _debug_guardpage_enabled = true;
 }
 
@@ -641,19 +647,22 @@ static int __init debug_guardpage_minorder_setup(char *buf)
        pr_info("Setting debug_guardpage_minorder to %lu\n", res);
        return 0;
 }
-__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
+early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
 
-static inline void set_page_guard(struct zone *zone, struct page *page,
+static inline bool set_page_guard(struct zone *zone, struct page *page,
                                unsigned int order, int migratetype)
 {
        struct page_ext *page_ext;
 
        if (!debug_guardpage_enabled())
-               return;
+               return false;
+
+       if (order >= debug_guardpage_minorder())
+               return false;
 
        page_ext = lookup_page_ext(page);
        if (unlikely(!page_ext))
-               return;
+               return false;
 
        __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
 
@@ -661,6 +670,8 @@ static inline void set_page_guard(struct zone *zone, struct page *page,
        set_page_private(page, order);
        /* Guard pages are not available for any usage */
        __mod_zone_freepage_state(zone, -(1 << order), migratetype);
+
+       return true;
 }
 
 static inline void clear_page_guard(struct zone *zone, struct page *page,
@@ -682,9 +693,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
                __mod_zone_freepage_state(zone, (1 << order), migratetype);
 }
 #else
-struct page_ext_operations debug_guardpage_ops = { NULL, };
-static inline void set_page_guard(struct zone *zone, struct page *page,
-                               unsigned int order, int migratetype) {}
+struct page_ext_operations debug_guardpage_ops;
+static inline bool set_page_guard(struct zone *zone, struct page *page,
+                       unsigned int order, int migratetype) { return false; }
 static inline void clear_page_guard(struct zone *zone, struct page *page,
                                unsigned int order, int migratetype) {}
 #endif
@@ -1419,15 +1430,18 @@ static void __init deferred_free_range(struct page *page,
                return;
 
        /* Free a large naturally-aligned chunk if possible */
-       if (nr_pages == MAX_ORDER_NR_PAGES &&
-           (pfn & (MAX_ORDER_NR_PAGES-1)) == 0) {
+       if (nr_pages == pageblock_nr_pages &&
+           (pfn & (pageblock_nr_pages - 1)) == 0) {
                set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-               __free_pages_boot_core(page, MAX_ORDER-1);
+               __free_pages_boot_core(page, pageblock_order);
                return;
        }
 
-       for (i = 0; i < nr_pages; i++, page++)
+       for (i = 0; i < nr_pages; i++, page++, pfn++) {
+               if ((pfn & (pageblock_nr_pages - 1)) == 0)
+                       set_pageblock_migratetype(page, MIGRATE_MOVABLE);
                __free_pages_boot_core(page, 0);
+       }
 }
 
 /* Completion tracking for deferred_init_memmap() threads */
@@ -1495,9 +1509,9 @@ static int __init deferred_init_memmap(void *data)
 
                        /*
                         * Ensure pfn_valid is checked every
-                        * MAX_ORDER_NR_PAGES for memory holes
+                        * pageblock_nr_pages for memory holes
                         */
-                       if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) {
+                       if ((pfn & (pageblock_nr_pages - 1)) == 0) {
                                if (!pfn_valid(pfn)) {
                                        page = NULL;
                                        goto free_range;
@@ -1510,7 +1524,7 @@ static int __init deferred_init_memmap(void *data)
                        }
 
                        /* Minimise pfn page lookups and scheduler checks */
-                       if (page && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0) {
+                       if (page && (pfn & (pageblock_nr_pages - 1)) != 0) {
                                page++;
                        } else {
                                nr_pages += nr_to_free;
@@ -1546,6 +1560,9 @@ free_range:
                        free_base_page = NULL;
                        free_base_pfn = nr_to_free = 0;
                }
+               /* Free the last block of pages to the allocator */
+               nr_pages += nr_to_free;
+               deferred_free_range(free_base_page, free_base_pfn, nr_to_free);
 
                first_init_pfn = max(end_pfn, first_init_pfn);
        }
@@ -1642,18 +1659,15 @@ static inline void expand(struct zone *zone, struct page *page,
                size >>= 1;
                VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
 
-               if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) &&
-                       debug_guardpage_enabled() &&
-                       high < debug_guardpage_minorder()) {
-                       /*
-                        * Mark as guard pages (or page), that will allow to
-                        * merge back to allocator when buddy will be freed.
-                        * Corresponding page table entries will not be touched,
-                        * pages will stay not present in virtual address space
-                        */
-                       set_page_guard(zone, &page[size], high, migratetype);
+               /*
+                * Mark as guard pages (or page); this will allow them to be
+                * merged back into the allocator when the buddy is freed.
+                * Corresponding page table entries will not be touched;
+                * pages will stay not present in the virtual address space.
+                */
+               if (set_page_guard(zone, &page[size], high, migratetype))
                        continue;
-               }
+
                list_add(&page[size].lru, &area->free_list[migratetype]);
                area->nr_free++;
                set_page_order(&page[size], high);
@@ -2515,9 +2529,14 @@ int __isolate_free_page(struct page *page, unsigned int order)
        mt = get_pageblock_migratetype(page);
 
        if (!is_migrate_isolate(mt)) {
-               /* Obey watermarks as if the page was being allocated */
-               watermark = low_wmark_pages(zone) + (1 << order);
-               if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+               /*
+                * Obey watermarks as if the page was being allocated. We can
+                * emulate a high-order watermark check with a raised order-0
+                * watermark, because we already know our high-order page
+                * exists.
+                */
+               watermark = min_wmark_pages(zone) + (1UL << order);
+               if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
                        return 0;
 
                __mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -3163,6 +3182,61 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        return NULL;
 }
 
+static inline bool
+should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
+                    enum compact_result compact_result,
+                    enum compact_priority *compact_priority,
+                    int compaction_retries)
+{
+       int max_retries = MAX_COMPACT_RETRIES;
+       int min_priority;
+
+       if (!order)
+               return false;
+
+       /*
+        * Compaction considers all the zones as desperately out of memory,
+        * so it doesn't really make much sense to retry except when the
+        * failure could be caused by insufficient priority.
+        */
+       if (compaction_failed(compact_result))
+               goto check_priority;
+
+       /*
+        * Make sure the compaction wasn't deferred or didn't bail out early
+        * due to lock contention before we declare that we should give up.
+        * But do not retry if the given zonelist is not suitable for
+        * compaction.
+        */
+       if (compaction_withdrawn(compact_result))
+               return compaction_zonelist_suitable(ac, order, alloc_flags);
+
+       /*
+        * !costly requests are much more important than costly __GFP_REPEAT
+        * ones because they are de facto nofail and invoke the OOM killer
+        * to move on, while costly requests can fail and their users are
+        * ready to cope with that. 1/4 of the retries is rather arbitrary,
+        * but we would need much more detailed feedback from compaction to
+        * make a better decision.
+        */
+       if (order > PAGE_ALLOC_COSTLY_ORDER)
+               max_retries /= 4;
+       if (compaction_retries <= max_retries)
+               return true;
+
+       /*
+        * Make sure there is at least one attempt at the highest priority
+        * if we exhausted all retries at the lower priorities
+        */
+check_priority:
+       min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
+                       MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
+       if (*compact_priority > min_priority) {
+               (*compact_priority)--;
+               return true;
+       }
+       return false;
+}
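To make the retry budget concrete (assuming MAX_COMPACT_RETRIES is 16 and PAGE_ALLOC_COSTLY_ORDER is 3, as used elsewhere in this file): a costly order-4 request gets max_retries = 16 / 4 = 4 compaction retries per priority level; once those are exhausted, check_priority bumps the effort by decrementing *compact_priority one step per call until MIN_COMPACT_COSTLY_PRIORITY is reached, after which should_compact_retry() returns false and the allocator stops retrying compaction.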
 #else
 static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
@@ -3173,8 +3247,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        return NULL;
 }
 
-#endif /* CONFIG_COMPACTION */
-
 static inline bool
 should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
                     enum compact_result compact_result,
@@ -3201,6 +3273,7 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
        }
        return false;
 }
+#endif /* CONFIG_COMPACTION */
 
 /* Perform direct synchronous page reclaim */
 static int
@@ -4581,7 +4654,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
        int j;
        struct zonelist *zonelist;
 
-       zonelist = &pgdat->node_zonelists[0];
+       zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
        for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
                ;
        j = build_zonelists_node(NODE_DATA(node), zonelist, j);
@@ -4597,7 +4670,7 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
        int j;
        struct zonelist *zonelist;
 
-       zonelist = &pgdat->node_zonelists[1];
+       zonelist = &pgdat->node_zonelists[ZONELIST_NOFALLBACK];
        j = build_zonelists_node(pgdat, zonelist, 0);
        zonelist->_zonerefs[j].zone = NULL;
        zonelist->_zonerefs[j].zone_idx = 0;
@@ -4618,7 +4691,7 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
        struct zone *z;
        struct zonelist *zonelist;
 
-       zonelist = &pgdat->node_zonelists[0];
+       zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
        pos = 0;
        for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) {
                for (j = 0; j < nr_nodes; j++) {
@@ -4753,7 +4826,7 @@ static void build_zonelists(pg_data_t *pgdat)
 
        local_node = pgdat->node_id;
 
-       zonelist = &pgdat->node_zonelists[0];
+       zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
        j = build_zonelists_node(pgdat, zonelist, 0);
 
        /*
@@ -5025,15 +5098,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                        break;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-               /*
-                * If not mirrored_kernelcore and ZONE_MOVABLE exists, range
-                * from zone_movable_pfn[nid] to end of each node should be
-                * ZONE_MOVABLE not ZONE_NORMAL. skip it.
-                */
-               if (!mirrored_kernelcore && zone_movable_pfn[nid])
-                       if (zone == ZONE_NORMAL && pfn >= zone_movable_pfn[nid])
-                               continue;
-
                /*
                 * Check given memblock attribute by firmware which can affect
                 * kernel memory layout.  If zone==ZONE_MOVABLE but memory is
@@ -5477,6 +5541,12 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid,
                        *zone_end_pfn = min(node_end_pfn,
                                arch_zone_highest_possible_pfn[movable_zone]);
 
+               /* Adjust for ZONE_MOVABLE starting within this range */
+               } else if (!mirrored_kernelcore &&
+                       *zone_start_pfn < zone_movable_pfn[nid] &&
+                       *zone_end_pfn > zone_movable_pfn[nid]) {
+                       *zone_end_pfn = zone_movable_pfn[nid];
+
                /* Check if this whole range is within ZONE_MOVABLE */
                } else if (*zone_start_pfn >= zone_movable_pfn[nid])
                        *zone_start_pfn = *zone_end_pfn;
@@ -5580,28 +5650,23 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
         * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
         * and vice versa.
         */
-       if (zone_movable_pfn[nid]) {
-               if (mirrored_kernelcore) {
-                       unsigned long start_pfn, end_pfn;
-                       struct memblock_region *r;
-
-                       for_each_memblock(memory, r) {
-                               start_pfn = clamp(memblock_region_memory_base_pfn(r),
-                                                 zone_start_pfn, zone_end_pfn);
-                               end_pfn = clamp(memblock_region_memory_end_pfn(r),
-                                               zone_start_pfn, zone_end_pfn);
-
-                               if (zone_type == ZONE_MOVABLE &&
-                                   memblock_is_mirror(r))
-                                       nr_absent += end_pfn - start_pfn;
-
-                               if (zone_type == ZONE_NORMAL &&
-                                   !memblock_is_mirror(r))
-                                       nr_absent += end_pfn - start_pfn;
-                       }
-               } else {
-                       if (zone_type == ZONE_NORMAL)
-                               nr_absent += node_end_pfn - zone_movable_pfn[nid];
+       if (mirrored_kernelcore && zone_movable_pfn[nid]) {
+               unsigned long start_pfn, end_pfn;
+               struct memblock_region *r;
+
+               for_each_memblock(memory, r) {
+                       start_pfn = clamp(memblock_region_memory_base_pfn(r),
+                                         zone_start_pfn, zone_end_pfn);
+                       end_pfn = clamp(memblock_region_memory_end_pfn(r),
+                                       zone_start_pfn, zone_end_pfn);
+
+                       if (zone_type == ZONE_MOVABLE &&
+                           memblock_is_mirror(r))
+                               nr_absent += end_pfn - start_pfn;
+
+                       if (zone_type == ZONE_NORMAL &&
+                           !memblock_is_mirror(r))
+                               nr_absent += end_pfn - start_pfn;
                }
        }
 
@@ -5838,10 +5903,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                }
 
                /* Account for reserved pages */
-               if (j == 0 && freesize > dma_reserve) {
-                       freesize -= dma_reserve;
+               if (j == 0 && freesize > nr_memory_reserve) {
+                       freesize -= nr_memory_reserve;
                        printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
-                                       zone_names[0], dma_reserve);
+                                       zone_names[0], nr_memory_reserve);
                }
 
                if (!is_highmem_idx(j))
@@ -6527,8 +6592,9 @@ void __init mem_init_print_info(const char *str)
 }
 
 /**
- * set_dma_reserve - set the specified number of pages reserved in the first zone
- * @new_dma_reserve: The number of pages to mark reserved
+ * set_memory_reserve - set number of pages reserved in the first zone
+ * @nr_reserve: The number of pages to mark reserved
+ * @inc: if true, add to the existing value; if false, set a new value.
  *
  * The per-cpu batchsize and zone watermarks are determined by managed_pages.
  * In the DMA zone, a significant percentage may be consumed by kernel image
@@ -6537,9 +6603,12 @@ void __init mem_init_print_info(const char *str)
  * first zone (e.g., ZONE_DMA). The effect will be lower watermarks and
  * smaller per-cpu batchsize.
  */
-void __init set_dma_reserve(unsigned long new_dma_reserve)
+void __init set_memory_reserve(unsigned long nr_reserve, bool inc)
 {
-       dma_reserve = new_dma_reserve;
+       if (inc)
+               nr_memory_reserve += nr_reserve;
+       else
+               nr_memory_reserve = nr_reserve;
 }
 
 void __init free_area_init(unsigned long *zones_size)
@@ -6955,6 +7024,17 @@ static int __init set_hashdist(char *str)
 __setup("hashdist=", set_hashdist);
 #endif
 
+#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
+/*
+ * Returns the number of pages that the architecture has reserved but
+ * which are not otherwise known to alloc_large_system_hash().
+ */
+static unsigned long __init arch_reserved_kernel_pages(void)
+{
+       return 0;
+}
+#endif
+
 /*
  * allocate a large system hash table from bootmem
  * - it is assumed that the hash table must contain an exact power-of-2
@@ -6979,6 +7059,7 @@ void *__init alloc_large_system_hash(const char *tablename,
        if (!numentries) {
                /* round applicable memory size up to nearest megabyte */
                numentries = nr_kernel_pages;
+               numentries -= arch_reserved_kernel_pages();
 
                /* It isn't necessary when PAGE_SIZE >= 1MB */
                if (PAGE_SHIFT < 20)
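
Two things change in this file: set_dma_reserve() becomes set_memory_reserve() with an inc flag, so several early callers can accumulate reservations instead of overwriting one another, and alloc_large_system_hash() subtracts arch_reserved_kernel_pages() (a default of zero unless the architecture defines __HAVE_ARCH_RESERVED_KERNEL_PAGES), so hidden arch reservations no longer inflate hash-table sizing. A small userspace sketch of both behaviours, with made-up numbers:

    #include <stdbool.h>
    #include <stdio.h>

    static unsigned long nr_memory_reserve;

    /* mirror of the hunk above: accumulate or overwrite the first-zone reservation */
    static void set_memory_reserve(unsigned long nr_reserve, bool inc)
    {
            if (inc)
                    nr_memory_reserve += nr_reserve;
            else
                    nr_memory_reserve = nr_reserve;
    }

    /* default when the architecture does not provide its own count */
    static unsigned long arch_reserved_kernel_pages(void)
    {
            return 0;
    }

    int main(void)
    {
            unsigned long nr_kernel_pages = 1UL << 20;  /* pretend 4 GB of 4 KB pages */
            unsigned long numentries;

            set_memory_reserve(4096, false);    /* first caller sets the value */
            set_memory_reserve(1024, true);     /* a later caller adds to it */

            numentries = nr_kernel_pages - arch_reserved_kernel_pages();
            printf("reserved %lu pages, hash sized from %lu pages\n",
                   nr_memory_reserve, numentries);
            return 0;
    }
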
index 44a4c029c8e79edef27405b44dbc2d6fcfa71112..121dcffc4ec1768a6fc71dda8af878aa0d16cb92 100644 (file)
  * and page extension core can skip to allocate memory. As result,
  * none of memory is wasted.
  *
+ * When the need callback returns true, page_ext checks whether extra memory
+ * was requested via the size field in struct page_ext_operations. If it is
+ * non-zero, that extra space is allocated for each page_ext entry and its
+ * offset is reported back through the offset field of the same structure.
+ *
  * The init callback is used to do proper initialization after page extension
  * is completely initialized. In sparse memory system, extra memory is
  * allocated some time later than memmap is allocated. In other words, lifetime
@@ -66,18 +71,24 @@ static struct page_ext_operations *page_ext_ops[] = {
 };
 
 static unsigned long total_usage;
+static unsigned long extra_mem;
 
 static bool __init invoke_need_callbacks(void)
 {
        int i;
        int entries = ARRAY_SIZE(page_ext_ops);
+       bool need = false;
 
        for (i = 0; i < entries; i++) {
-               if (page_ext_ops[i]->need && page_ext_ops[i]->need())
-                       return true;
+               if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
+                       page_ext_ops[i]->offset = sizeof(struct page_ext) +
+                                               extra_mem;
+                       extra_mem += page_ext_ops[i]->size;
+                       need = true;
+               }
        }
 
-       return false;
+       return need;
 }
 
 static void __init invoke_init_callbacks(void)
@@ -91,6 +102,16 @@ static void __init invoke_init_callbacks(void)
        }
 }
 
+static unsigned long get_entry_size(void)
+{
+       return sizeof(struct page_ext) + extra_mem;
+}
+
+static inline struct page_ext *get_entry(void *base, unsigned long index)
+{
+       return base + get_entry_size() * index;
+}
+
 #if !defined(CONFIG_SPARSEMEM)
 
 
@@ -102,7 +123,7 @@ void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
 struct page_ext *lookup_page_ext(struct page *page)
 {
        unsigned long pfn = page_to_pfn(page);
-       unsigned long offset;
+       unsigned long index;
        struct page_ext *base;
 
        base = NODE_DATA(page_to_nid(page))->node_page_ext;
@@ -119,9 +140,9 @@ struct page_ext *lookup_page_ext(struct page *page)
        if (unlikely(!base))
                return NULL;
 #endif
-       offset = pfn - round_down(node_start_pfn(page_to_nid(page)),
+       index = pfn - round_down(node_start_pfn(page_to_nid(page)),
                                        MAX_ORDER_NR_PAGES);
-       return base + offset;
+       return get_entry(base, index);
 }
 
 static int __init alloc_node_page_ext(int nid)
@@ -143,7 +164,7 @@ static int __init alloc_node_page_ext(int nid)
                !IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
                nr_pages += MAX_ORDER_NR_PAGES;
 
-       table_size = sizeof(struct page_ext) * nr_pages;
+       table_size = get_entry_size() * nr_pages;
 
        base = memblock_virt_alloc_try_nid_nopanic(
                        table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
@@ -196,7 +217,7 @@ struct page_ext *lookup_page_ext(struct page *page)
        if (!section->page_ext)
                return NULL;
 #endif
-       return section->page_ext + pfn;
+       return get_entry(section->page_ext, pfn);
 }
 
 static void *__meminit alloc_page_ext(size_t size, int nid)
@@ -229,7 +250,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
        if (section->page_ext)
                return 0;
 
-       table_size = sizeof(struct page_ext) * PAGES_PER_SECTION;
+       table_size = get_entry_size() * PAGES_PER_SECTION;
        base = alloc_page_ext(table_size, nid);
 
        /*
@@ -249,7 +270,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
         * we need to apply a mask.
         */
        pfn &= PAGE_SECTION_MASK;
-       section->page_ext = base - pfn;
+       section->page_ext = (void *)base - get_entry_size() * pfn;
        total_usage += table_size;
        return 0;
 }
@@ -262,7 +283,7 @@ static void free_page_ext(void *addr)
                struct page *page = virt_to_page(addr);
                size_t table_size;
 
-               table_size = sizeof(struct page_ext) * PAGES_PER_SECTION;
+               table_size = get_entry_size() * PAGES_PER_SECTION;
 
                BUG_ON(PageReserved(page));
                free_pages_exact(addr, table_size);
@@ -277,7 +298,7 @@ static void __free_page_ext(unsigned long pfn)
        ms = __pfn_to_section(pfn);
        if (!ms || !ms->page_ext)
                return;
-       base = ms->page_ext + pfn;
+       base = get_entry(ms->page_ext, pfn);
        free_page_ext(base);
        ms->page_ext = NULL;
 }
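
After this change the page_ext table is no longer a plain array of struct page_ext: every client whose need() callback returns true gets size extra bytes appended to each entry, and the byte offset of its slice is stored back into its page_ext_operations. All lookups then go through get_entry(), which scales by the full per-entry size. A self-contained sketch of that layout (the two clients and their sizes are invented):

    #include <stddef.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct page_ext { unsigned long flags; };

    struct page_ext_operations {
            size_t offset;  /* filled in when need() returns true */
            size_t size;    /* extra bytes requested per entry */
    };

    /* two hypothetical clients asking for 16 and 8 extra bytes per page */
    static struct page_ext_operations clients[] = { { .size = 16 }, { .size = 8 } };
    static size_t extra_mem;

    static size_t entry_size(void)
    {
            return sizeof(struct page_ext) + extra_mem;
    }

    static struct page_ext *get_entry(void *base, unsigned long index)
    {
            return (struct page_ext *)((char *)base + entry_size() * index);
    }

    int main(void)
    {
            size_t i;

            /* what invoke_need_callbacks() does: hand out offsets, grow the entry */
            for (i = 0; i < sizeof(clients) / sizeof(clients[0]); i++) {
                    clients[i].offset = sizeof(struct page_ext) + extra_mem;
                    extra_mem += clients[i].size;
            }

            void *base = calloc(4, entry_size());
            struct page_ext *e = get_entry(base, 3);
            void *client0 = (char *)e + clients[0].offset;

            printf("entry size %zu, client 0 data at offset %zu of entry 3 (%p)\n",
                   entry_size(), clients[0].offset, client0);
            free(base);
            return 0;
    }
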
index ec6dc1886f71fa2eb77c5629a9a0bc817613ac91..c3cee247f2e609f1fb1f3e799a35dad40e4932c8 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/jump_label.h>
 #include <linux/migrate.h>
 #include <linux/stackdepot.h>
+#include <linux/seq_file.h>
 
 #include "internal.h"
 
  */
 #define PAGE_OWNER_STACK_DEPTH (16)
 
+struct page_owner {
+       unsigned int order;
+       gfp_t gfp_mask;
+       int last_migrate_reason;
+       depot_stack_handle_t handle;
+};
+
 static bool page_owner_disabled = true;
 DEFINE_STATIC_KEY_FALSE(page_owner_inited);
 
@@ -85,10 +93,16 @@ static void init_page_owner(void)
 }
 
 struct page_ext_operations page_owner_ops = {
+       .size = sizeof(struct page_owner),
        .need = need_page_owner,
        .init = init_page_owner,
 };
 
+static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
+{
+       return (void *)page_ext + page_owner_ops.offset;
+}
+
 void __reset_page_owner(struct page *page, unsigned int order)
 {
        int i;
@@ -155,14 +169,16 @@ noinline void __set_page_owner(struct page *page, unsigned int order,
                                        gfp_t gfp_mask)
 {
        struct page_ext *page_ext = lookup_page_ext(page);
+       struct page_owner *page_owner;
 
        if (unlikely(!page_ext))
                return;
 
-       page_ext->handle = save_stack(gfp_mask);
-       page_ext->order = order;
-       page_ext->gfp_mask = gfp_mask;
-       page_ext->last_migrate_reason = -1;
+       page_owner = get_page_owner(page_ext);
+       page_owner->handle = save_stack(gfp_mask);
+       page_owner->order = order;
+       page_owner->gfp_mask = gfp_mask;
+       page_owner->last_migrate_reason = -1;
 
        __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
 }
@@ -170,21 +186,26 @@ noinline void __set_page_owner(struct page *page, unsigned int order,
 void __set_page_owner_migrate_reason(struct page *page, int reason)
 {
        struct page_ext *page_ext = lookup_page_ext(page);
+       struct page_owner *page_owner;
+
        if (unlikely(!page_ext))
                return;
 
-       page_ext->last_migrate_reason = reason;
+       page_owner = get_page_owner(page_ext);
+       page_owner->last_migrate_reason = reason;
 }
 
 void __split_page_owner(struct page *page, unsigned int order)
 {
        int i;
        struct page_ext *page_ext = lookup_page_ext(page);
+       struct page_owner *page_owner;
 
        if (unlikely(!page_ext))
                return;
 
-       page_ext->order = 0;
+       page_owner = get_page_owner(page_ext);
+       page_owner->order = 0;
        for (i = 1; i < (1 << order); i++)
                __copy_page_owner(page, page + i);
 }
@@ -193,14 +214,18 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
 {
        struct page_ext *old_ext = lookup_page_ext(oldpage);
        struct page_ext *new_ext = lookup_page_ext(newpage);
+       struct page_owner *old_page_owner, *new_page_owner;
 
        if (unlikely(!old_ext || !new_ext))
                return;
 
-       new_ext->order = old_ext->order;
-       new_ext->gfp_mask = old_ext->gfp_mask;
-       new_ext->last_migrate_reason = old_ext->last_migrate_reason;
-       new_ext->handle = old_ext->handle;
+       old_page_owner = get_page_owner(old_ext);
+       new_page_owner = get_page_owner(new_ext);
+       new_page_owner->order = old_page_owner->order;
+       new_page_owner->gfp_mask = old_page_owner->gfp_mask;
+       new_page_owner->last_migrate_reason =
+               old_page_owner->last_migrate_reason;
+       new_page_owner->handle = old_page_owner->handle;
 
        /*
         * We don't clear the bit on the oldpage as it's going to be freed
@@ -214,9 +239,88 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
        __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
 }
 
+void pagetypeinfo_showmixedcount_print(struct seq_file *m,
+                                      pg_data_t *pgdat, struct zone *zone)
+{
+       struct page *page;
+       struct page_ext *page_ext;
+       struct page_owner *page_owner;
+       unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
+       unsigned long end_pfn = pfn + zone->spanned_pages;
+       unsigned long count[MIGRATE_TYPES] = { 0, };
+       int pageblock_mt, page_mt;
+       int i;
+
+       /* Scan block by block. First and last block may be incomplete */
+       pfn = zone->zone_start_pfn;
+
+       /*
+        * Walk the zone in pageblock_nr_pages steps. If a page block spans
+        * a zone boundary, it will be double counted between zones. This does
+        * not matter as the mixed block count will still be correct
+        */
+       for (; pfn < end_pfn; ) {
+               if (!pfn_valid(pfn)) {
+                       pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+                       continue;
+               }
+
+               block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+               block_end_pfn = min(block_end_pfn, end_pfn);
+
+               page = pfn_to_page(pfn);
+               pageblock_mt = get_pageblock_migratetype(page);
+
+               for (; pfn < block_end_pfn; pfn++) {
+                       if (!pfn_valid_within(pfn))
+                               continue;
+
+                       page = pfn_to_page(pfn);
+
+                       if (page_zone(page) != zone)
+                               continue;
+
+                       if (PageBuddy(page)) {
+                               pfn += (1UL << page_order(page)) - 1;
+                               continue;
+                       }
+
+                       if (PageReserved(page))
+                               continue;
+
+                       page_ext = lookup_page_ext(page);
+                       if (unlikely(!page_ext))
+                               continue;
+
+                       if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
+                               continue;
+
+                       page_owner = get_page_owner(page_ext);
+                       page_mt = gfpflags_to_migratetype(
+                                       page_owner->gfp_mask);
+                       if (pageblock_mt != page_mt) {
+                               if (is_migrate_cma(pageblock_mt))
+                                       count[MIGRATE_MOVABLE]++;
+                               else
+                                       count[pageblock_mt]++;
+
+                               pfn = block_end_pfn;
+                               break;
+                       }
+                       pfn += (1UL << page_owner->order) - 1;
+               }
+       }
+
+       /* Print counts */
+       seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
+       for (i = 0; i < MIGRATE_TYPES; i++)
+               seq_printf(m, "%12lu ", count[i]);
+       seq_putc(m, '\n');
+}
+
 static ssize_t
 print_page_owner(char __user *buf, size_t count, unsigned long pfn,
-               struct page *page, struct page_ext *page_ext,
+               struct page *page, struct page_owner *page_owner,
                depot_stack_handle_t handle)
 {
        int ret;
@@ -236,15 +340,15 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
 
        ret = snprintf(kbuf, count,
                        "Page allocated via order %u, mask %#x(%pGg)\n",
-                       page_ext->order, page_ext->gfp_mask,
-                       &page_ext->gfp_mask);
+                       page_owner->order, page_owner->gfp_mask,
+                       &page_owner->gfp_mask);
 
        if (ret >= count)
                goto err;
 
        /* Print information relevant to grouping pages by mobility */
        pageblock_mt = get_pageblock_migratetype(page);
-       page_mt  = gfpflags_to_migratetype(page_ext->gfp_mask);
+       page_mt  = gfpflags_to_migratetype(page_owner->gfp_mask);
        ret += snprintf(kbuf + ret, count - ret,
                        "PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n",
                        pfn,
@@ -261,10 +365,10 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
        if (ret >= count)
                goto err;
 
-       if (page_ext->last_migrate_reason != -1) {
+       if (page_owner->last_migrate_reason != -1) {
                ret += snprintf(kbuf + ret, count - ret,
                        "Page has been migrated, last migrate reason: %s\n",
-                       migrate_reason_names[page_ext->last_migrate_reason]);
+                       migrate_reason_names[page_owner->last_migrate_reason]);
                if (ret >= count)
                        goto err;
        }
@@ -287,6 +391,7 @@ err:
 void __dump_page_owner(struct page *page)
 {
        struct page_ext *page_ext = lookup_page_ext(page);
+       struct page_owner *page_owner;
        unsigned long entries[PAGE_OWNER_STACK_DEPTH];
        struct stack_trace trace = {
                .nr_entries = 0,
@@ -302,7 +407,9 @@ void __dump_page_owner(struct page *page)
                pr_alert("There is not page extension available.\n");
                return;
        }
-       gfp_mask = page_ext->gfp_mask;
+
+       page_owner = get_page_owner(page_ext);
+       gfp_mask = page_owner->gfp_mask;
        mt = gfpflags_to_migratetype(gfp_mask);
 
        if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
@@ -310,7 +417,7 @@ void __dump_page_owner(struct page *page)
                return;
        }
 
-       handle = READ_ONCE(page_ext->handle);
+       handle = READ_ONCE(page_owner->handle);
        if (!handle) {
                pr_alert("page_owner info is not active (free page?)\n");
                return;
@@ -318,12 +425,12 @@ void __dump_page_owner(struct page *page)
 
        depot_fetch_stack(handle, &trace);
        pr_alert("page allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
-                page_ext->order, migratetype_names[mt], gfp_mask, &gfp_mask);
+                page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);
        print_stack_trace(&trace, 0);
 
-       if (page_ext->last_migrate_reason != -1)
+       if (page_owner->last_migrate_reason != -1)
                pr_alert("page has been migrated, last migrate reason: %s\n",
-                       migrate_reason_names[page_ext->last_migrate_reason]);
+                       migrate_reason_names[page_owner->last_migrate_reason]);
 }
 
 static ssize_t
@@ -332,6 +439,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
        unsigned long pfn;
        struct page *page;
        struct page_ext *page_ext;
+       struct page_owner *page_owner;
        depot_stack_handle_t handle;
 
        if (!static_branch_unlikely(&page_owner_inited))
@@ -381,11 +489,13 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
                if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
                        continue;
 
+               page_owner = get_page_owner(page_ext);
+
                /*
                 * Access to page_ext->handle isn't synchronous so we should
                 * be careful to access it.
                 */
-               handle = READ_ONCE(page_ext->handle);
+               handle = READ_ONCE(page_owner->handle);
                if (!handle)
                        continue;
 
@@ -393,7 +503,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
                *ppos = (pfn - min_low_pfn) + 1;
 
                return print_page_owner(buf, count, pfn, page,
-                               page_ext, handle);
+                               page_owner, handle);
        }
 
        return 0;
@@ -417,7 +527,7 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
         */
        for (; pfn < end_pfn; ) {
                if (!pfn_valid(pfn)) {
-                       pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
+                       pfn = ALIGN(pfn + 1, pageblock_nr_pages);
                        continue;
                }
 
index 090fb26b3a39b4feba105650f2a663f5c9972064..f2b8fb9a6ecca560f1cdff0338db31438b8d535c 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -233,6 +233,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
        spin_lock_init(&parent->list_lock);
        parent->free_objects = 0;
        parent->free_touched = 0;
+       parent->num_slabs = 0;
 }
 
 #define MAKE_LIST(cachep, listp, slab, nodeid)                         \
@@ -1382,24 +1383,27 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
        for_each_kmem_cache_node(cachep, node, n) {
                unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
                unsigned long active_slabs = 0, num_slabs = 0;
+               unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+               unsigned long num_slabs_full;
 
                spin_lock_irqsave(&n->list_lock, flags);
-               list_for_each_entry(page, &n->slabs_full, lru) {
-                       active_objs += cachep->num;
-                       active_slabs++;
-               }
+               num_slabs = n->num_slabs;
                list_for_each_entry(page, &n->slabs_partial, lru) {
                        active_objs += page->active;
-                       active_slabs++;
+                       num_slabs_partial++;
                }
                list_for_each_entry(page, &n->slabs_free, lru)
-                       num_slabs++;
+                       num_slabs_free++;
 
                free_objects += n->free_objects;
                spin_unlock_irqrestore(&n->list_lock, flags);
 
-               num_slabs += active_slabs;
                num_objs = num_slabs * cachep->num;
+               active_slabs = num_slabs - num_slabs_free;
+               num_slabs_full = num_slabs -
+                       (num_slabs_partial + num_slabs_free);
+               active_objs += (num_slabs_full * cachep->num);
+
                pr_warn("  node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
                        node, active_slabs, num_slabs, active_objs, num_objs,
                        free_objects);
@@ -2314,6 +2318,7 @@ static int drain_freelist(struct kmem_cache *cache,
 
                page = list_entry(p, struct page, lru);
                list_del(&page->lru);
+               n->num_slabs--;
                /*
                 * Safe to drop the lock. The slab is no longer linked
                 * to the cache.
@@ -2752,6 +2757,8 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
                list_add_tail(&page->lru, &(n->slabs_free));
        else
                fixup_slab_list(cachep, n, page, &list);
+
+       n->num_slabs++;
        STATS_INC_GROWN(cachep);
        n->free_objects += cachep->num - page->active;
        spin_unlock(&n->list_lock);
@@ -3443,6 +3450,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
 
                page = list_last_entry(&n->slabs_free, struct page, lru);
                list_move(&page->lru, list);
+               n->num_slabs--;
        }
 }
 
@@ -4099,6 +4107,8 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
        unsigned long num_objs;
        unsigned long active_slabs = 0;
        unsigned long num_slabs, free_objects = 0, shared_avail = 0;
+       unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+       unsigned long num_slabs_full = 0;
        const char *name;
        char *error = NULL;
        int node;
@@ -4111,33 +4121,34 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
                check_irq_on();
                spin_lock_irq(&n->list_lock);
 
-               list_for_each_entry(page, &n->slabs_full, lru) {
-                       if (page->active != cachep->num && !error)
-                               error = "slabs_full accounting error";
-                       active_objs += cachep->num;
-                       active_slabs++;
-               }
+               num_slabs += n->num_slabs;
+
                list_for_each_entry(page, &n->slabs_partial, lru) {
                        if (page->active == cachep->num && !error)
                                error = "slabs_partial accounting error";
                        if (!page->active && !error)
                                error = "slabs_partial accounting error";
                        active_objs += page->active;
-                       active_slabs++;
+                       num_slabs_partial++;
                }
+
                list_for_each_entry(page, &n->slabs_free, lru) {
                        if (page->active && !error)
                                error = "slabs_free accounting error";
-                       num_slabs++;
+                       num_slabs_free++;
                }
+
                free_objects += n->free_objects;
                if (n->shared)
                        shared_avail += n->shared->avail;
 
                spin_unlock_irq(&n->list_lock);
        }
-       num_slabs += active_slabs;
        num_objs = num_slabs * cachep->num;
+       active_slabs = num_slabs - num_slabs_free;
+       num_slabs_full = num_slabs - (num_slabs_partial + num_slabs_free);
+       active_objs += (num_slabs_full * cachep->num);
+
        if (num_objs - active_objs != free_objects && !error)
                error = "free_objects accounting error";
 
index 9653f2e2591ad0982d2dc74c668323a43e5b026d..bc05fdc3edce106b12e5113ad8c8777de6de217d 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -432,6 +432,7 @@ struct kmem_cache_node {
        struct list_head slabs_partial; /* partial list first, better asm code */
        struct list_head slabs_full;
        struct list_head slabs_free;
+       unsigned long num_slabs;
        unsigned long free_objects;
        unsigned int free_limit;
        unsigned int colour_next;       /* Per-node cache coloring */
index 75c63bb2a1da1dc0c3e55600e9db1618949df87a..4dcf852e1e6d8f2e9f0eeca9ee39f620ea972957 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -748,10 +748,8 @@ void release_pages(struct page **pages, int nr, bool cold)
                        locked_pgdat = NULL;
                }
 
-               if (is_huge_zero_page(page)) {
-                       put_huge_zero_page();
+               if (is_huge_zero_page(page))
                        continue;
-               }
 
                page = compound_head(page);
                if (!put_page_testzero(page))
index c8310a37be3abbcf267472f30bd0e3677c1336d4..35d7e0ee1c77c9fb94915905e171f82ff859b7da 100644 (file)
@@ -37,6 +37,8 @@ struct address_space swapper_spaces[MAX_SWAPFILES] = {
                .page_tree      = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
                .i_mmap_writable = ATOMIC_INIT(0),
                .a_ops          = &swap_aops,
+               /* swap cache doesn't use writeback related tags */
+               .flags          = 1 << AS_NO_WRITEBACK_TAGS,
        }
 };
 
@@ -92,7 +94,7 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
        address_space = swap_address_space(entry);
        spin_lock_irq(&address_space->tree_lock);
        error = radix_tree_insert(&address_space->page_tree,
-                                       entry.val, page);
+                                 swp_offset(entry), page);
        if (likely(!error)) {
                address_space->nrpages++;
                __inc_node_page_state(page, NR_FILE_PAGES);
@@ -143,7 +145,7 @@ void __delete_from_swap_cache(struct page *page)
 
        entry.val = page_private(page);
        address_space = swap_address_space(entry);
-       radix_tree_delete(&address_space->page_tree, page_private(page));
+       radix_tree_delete(&address_space->page_tree, swp_offset(entry));
        set_page_private(page, 0);
        ClearPageSwapCache(page);
        address_space->nrpages--;
@@ -252,9 +254,7 @@ static inline void free_swap_cache(struct page *page)
 void free_page_and_swap_cache(struct page *page)
 {
        free_swap_cache(page);
-       if (is_huge_zero_page(page))
-               put_huge_zero_page();
-       else
+       if (!is_huge_zero_page(page))
                put_page(page);
 }
 
@@ -283,7 +283,7 @@ struct page * lookup_swap_cache(swp_entry_t entry)
 {
        struct page *page;
 
-       page = find_get_page(swap_address_space(entry), entry.val);
+       page = find_get_page(swap_address_space(entry), swp_offset(entry));
 
        if (page) {
                INC_CACHE_INFO(find_success);
@@ -310,7 +310,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                 * called after lookup_swap_cache() failed, re-calling
                 * that would confuse statistics.
                 */
-               found_page = find_get_page(swapper_space, entry.val);
+               found_page = find_get_page(swapper_space, swp_offset(entry));
                if (found_page)
                        break;
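
The swap cache radix trees are now keyed by swp_offset(entry) rather than the raw entry.val. Since swap_address_space() already selects a per-type tree out of swapper_spaces[], the type bits in the key were redundant, and keeping only the offset keeps the keys dense. A toy illustration of the type/offset split (the bit layout here is made up; the real swp_entry_t encoding is architecture and configuration specific):

    #include <stdio.h>

    #define TOY_TYPE_SHIFT 26   /* invented for this sketch only */

    typedef struct { unsigned long val; } swp_entry_t;

    static swp_entry_t toy_swp_entry(unsigned long type, unsigned long offset)
    {
            swp_entry_t e = { (type << TOY_TYPE_SHIFT) | offset };
            return e;
    }

    static unsigned long toy_swp_type(swp_entry_t e)
    {
            return e.val >> TOY_TYPE_SHIFT;
    }

    static unsigned long toy_swp_offset(swp_entry_t e)
    {
            return e.val & ((1UL << TOY_TYPE_SHIFT) - 1);
    }

    int main(void)
    {
            swp_entry_t e = toy_swp_entry(1, 12345);

            /* the per-type address space picks the tree, so only the offset is the key */
            printf("type %lu: radix key %lu (previously the raw value %lu)\n",
                   toy_swp_type(e), toy_swp_offset(e), e.val);
            return 0;
    }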
 
index 78cfa292a29aa0780e5e83efd7b7733ce958e605..6d375b62bd591995f356919c54f14988f2484d87 100644 (file)
@@ -105,7 +105,7 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
        struct page *page;
        int ret = 0;
 
-       page = find_get_page(swap_address_space(entry), entry.val);
+       page = find_get_page(swap_address_space(entry), swp_offset(entry));
        if (!page)
                return 0;
        /*
@@ -257,6 +257,53 @@ static inline void cluster_set_null(struct swap_cluster_info *info)
        info->data = 0;
 }
 
+static inline bool cluster_list_empty(struct swap_cluster_list *list)
+{
+       return cluster_is_null(&list->head);
+}
+
+static inline unsigned int cluster_list_first(struct swap_cluster_list *list)
+{
+       return cluster_next(&list->head);
+}
+
+static void cluster_list_init(struct swap_cluster_list *list)
+{
+       cluster_set_null(&list->head);
+       cluster_set_null(&list->tail);
+}
+
+static void cluster_list_add_tail(struct swap_cluster_list *list,
+                                 struct swap_cluster_info *ci,
+                                 unsigned int idx)
+{
+       if (cluster_list_empty(list)) {
+               cluster_set_next_flag(&list->head, idx, 0);
+               cluster_set_next_flag(&list->tail, idx, 0);
+       } else {
+               unsigned int tail = cluster_next(&list->tail);
+
+               cluster_set_next(&ci[tail], idx);
+               cluster_set_next_flag(&list->tail, idx, 0);
+       }
+}
+
+static unsigned int cluster_list_del_first(struct swap_cluster_list *list,
+                                          struct swap_cluster_info *ci)
+{
+       unsigned int idx;
+
+       idx = cluster_next(&list->head);
+       if (cluster_next(&list->tail) == idx) {
+               cluster_set_null(&list->head);
+               cluster_set_null(&list->tail);
+       } else
+               cluster_set_next_flag(&list->head,
+                                     cluster_next(&ci[idx]), 0);
+
+       return idx;
+}
+
 /* Add a cluster to discard list and schedule it to do discard */
 static void swap_cluster_schedule_discard(struct swap_info_struct *si,
                unsigned int idx)
@@ -270,17 +317,7 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
        memset(si->swap_map + idx * SWAPFILE_CLUSTER,
                        SWAP_MAP_BAD, SWAPFILE_CLUSTER);
 
-       if (cluster_is_null(&si->discard_cluster_head)) {
-               cluster_set_next_flag(&si->discard_cluster_head,
-                                               idx, 0);
-               cluster_set_next_flag(&si->discard_cluster_tail,
-                                               idx, 0);
-       } else {
-               unsigned int tail = cluster_next(&si->discard_cluster_tail);
-               cluster_set_next(&si->cluster_info[tail], idx);
-               cluster_set_next_flag(&si->discard_cluster_tail,
-                                               idx, 0);
-       }
+       cluster_list_add_tail(&si->discard_clusters, si->cluster_info, idx);
 
        schedule_work(&si->discard_work);
 }
@@ -296,15 +333,8 @@ static void swap_do_scheduled_discard(struct swap_info_struct *si)
 
        info = si->cluster_info;
 
-       while (!cluster_is_null(&si->discard_cluster_head)) {
-               idx = cluster_next(&si->discard_cluster_head);
-
-               cluster_set_next_flag(&si->discard_cluster_head,
-                                               cluster_next(&info[idx]), 0);
-               if (cluster_next(&si->discard_cluster_tail) == idx) {
-                       cluster_set_null(&si->discard_cluster_head);
-                       cluster_set_null(&si->discard_cluster_tail);
-               }
+       while (!cluster_list_empty(&si->discard_clusters)) {
+               idx = cluster_list_del_first(&si->discard_clusters, info);
                spin_unlock(&si->lock);
 
                discard_swap_cluster(si, idx * SWAPFILE_CLUSTER,
@@ -312,19 +342,7 @@ static void swap_do_scheduled_discard(struct swap_info_struct *si)
 
                spin_lock(&si->lock);
                cluster_set_flag(&info[idx], CLUSTER_FLAG_FREE);
-               if (cluster_is_null(&si->free_cluster_head)) {
-                       cluster_set_next_flag(&si->free_cluster_head,
-                                               idx, 0);
-                       cluster_set_next_flag(&si->free_cluster_tail,
-                                               idx, 0);
-               } else {
-                       unsigned int tail;
-
-                       tail = cluster_next(&si->free_cluster_tail);
-                       cluster_set_next(&info[tail], idx);
-                       cluster_set_next_flag(&si->free_cluster_tail,
-                                               idx, 0);
-               }
+               cluster_list_add_tail(&si->free_clusters, info, idx);
                memset(si->swap_map + idx * SWAPFILE_CLUSTER,
                                0, SWAPFILE_CLUSTER);
        }
@@ -353,13 +371,8 @@ static void inc_cluster_info_page(struct swap_info_struct *p,
        if (!cluster_info)
                return;
        if (cluster_is_free(&cluster_info[idx])) {
-               VM_BUG_ON(cluster_next(&p->free_cluster_head) != idx);
-               cluster_set_next_flag(&p->free_cluster_head,
-                       cluster_next(&cluster_info[idx]), 0);
-               if (cluster_next(&p->free_cluster_tail) == idx) {
-                       cluster_set_null(&p->free_cluster_tail);
-                       cluster_set_null(&p->free_cluster_head);
-               }
+               VM_BUG_ON(cluster_list_first(&p->free_clusters) != idx);
+               cluster_list_del_first(&p->free_clusters, cluster_info);
                cluster_set_count_flag(&cluster_info[idx], 0, 0);
        }
 
@@ -398,14 +411,7 @@ static void dec_cluster_info_page(struct swap_info_struct *p,
                }
 
                cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
-               if (cluster_is_null(&p->free_cluster_head)) {
-                       cluster_set_next_flag(&p->free_cluster_head, idx, 0);
-                       cluster_set_next_flag(&p->free_cluster_tail, idx, 0);
-               } else {
-                       unsigned int tail = cluster_next(&p->free_cluster_tail);
-                       cluster_set_next(&cluster_info[tail], idx);
-                       cluster_set_next_flag(&p->free_cluster_tail, idx, 0);
-               }
+               cluster_list_add_tail(&p->free_clusters, cluster_info, idx);
        }
 }
 
@@ -421,8 +427,8 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
        bool conflict;
 
        offset /= SWAPFILE_CLUSTER;
-       conflict = !cluster_is_null(&si->free_cluster_head) &&
-               offset != cluster_next(&si->free_cluster_head) &&
+       conflict = !cluster_list_empty(&si->free_clusters) &&
+               offset != cluster_list_first(&si->free_clusters) &&
                cluster_is_free(&si->cluster_info[offset]);
 
        if (!conflict)
@@ -447,11 +453,11 @@ static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
 new_cluster:
        cluster = this_cpu_ptr(si->percpu_cluster);
        if (cluster_is_null(&cluster->index)) {
-               if (!cluster_is_null(&si->free_cluster_head)) {
-                       cluster->index = si->free_cluster_head;
+               if (!cluster_list_empty(&si->free_clusters)) {
+                       cluster->index = si->free_clusters.head;
                        cluster->next = cluster_next(&cluster->index) *
                                        SWAPFILE_CLUSTER;
-               } else if (!cluster_is_null(&si->discard_cluster_head)) {
+               } else if (!cluster_list_empty(&si->discard_clusters)) {
                        /*
                         * we don't have free cluster but have some clusters in
                         * discarding, do discard now and reclaim them
@@ -999,7 +1005,7 @@ int free_swap_and_cache(swp_entry_t entry)
        if (p) {
                if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
                        page = find_get_page(swap_address_space(entry),
-                                               entry.val);
+                                            swp_offset(entry));
                        if (page && !trylock_page(page)) {
                                put_page(page);
                                page = NULL;
@@ -2292,10 +2298,8 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
 
        nr_good_pages = maxpages - 1;   /* omit header page */
 
-       cluster_set_null(&p->free_cluster_head);
-       cluster_set_null(&p->free_cluster_tail);
-       cluster_set_null(&p->discard_cluster_head);
-       cluster_set_null(&p->discard_cluster_tail);
+       cluster_list_init(&p->free_clusters);
+       cluster_list_init(&p->discard_clusters);
 
        for (i = 0; i < swap_header->info.nr_badpages; i++) {
                unsigned int page_nr = swap_header->info.badpages[i];
@@ -2341,19 +2345,8 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
        for (i = 0; i < nr_clusters; i++) {
                if (!cluster_count(&cluster_info[idx])) {
                        cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
-                       if (cluster_is_null(&p->free_cluster_head)) {
-                               cluster_set_next_flag(&p->free_cluster_head,
-                                                               idx, 0);
-                               cluster_set_next_flag(&p->free_cluster_tail,
-                                                               idx, 0);
-                       } else {
-                               unsigned int tail;
-
-                               tail = cluster_next(&p->free_cluster_tail);
-                               cluster_set_next(&cluster_info[tail], idx);
-                               cluster_set_next_flag(&p->free_cluster_tail,
-                                                               idx, 0);
-                       }
+                       cluster_list_add_tail(&p->free_clusters, cluster_info,
+                                             idx);
                }
                idx++;
                if (idx == nr_clusters)
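
The four free/discard cluster head/tail fields collapse into two struct swap_cluster_list objects with cluster_list_init/add_tail/del_first/empty helpers: effectively an index-linked FIFO threaded through the cluster_info array. A standalone sketch of the same idea using plain ints as the next links (NIL stands in for the null cluster marker):

    #include <stdio.h>

    #define NIL (-1)

    struct list { int head, tail; };

    static void list_init(struct list *l)  { l->head = l->tail = NIL; }
    static int  list_empty(struct list *l) { return l->head == NIL; }

    static void list_add_tail(struct list *l, int *next, int idx)
    {
            next[idx] = NIL;
            if (list_empty(l))
                    l->head = idx;
            else
                    next[l->tail] = idx;
            l->tail = idx;
    }

    static int list_del_first(struct list *l, int *next)
    {
            int idx = l->head;

            if (l->tail == idx)
                    list_init(l);       /* that was the only element */
            else
                    l->head = next[idx];
            return idx;
    }

    int main(void)
    {
            int next[8];
            struct list free_clusters;
            int i;

            list_init(&free_clusters);
            for (i = 0; i < 8; i++)
                    list_add_tail(&free_clusters, next, i);
            while (!list_empty(&free_clusters))
                    printf("%d ", list_del_first(&free_clusters, next));
            printf("\n");
            return 0;
    }
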
index fd09dc9c6812bb86d483b1638d519b565958049a..035fdeb35b43b936a0e247f3e7b658caf55e9404 100644 (file)
@@ -87,11 +87,11 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
 {
        int i;
 
+       count_vm_vmacache_event(VMACACHE_FIND_CALLS);
+
        if (!vmacache_valid(mm))
                return NULL;
 
-       count_vm_vmacache_event(VMACACHE_FIND_CALLS);
-
        for (i = 0; i < VMACACHE_SIZE; i++) {
                struct vm_area_struct *vma = current->vmacache[i];
 
@@ -115,11 +115,11 @@ struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
 {
        int i;
 
+       count_vm_vmacache_event(VMACACHE_FIND_CALLS);
+
        if (!vmacache_valid(mm))
                return NULL;
 
-       count_vm_vmacache_event(VMACACHE_FIND_CALLS);
-
        for (i = 0; i < VMACACHE_SIZE; i++) {
                struct vm_area_struct *vma = current->vmacache[i];
 
index 91f44e78c516f67ff9969f47679bcefa22eed61d..80660a0f989b6371dfa08406de6cd11bfd136382 100644 (file)
@@ -1359,14 +1359,14 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
        struct vm_struct *area;
 
        BUG_ON(in_interrupt());
-       if (flags & VM_IOREMAP)
-               align = 1ul << clamp_t(int, fls_long(size),
-                                      PAGE_SHIFT, IOREMAP_MAX_ORDER);
-
        size = PAGE_ALIGN(size);
        if (unlikely(!size))
                return NULL;
 
+       if (flags & VM_IOREMAP)
+               align = 1ul << clamp_t(int, get_count_order_long(size),
+                                      PAGE_SHIFT, IOREMAP_MAX_ORDER);
+
        area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
        if (unlikely(!area))
                return NULL;
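
For VM_IOREMAP areas the alignment is now derived after PAGE_ALIGN() and from get_count_order_long() instead of fls_long(). For an exact power-of-two size, fls_long() returns one more than the order and so doubled the alignment, while get_count_order_long() returns the smallest order whose block still holds the size. A sketch of the difference with simplified stand-ins for both helpers (the toy_* names are not the kernel functions):

    #include <stdio.h>

    static int toy_fls_long(unsigned long x)
    {
            int bit = 0;

            while (x) {
                    bit++;
                    x >>= 1;
            }
            return bit;     /* 1-based position of the highest set bit, 0 for x == 0 */
    }

    static int toy_count_order_long(unsigned long x)
    {
            return toy_fls_long(x - 1);     /* smallest order with (1UL << order) >= x */
    }

    int main(void)
    {
            unsigned long size = 1UL << 20;     /* an exact 1 MB ioremap request */

            printf("fls-based alignment:   %lu\n", 1UL << toy_fls_long(size));
            printf("order-based alignment: %lu\n", 1UL << toy_count_order_long(size));
            return 0;
    }
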
index b1e12a1ea9cfcc6ece1690305fc2d44bfdcf5b37..55943a284082845d526d3c3040114e2e5166a69e 100644 (file)
@@ -2437,8 +2437,6 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
        if (inactive_list_is_low(lruvec, false, sc))
                shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
                                   sc, LRU_ACTIVE_ANON);
-
-       throttle_vm_writeout(sc->gfp_mask);
 }
 
 /* Use reclaim/compaction for costly allocs or under memory pressure */
@@ -2499,7 +2497,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
         * If we have not reclaimed enough pages for compaction and the
         * inactive lists are large enough, continue reclaiming
         */
-       pages_for_compaction = (2UL << sc->order);
+       pages_for_compaction = compact_gap(sc->order);
        inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
        if (get_nr_swap_pages() > 0)
                inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
@@ -2514,7 +2512,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
                        continue;
 
                switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
-               case COMPACT_PARTIAL:
+               case COMPACT_SUCCESS:
                case COMPACT_CONTINUE:
                        return false;
                default:
@@ -2617,38 +2615,35 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 }
 
 /*
- * Returns true if compaction should go ahead for a high-order request, or
- * the high-order allocation would succeed without compaction.
+ * Returns true if compaction should go ahead for a costly-order request, or
+ * the allocation would already succeed without compaction. Return false if we
+ * should reclaim first.
  */
 static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 {
        unsigned long watermark;
-       bool watermark_ok;
+       enum compact_result suitable;
 
-       /*
-        * Compaction takes time to run and there are potentially other
-        * callers using the pages just freed. Continue reclaiming until
-        * there is a buffer of free pages available to give compaction
-        * a reasonable chance of completing and allocating the page
-        */
-       watermark = high_wmark_pages(zone) + (2UL << sc->order);
-       watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx);
-
-       /*
-        * If compaction is deferred, reclaim up to a point where
-        * compaction will have a chance of success when re-enabled
-        */
-       if (compaction_deferred(zone, sc->order))
-               return watermark_ok;
+       suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx);
+       if (suitable == COMPACT_SUCCESS)
+               /* Allocation should succeed already. Don't reclaim. */
+               return true;
+       if (suitable == COMPACT_SKIPPED)
+               /* Compaction cannot yet proceed. Do reclaim. */
+               return false;
 
        /*
-        * If compaction is not ready to start and allocation is not likely
-        * to succeed without it, then keep reclaiming.
+        * Compaction is already possible, but it takes time to run and there
+        * are potentially other callers using the pages just freed. So proceed
+        * with reclaim to make a buffer of free pages available to give
+        * compaction a reasonable chance of completing and allocating the page.
+        * Note that we won't actually reclaim the whole buffer in one attempt
+        * as the target watermark in should_continue_reclaim() is lower. But if
+        * we are already above the high+gap watermark, don't reclaim at all.
         */
-       if (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx) == COMPACT_SKIPPED)
-               return false;
+       watermark = high_wmark_pages(zone) + compact_gap(sc->order);
 
-       return watermark_ok;
+       return zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx);
 }
 
 /*
@@ -3060,7 +3055,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
         */
        nid = mem_cgroup_select_victim_node(memcg);
 
-       zonelist = NODE_DATA(nid)->node_zonelists;
+       zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
 
        trace_mm_vmscan_memcg_reclaim_begin(0,
                                            sc.may_writepage,
@@ -3188,7 +3183,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat,
         * excessive reclaim. Assume that a process requested a high-order
         * can direct reclaim/compact.
         */
-       if (sc->order && sc->nr_reclaimed >= 2UL << sc->order)
+       if (sc->order && sc->nr_reclaimed >= compact_gap(sc->order))
                sc->order = 0;
 
        return sc->nr_scanned >= sc->nr_to_reclaim;
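
Several "2UL << sc->order" buffers become compact_gap(sc->order), a helper introduced elsewhere in this series. The sketch below assumes it keeps the same value, twice the requested block, so reclaim leaves compaction both migration sources and free targets:

    #include <stdio.h>

    /* assumed definition of compact_gap(): twice the requested allocation */
    static unsigned long compact_gap(unsigned int order)
    {
            return 2UL << order;
    }

    int main(void)
    {
            unsigned int order = 9;             /* e.g. a 2 MB THP on 4 KB pages */
            unsigned long high_wmark = 16384;   /* pretend zone high watermark */

            printf("gap = %lu pages\n", compact_gap(order));
            printf("stop reclaiming above %lu free pages\n",
                   high_wmark + compact_gap(order));
            return 0;
    }
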
index 89cec42d19ffa8da5ad1e3c8e64ff4df1b3e562b..604f26a4f69678220f8cf41b88694d8355ab01b7 100644 (file)
@@ -1254,85 +1254,6 @@ static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
        return 0;
 }
 
-#ifdef CONFIG_PAGE_OWNER
-static void pagetypeinfo_showmixedcount_print(struct seq_file *m,
-                                                       pg_data_t *pgdat,
-                                                       struct zone *zone)
-{
-       struct page *page;
-       struct page_ext *page_ext;
-       unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
-       unsigned long end_pfn = pfn + zone->spanned_pages;
-       unsigned long count[MIGRATE_TYPES] = { 0, };
-       int pageblock_mt, page_mt;
-       int i;
-
-       /* Scan block by block. First and last block may be incomplete */
-       pfn = zone->zone_start_pfn;
-
-       /*
-        * Walk the zone in pageblock_nr_pages steps. If a page block spans
-        * a zone boundary, it will be double counted between zones. This does
-        * not matter as the mixed block count will still be correct
-        */
-       for (; pfn < end_pfn; ) {
-               if (!pfn_valid(pfn)) {
-                       pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
-                       continue;
-               }
-
-               block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
-               block_end_pfn = min(block_end_pfn, end_pfn);
-
-               page = pfn_to_page(pfn);
-               pageblock_mt = get_pageblock_migratetype(page);
-
-               for (; pfn < block_end_pfn; pfn++) {
-                       if (!pfn_valid_within(pfn))
-                               continue;
-
-                       page = pfn_to_page(pfn);
-
-                       if (page_zone(page) != zone)
-                               continue;
-
-                       if (PageBuddy(page)) {
-                               pfn += (1UL << page_order(page)) - 1;
-                               continue;
-                       }
-
-                       if (PageReserved(page))
-                               continue;
-
-                       page_ext = lookup_page_ext(page);
-                       if (unlikely(!page_ext))
-                               continue;
-
-                       if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
-                               continue;
-
-                       page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
-                       if (pageblock_mt != page_mt) {
-                               if (is_migrate_cma(pageblock_mt))
-                                       count[MIGRATE_MOVABLE]++;
-                               else
-                                       count[pageblock_mt]++;
-
-                               pfn = block_end_pfn;
-                               break;
-                       }
-                       pfn += (1UL << page_ext->order) - 1;
-               }
-       }
-
-       /* Print counts */
-       seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
-       for (i = 0; i < MIGRATE_TYPES; i++)
-               seq_printf(m, "%12lu ", count[i]);
-       seq_putc(m, '\n');
-}
-#endif /* CONFIG_PAGE_OWNER */
-
 /*
  * Print out the number of pageblocks for each migratetype that contain pages
  * of other types. This gives an indication of how well fallbacks are being
@@ -1592,7 +1513,10 @@ static int vmstat_show(struct seq_file *m, void *arg)
 {
        unsigned long *l = arg;
        unsigned long off = l - (unsigned long *)m->private;
-       seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
+
+       seq_puts(m, vmstat_text[off]);
+       seq_put_decimal_ull(m, " ", *l);
+       seq_putc(m, '\n');
        return 0;
 }
 
@@ -1794,6 +1718,16 @@ static void __init start_shepherd_timer(void)
                round_jiffies_relative(sysctl_stat_interval));
 }
 
+static void __init init_cpu_node_state(void)
+{
+       int cpu;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu)
+               node_set_state(cpu_to_node(cpu), N_CPU);
+       put_online_cpus();
+}
+
 static void vmstat_cpu_dead(int node)
 {
        int cpu;
@@ -1851,6 +1785,7 @@ static int __init setup_vmstat(void)
 #ifdef CONFIG_SMP
        cpu_notifier_register_begin();
        __register_cpu_notifier(&vmstat_notifier);
+       init_cpu_node_state();
 
        start_shepherd_timer();
        cpu_notifier_register_done();
index b0bc023d25c539e05b5a874b65dcf575a136f271..7b5fd2b9095e200d577e61d6ad755209e3dd1c4b 100644 (file)
@@ -31,6 +31,8 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#define CREATE_TRACE_POINTS
+
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -53,6 +55,7 @@
 #include <linux/mount.h>
 #include <linux/migrate.h>
 #include <linux/pagemap.h>
+#include <trace/events/zsmalloc.h>
 
 #define ZSPAGE_MAGIC   0x58
 
@@ -1767,9 +1770,12 @@ struct zs_compact_control {
        /* Destination page for migration which should be a first page
         * of zspage. */
        struct page *d_page;
-        /* Starting object index within @s_page which used for live object
-         * in the subpage. */
+       /* Starting object index within @s_page which is used for live objects
+        * in the subpage. */
        int obj_idx;
+
+       unsigned long nr_migrated_obj;
+       unsigned long nr_freed_pages;
 };
 
 static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
@@ -1803,6 +1809,7 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
                free_obj = obj_malloc(class, get_zspage(d_page), handle);
                zs_object_copy(class, free_obj, used_obj);
                obj_idx++;
+               cc->nr_migrated_obj++;
                /*
                 * record_obj updates handle's value to free_obj and it will
                 * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which
@@ -2260,7 +2267,10 @@ static unsigned long zs_can_compact(struct size_class *class)
 
 static void __zs_compact(struct zs_pool *pool, struct size_class *class)
 {
-       struct zs_compact_control cc;
+       struct zs_compact_control cc = {
+               .nr_migrated_obj = 0,
+               .nr_freed_pages = 0,
+       };
        struct zspage *src_zspage;
        struct zspage *dst_zspage = NULL;
 
@@ -2292,7 +2302,7 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
                putback_zspage(class, dst_zspage);
                if (putback_zspage(class, src_zspage) == ZS_EMPTY) {
                        free_zspage(pool, class, src_zspage);
-                       pool->stats.pages_compacted += class->pages_per_zspage;
+                       cc.nr_freed_pages += class->pages_per_zspage;
                }
                spin_unlock(&class->lock);
                cond_resched();
@@ -2303,12 +2313,18 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
                putback_zspage(class, src_zspage);
 
        spin_unlock(&class->lock);
+
+       pool->stats.pages_compacted += cc.nr_freed_pages;
+       trace_zs_compact(class->index, cc.nr_migrated_obj, cc.nr_freed_pages);
 }
 
 unsigned long zs_compact(struct zs_pool *pool)
 {
        int i;
        struct size_class *class;
+       unsigned long pages_compacted_before = pool->stats.pages_compacted;
+
+       trace_zs_compact_start(pool->name);
 
        for (i = zs_size_classes - 1; i >= 0; i--) {
                class = pool->size_class[i];
@@ -2319,6 +2335,9 @@ unsigned long zs_compact(struct zs_pool *pool)
                __zs_compact(pool, class);
        }
 
+       trace_zs_compact_end(pool->name,
+               pool->stats.pages_compacted - pages_compacted_before);
+
        return pool->stats.pages_compacted;
 }
 EXPORT_SYMBOL_GPL(zs_compact);
index 66ddcb60519a169c02b4f5041d1bf1df731763b8..7cf7d6e380c2c87ecccb11bae3f677676062d11f 100644 (file)
@@ -258,7 +258,7 @@ int ping_init_sock(struct sock *sk)
        struct net *net = sock_net(sk);
        kgid_t group = current_egid();
        struct group_info *group_info;
-       int i, j, count;
+       int i;
        kgid_t low, high;
        int ret = 0;
 
@@ -270,16 +270,11 @@ int ping_init_sock(struct sock *sk)
                return 0;
 
        group_info = get_current_groups();
-       count = group_info->ngroups;
-       for (i = 0; i < group_info->nblocks; i++) {
-               int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
-               for (j = 0; j < cp_count; j++) {
-                       kgid_t gid = group_info->blocks[i][j];
-                       if (gid_lte(low, gid) && gid_lte(gid, high))
-                               goto out_release_group;
-               }
+       for (i = 0; i < group_info->ngroups; i++) {
+               kgid_t gid = group_info->gid[i];
 
-               count -= cp_count;
+               if (gid_lte(low, gid) && gid_lte(gid, high))
+                       goto out_release_group;
        }
 
        ret = -EACCES;
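
This hunk, and the sunrpc ones that follow, rely on struct group_info storing gids in one flat gid[] array instead of the old two-level blocks[][] layout, so callers simply index 0..ngroups-1. A toy before/after of the indexing (kgid_t is reduced to an int and the group values are arbitrary):

    #include <stdio.h>

    #define NGROUPS_PER_BLOCK 4

    int main(void)
    {
            int gid[7] = { 10, 20, 30, 40, 50, 60, 70 };
            int ngroups = 7;
            int *blocks[2] = { &gid[0], &gid[4] };  /* the old two-level view */
            int count = ngroups;
            int i, j;

            /* old style: per-block counting, as removed from ping_init_sock() */
            for (i = 0; i < 2; i++) {
                    int cp = count < NGROUPS_PER_BLOCK ? count : NGROUPS_PER_BLOCK;

                    for (j = 0; j < cp; j++)
                            printf("%d ", blocks[i][j]);
                    count -= cp;
            }
            printf("(blocked)\n");

            /* new style: one flat array, indexed directly */
            for (i = 0; i < ngroups; i++)
                    printf("%d ", gid[i]);
            printf("(flat)\n");
            return 0;
    }
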
index 168219535a341056c22679273771dcf7d7656f28..83dffeadf20acbec87e93ef96fdad20d80e8f630 100644 (file)
@@ -176,8 +176,8 @@ generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
        if (gcred->acred.group_info->ngroups != acred->group_info->ngroups)
                goto out_nomatch;
        for (i = 0; i < gcred->acred.group_info->ngroups; i++) {
-               if (!gid_eq(GROUP_AT(gcred->acred.group_info, i),
-                               GROUP_AT(acred->group_info, i)))
+               if (!gid_eq(gcred->acred.group_info->gid[i],
+                               acred->group_info->gid[i]))
                        goto out_nomatch;
        }
 out_match:
index eeeba5adee6d939ab3429100d231a46e82b1ff94..dc6fb79a361f1ca3ab9869fc02ba05c1a533ad9b 100644 (file)
@@ -229,7 +229,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
                kgid = make_kgid(&init_user_ns, tmp);
                if (!gid_valid(kgid))
                        goto out_free_groups;
-               GROUP_AT(creds->cr_group_info, i) = kgid;
+               creds->cr_group_info->gid[i] = kgid;
        }
 
        return 0;
index 1d281816f2bf14e34a71932863e5579e4a5caa35..529ae2573fc8ce903ecbf456e713c8961da5f18d 100644 (file)
@@ -479,7 +479,7 @@ static int rsc_parse(struct cache_detail *cd,
                        kgid = make_kgid(&init_user_ns, id);
                        if (!gid_valid(kgid))
                                goto out;
-                       GROUP_AT(rsci.cred.cr_group_info, i) = kgid;
+                       rsci.cred.cr_group_info->gid[i] = kgid;
                }
 
                /* mech name */
index a99278c984e82a2156c20e3a4a7fa174783b9297..a1d768a973f5297a60899d475763d184228010db 100644 (file)
@@ -79,7 +79,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
 
        cred->uc_gid = acred->gid;
        for (i = 0; i < groups; i++)
-               cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
+               cred->uc_gids[i] = acred->group_info->gid[i];
        if (i < NFS_NGROUPS)
                cred->uc_gids[i] = INVALID_GID;
 
@@ -127,7 +127,7 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
        if (groups > NFS_NGROUPS)
                groups = NFS_NGROUPS;
        for (i = 0; i < groups ; i++)
-               if (!gid_eq(cred->uc_gids[i], GROUP_AT(acred->group_info, i)))
+               if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i]))
                        return 0;
        if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups]))
                return 0;
index dfacdc95b3f52010b3342c53dbd06788f1378128..64af4f034de693f921faf3f818d833e868b0a2e2 100644 (file)
@@ -517,7 +517,7 @@ static int unix_gid_parse(struct cache_detail *cd,
                kgid = make_kgid(&init_user_ns, gid);
                if (!gid_valid(kgid))
                        goto out;
-               GROUP_AT(ug.gi, i) = kgid;
+               ug.gi->gid[i] = kgid;
        }
 
        ugp = unix_gid_lookup(cd, uid);
@@ -564,7 +564,7 @@ static int unix_gid_show(struct seq_file *m,
 
        seq_printf(m, "%u %d:", from_kuid_munged(user_ns, ug->uid), glen);
        for (i = 0; i < glen; i++)
-               seq_printf(m, " %d", from_kgid_munged(user_ns, GROUP_AT(ug->gi, i)));
+               seq_printf(m, " %d", from_kgid_munged(user_ns, ug->gi->gid[i]));
        seq_printf(m, "\n");
        return 0;
 }
@@ -817,7 +817,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
                return SVC_CLOSE;
        for (i = 0; i < slen; i++) {
                kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
-               GROUP_AT(cred->cr_group_info, i) = kgid;
+               cred->cr_group_info->gid[i] = kgid;
        }
        if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
                *authp = rpc_autherr_badverf;
index 206a6b346a8dbc3d38b1771093671fa37f1daf27..3fa2b2e973eb9875a5684b26adebc0d16af47838 100755 (executable)
@@ -54,6 +54,7 @@ my $min_conf_desc_length = 4;
 my $spelling_file = "$D/spelling.txt";
 my $codespell = 0;
 my $codespellfile = "/usr/share/codespell/dictionary.txt";
+my $conststructsfile = "$D/const_structs.checkpatch";
 my $color = 1;
 my $allow_c99_comments = 1;
 
@@ -541,6 +542,32 @@ our $mode_perms_world_writable = qr{
        0[0-7][0-7][2367]
 }x;
 
+our %mode_permission_string_types = (
+       "S_IRWXU" => 0700,
+       "S_IRUSR" => 0400,
+       "S_IWUSR" => 0200,
+       "S_IXUSR" => 0100,
+       "S_IRWXG" => 0070,
+       "S_IRGRP" => 0040,
+       "S_IWGRP" => 0020,
+       "S_IXGRP" => 0010,
+       "S_IRWXO" => 0007,
+       "S_IROTH" => 0004,
+       "S_IWOTH" => 0002,
+       "S_IXOTH" => 0001,
+       "S_IRWXUGO" => 0777,
+       "S_IRUGO" => 0444,
+       "S_IWUGO" => 0222,
+       "S_IXUGO" => 0111,
+);
+
+#Create a search pattern for all these strings to speed up a loop below
+our $mode_perms_string_search = "";
+foreach my $entry (keys %mode_permission_string_types) {
+       $mode_perms_string_search .= '|' if ($mode_perms_string_search ne "");
+       $mode_perms_string_search .= $entry;
+}
+
 our $allowed_asm_includes = qr{(?x:
        irq|
        memory|
@@ -598,6 +625,29 @@ if ($codespell) {
 
 $misspellings = join("|", sort keys %spelling_fix) if keys %spelling_fix;
 
+my $const_structs = "";
+if (open(my $conststructs, '<', $conststructsfile)) {
+       while (<$conststructs>) {
+               my $line = $_;
+
+               $line =~ s/\s*\n?$//g;
+               $line =~ s/^\s*//g;
+
+               next if ($line =~ m/^\s*#/);
+               next if ($line =~ m/^\s*$/);
+               if ($line =~ /\s/) {
+                       print("$conststructsfile: '$line' invalid - ignored\n");
+                       next;
+               }
+
+               $const_structs .= '|' if ($const_structs ne "");
+               $const_structs .= $line;
+       }
+       close($conststructs);
+} else {
+       warn "No structs that should be const will be found - file '$conststructsfile': $!\n";
+}
+
 sub build_types {
        my $mods = "(?x:  \n" . join("|\n  ", (@modifierList, @modifierListFile)) . "\n)";
        my $all = "(?x:  \n" . join("|\n  ", (@typeList, @typeListFile)) . "\n)";
@@ -704,6 +754,16 @@ sub seed_camelcase_file {
        }
 }
 
+sub is_maintained_obsolete {
+       my ($filename) = @_;
+
+       return 0 if (!(-e "$root/scripts/get_maintainer.pl"));
+
+       my $status = `perl $root/scripts/get_maintainer.pl --status --nom --nol --nogit --nogit-fallback -f $filename 2>&1`;
+
+       return $status =~ /obsolete/i;
+}
+
 my $camelcase_seeded = 0;
 sub seed_camelcase_includes {
        return if ($camelcase_seeded);
@@ -2289,6 +2349,10 @@ sub process {
                }
 
                if ($found_file) {
+                       if (is_maintained_obsolete($realfile)) {
+                               WARN("OBSOLETE",
+                                    "$realfile is marked as 'obsolete' in the MAINTAINERS hierarchy.  No unnecessary modifications please.\n");
+                       }
                        if ($realfile =~ m@^(?:drivers/net/|net/|drivers/staging/)@) {
                                $check = 1;
                        } else {
@@ -2939,6 +3003,30 @@ sub process {
                             "Block comments use a trailing */ on a separate line\n" . $herecurr);
                }
 
+# Block comment * alignment
+               if ($prevline =~ /$;[ \t]*$/ &&                 #ends in comment
+                   $line =~ /^\+[ \t]*$;/ &&                   #leading comment
+                   $rawline =~ /^\+[ \t]*\*/ &&                #leading *
+                   (($prevrawline =~ /^\+.*?\/\*/ &&           #leading /*
+                     $prevrawline !~ /\*\/[ \t]*$/) ||         #no trailing */
+                    $prevrawline =~ /^\+[ \t]*\*/)) {          #leading *
+                       my $oldindent;
+                       $prevrawline =~ m@^\+([ \t]*/?)\*@;
+                       if (defined($1)) {
+                               $oldindent = expand_tabs($1);
+                       } else {
+                               $prevrawline =~ m@^\+(.*/?)\*@;
+                               $oldindent = expand_tabs($1);
+                       }
+                       $rawline =~ m@^\+([ \t]*)\*@;
+                       my $newindent = $1;
+                       $newindent = expand_tabs($newindent);
+                       if (length($oldindent) ne length($newindent)) {
+                               WARN("BLOCK_COMMENT_STYLE",
+                                    "Block comments should align the * on each line\n" . $hereprev);
+                       }
+               }
+
 # check for missing blank lines after struct/union declarations
 # with exceptions for various attributes and macros
                if ($prevline =~ /^[\+ ]};?\s*$/ &&
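For illustration only (not from the patch), the layout the new BLOCK_COMMENT_STYLE alignment test accepts looks like this small C fragment:

/*
 * A conforming block comment: every continuation asterisk sits in the
 * same column as the asterisk of the opening line.  An asterisk in any
 * other column now triggers the BLOCK_COMMENT_STYLE warning.
 */
static int block_comment_example;  /* placeholder declaration only */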
@@ -5495,46 +5583,46 @@ sub process {
                }
 
 # Check for memcpy(foo, bar, ETH_ALEN) that could be ether_addr_copy(foo, bar)
-               if ($^V && $^V ge 5.10.0 &&
-                   defined $stat &&
-                   $stat =~ /^\+(?:.*?)\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) {
-                       if (WARN("PREFER_ETHER_ADDR_COPY",
-                                "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . "$here\n$stat\n") &&
-                           $fix) {
-                               $fixed[$fixlinenr] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/;
-                       }
-               }
+#              if ($^V && $^V ge 5.10.0 &&
+#                  defined $stat &&
+#                  $stat =~ /^\+(?:.*?)\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) {
+#                      if (WARN("PREFER_ETHER_ADDR_COPY",
+#                               "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . "$here\n$stat\n") &&
+#                          $fix) {
+#                              $fixed[$fixlinenr] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/;
+#                      }
+#              }
 
 # Check for memcmp(foo, bar, ETH_ALEN) that could be ether_addr_equal*(foo, bar)
-               if ($^V && $^V ge 5.10.0 &&
-                   defined $stat &&
-                   $stat =~ /^\+(?:.*?)\bmemcmp\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) {
-                       WARN("PREFER_ETHER_ADDR_EQUAL",
-                            "Prefer ether_addr_equal() or ether_addr_equal_unaligned() over memcmp()\n" . "$here\n$stat\n")
-               }
+#              if ($^V && $^V ge 5.10.0 &&
+#                  defined $stat &&
+#                  $stat =~ /^\+(?:.*?)\bmemcmp\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) {
+#                      WARN("PREFER_ETHER_ADDR_EQUAL",
+#                           "Prefer ether_addr_equal() or ether_addr_equal_unaligned() over memcmp()\n" . "$here\n$stat\n")
+#              }
 
 # check for memset(foo, 0x0, ETH_ALEN) that could be eth_zero_addr
 # check for memset(foo, 0xFF, ETH_ALEN) that could be eth_broadcast_addr
-               if ($^V && $^V ge 5.10.0 &&
-                   defined $stat &&
-                   $stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) {
-
-                       my $ms_val = $7;
-
-                       if ($ms_val =~ /^(?:0x|)0+$/i) {
-                               if (WARN("PREFER_ETH_ZERO_ADDR",
-                                        "Prefer eth_zero_addr over memset()\n" . "$here\n$stat\n") &&
-                                   $fix) {
-                                       $fixed[$fixlinenr] =~ s/\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*,\s*ETH_ALEN\s*\)/eth_zero_addr($2)/;
-                               }
-                       } elsif ($ms_val =~ /^(?:0xff|255)$/i) {
-                               if (WARN("PREFER_ETH_BROADCAST_ADDR",
-                                        "Prefer eth_broadcast_addr() over memset()\n" . "$here\n$stat\n") &&
-                                   $fix) {
-                                       $fixed[$fixlinenr] =~ s/\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*,\s*ETH_ALEN\s*\)/eth_broadcast_addr($2)/;
-                               }
-                       }
-               }
+#              if ($^V && $^V ge 5.10.0 &&
+#                  defined $stat &&
+#                  $stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) {
+#
+#                      my $ms_val = $7;
+#
+#                      if ($ms_val =~ /^(?:0x|)0+$/i) {
+#                              if (WARN("PREFER_ETH_ZERO_ADDR",
+#                                       "Prefer eth_zero_addr over memset()\n" . "$here\n$stat\n") &&
+#                                  $fix) {
+#                                      $fixed[$fixlinenr] =~ s/\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*,\s*ETH_ALEN\s*\)/eth_zero_addr($2)/;
+#                              }
+#                      } elsif ($ms_val =~ /^(?:0xff|255)$/i) {
+#                              if (WARN("PREFER_ETH_BROADCAST_ADDR",
+#                                       "Prefer eth_broadcast_addr() over memset()\n" . "$here\n$stat\n") &&
+#                                  $fix) {
+#                                      $fixed[$fixlinenr] =~ s/\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*,\s*ETH_ALEN\s*\)/eth_broadcast_addr($2)/;
+#                              }
+#                      }
+#              }
 
 # typecasts on min/max could be min_t/max_t
                if ($^V && $^V ge 5.10.0 &&
@@ -5853,46 +5941,6 @@ sub process {
                }
 
 # check for various structs that are normally const (ops, kgdb, device_tree)
-               my $const_structs = qr{
-                               acpi_dock_ops|
-                               address_space_operations|
-                               backlight_ops|
-                               block_device_operations|
-                               dentry_operations|
-                               dev_pm_ops|
-                               dma_map_ops|
-                               extent_io_ops|
-                               file_lock_operations|
-                               file_operations|
-                               hv_ops|
-                               ide_dma_ops|
-                               intel_dvo_dev_ops|
-                               item_operations|
-                               iwl_ops|
-                               kgdb_arch|
-                               kgdb_io|
-                               kset_uevent_ops|
-                               lock_manager_operations|
-                               microcode_ops|
-                               mtrr_ops|
-                               neigh_ops|
-                               nlmsvc_binding|
-                               of_device_id|
-                               pci_raw_ops|
-                               pipe_buf_operations|
-                               platform_hibernation_ops|
-                               platform_suspend_ops|
-                               proto_ops|
-                               rpc_pipe_ops|
-                               seq_operations|
-                               snd_ac97_build_ops|
-                               soc_pcmcia_socket_ops|
-                               stacktrace_ops|
-                               sysfs_ops|
-                               tty_operations|
-                               uart_ops|
-                               usb_mon_operations|
-                               wd_ops}x;
                if ($line !~ /\bconst\b/ &&
                    $line =~ /\bstruct\s+($const_structs)\b/) {
                        WARN("CONST_STRUCT",
@@ -5989,20 +6037,31 @@ sub process {
                                        $arg_pos--;
                                        $skip_args = "(?:\\s*$FuncArg\\s*,\\s*){$arg_pos,$arg_pos}";
                                }
-                               my $test = "\\b$func\\s*\\(${skip_args}([\\d]+)\\s*[,\\)]";
+                               my $test = "\\b$func\\s*\\(${skip_args}($FuncArg(?:\\|\\s*$FuncArg)*)\\s*[,\\)]";
                                if ($line =~ /$test/) {
                                        my $val = $1;
                                        $val = $6 if ($skip_args ne "");
-
-                                       if ($val !~ /^0$/ &&
-                                           (($val =~ /^$Int$/ && $val !~ /^$Octal$/) ||
-                                            length($val) ne 4)) {
+                                       if (($val =~ /^$Int$/ && $val !~ /^$Octal$/) ||
+                                           ($val =~ /^$Octal$/ && length($val) ne 4)) {
                                                ERROR("NON_OCTAL_PERMISSIONS",
                                                      "Use 4 digit octal (0777) not decimal permissions\n" . $herecurr);
-                                       } elsif ($val =~ /^$Octal$/ && (oct($val) & 02)) {
+                                       }
+                                       if ($val =~ /^$Octal$/ && (oct($val) & 02)) {
                                                ERROR("EXPORTED_WORLD_WRITABLE",
                                                      "Exporting writable files is usually an error. Consider more restrictive permissions.\n" . $herecurr);
                                        }
+                                       if ($val =~ /\b$mode_perms_string_search\b/) {
+                                               my $to = 0;
+                                               while ($val =~ /\b($mode_perms_string_search)\b(?:\s*\|\s*)?\s*/g) {
+                                                       $to |=  $mode_permission_string_types{$1};
+                                               }
+                                               my $new = sprintf("%04o", $to);
+                                               if (WARN("SYMBOLIC_PERMS",
+                                                        "Symbolic permissions are not preferred. Consider using octal permissions $new.\n" . $herecurr) &&
+                                                   $fix) {
+                                                       $fixed[$fixlinenr] =~ s/\Q$val\E/$new/;
+                                               }
+                                       }
                                }
                        }
                }
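For reference, the octal value the new SYMBOLIC_PERMS warning proposes is simply the OR of the S_I* values listed in %mode_permission_string_types above, matching <sys/stat.h>.  A minimal userspace sketch of the same conversion (illustrative only, not part of the patch):

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	/* S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH is the symbolic spelling of 0644,
	 * the form checkpatch now suggests writing directly as octal. */
	unsigned int mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;

	printf("%04o\n", mode);	/* prints 0644 */
	return 0;
}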
diff --git a/scripts/const_structs.checkpatch b/scripts/const_structs.checkpatch
new file mode 100644 (file)
index 0000000..ac5f126
--- /dev/null
@@ -0,0 +1,64 @@
+acpi_dock_ops
+address_space_operations
+backlight_ops
+block_device_operations
+clk_ops
+comedi_lrange
+component_ops
+dentry_operations
+dev_pm_ops
+dma_map_ops
+driver_info
+drm_connector_funcs
+drm_encoder_funcs
+drm_encoder_helper_funcs
+ethtool_ops
+extent_io_ops
+file_lock_operations
+file_operations
+hv_ops
+ide_dma_ops
+ide_port_ops
+inode_operations
+intel_dvo_dev_ops
+irq_domain_ops
+item_operations
+iwl_cfg
+iwl_ops
+kgdb_arch
+kgdb_io
+kset_uevent_ops
+lock_manager_operations
+machine_desc
+microcode_ops
+mlxsw_reg_info
+mtrr_ops
+neigh_ops
+net_device_ops
+nlmsvc_binding
+nvkm_device_chip
+of_device_id
+pci_raw_ops
+pipe_buf_operations
+platform_hibernation_ops
+platform_suspend_ops
+proto_ops
+regmap_access_table
+rpc_pipe_ops
+rtc_class_ops
+sd_desc
+seq_operations
+sirfsoc_padmux
+snd_ac97_build_ops
+snd_soc_component_driver
+soc_pcmcia_socket_ops
+stacktrace_ops
+sysfs_ops
+tty_operations
+uart_ops
+usb_mon_operations
+v4l2_ctrl_ops
+v4l2_ioctl_ops
+vm_operations_struct
+wacom_features
+wd_ops
index 19d9bcadc0ccd448fd4cff3e687ec61e3f95001a..8257ef422c0a292f8f86b59b292d92c5d1e2870a 100644 (file)
@@ -7,13 +7,6 @@ modpost-objs   := modpost.o file2alias.o sumversion.o
 
 devicetable-offsets-file := devicetable-offsets.h
 
-define sed-y
-       "/^->/{s:->#\(.*\):/* \1 */:; \
-       s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \
-       s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
-       s:->::; p;}"
-endef
-
 quiet_cmd_offsets = GEN     $@
 define cmd_offsets
        (set -e; \
@@ -25,9 +18,7 @@ define cmd_offsets
         echo " * This file was generated by Kbuild"; \
         echo " *"; \
         echo " */"; \
-        echo ""; \
-        sed -ne $(sed-y) $<; \
-        echo ""; \
+        sed -ne '/#define/{s/\$$//;s/#//2;s/$$/*\//;p;}' $<; \
         echo "#endif" ) > $@
 endef
 
index 48958d3cec9e38473ff3296f7f6bf4d5db41612a..bd834975909510335322ec8f34970573a8708dfc 100644 (file)
@@ -888,7 +888,7 @@ static void check_section(const char *modname, struct elf_info *elf,
 
 #define DATA_SECTIONS ".data", ".data.rel"
 #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \
-               ".kprobes.text"
+               ".kprobes.text", ".cpuidle.text"
 #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
                ".fixup", ".entry.text", ".exception.text", ".text.*", \
                ".coldtext"
index 42396a74405df03002001d8f972599fa827bb3bf..c0222107cf58fb104b0f4a2c6cb240b06d58e705 100644 (file)
@@ -364,6 +364,7 @@ is_mcounted_section_name(char const *const txtname)
                strcmp(".spinlock.text", txtname) == 0 ||
                strcmp(".irqentry.text", txtname) == 0 ||
                strcmp(".kprobes.text", txtname) == 0 ||
+               strcmp(".cpuidle.text", txtname) == 0 ||
                strcmp(".text.unlikely", txtname) == 0;
 }
 
index 96e2486a6fc479559eba03662317c82f802a0f43..29cecf9b504f028e8fe1b25a112ff57a794b5498 100755 (executable)
@@ -135,6 +135,7 @@ my %text_sections = (
      ".spinlock.text" => 1,
      ".irqentry.text" => 1,
      ".kprobes.text" => 1,
+     ".cpuidle.text" => 1,
      ".text.unlikely" => 1,
 );
 
index fa79c6d2a5b88975d4504de87234aa33c64b1938..163c720d3f2bb8d8a5c48a5c9bb64d98a6b5263e 100644 (file)
@@ -629,7 +629,6 @@ mispelt||misspelt
 miximum||maximum
 mmnemonic||mnemonic
 mnay||many
-modeled||modelled
 modulues||modules
 monochorome||monochrome
 monochromo||monochrome
index 3b530467148ec44d4b329e5bece6c5d9b9c056d4..6079ec142685f9e23b878b25dcdab3f74e001ea1 100644 (file)
@@ -3,7 +3,8 @@ CFLAGS += -I. -g -Wall -D_LGPL_SOURCE
 LDFLAGS += -lpthread -lurcu
 TARGETS = main
 OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \
-        regression1.o regression2.o regression3.o multiorder.o
+        regression1.o regression2.o regression3.o multiorder.o \
+        iteration_check.o
 
 targets: $(TARGETS)
 
diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c
new file mode 100644 (file)
index 0000000..9adb8e7
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ * iteration_check.c: test races having to do with radix tree iteration
+ * Copyright (c) 2016 Intel Corporation
+ * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/radix-tree.h>
+#include <pthread.h>
+#include "test.h"
+
+#define NUM_THREADS 4
+#define TAG 0
+static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_t threads[NUM_THREADS];
+RADIX_TREE(tree, GFP_KERNEL);
+bool test_complete;
+
+/* relentlessly fill the tree with tagged entries */
+static void *add_entries_fn(void *arg)
+{
+       int pgoff;
+
+       while (!test_complete) {
+               for (pgoff = 0; pgoff < 100; pgoff++) {
+                       pthread_mutex_lock(&tree_lock);
+                       if (item_insert(&tree, pgoff) == 0)
+                               item_tag_set(&tree, pgoff, TAG);
+                       pthread_mutex_unlock(&tree_lock);
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ * Iterate over the tagged entries, doing a radix_tree_iter_retry() as we find
+ * things that have been removed and randomly resetting our iteration to the
+ * next chunk with radix_tree_iter_next().  Both radix_tree_iter_retry() and
+ * radix_tree_iter_next() cause radix_tree_next_slot() to be called with a
+ * NULL 'slot' variable.
+ */
+static void *tagged_iteration_fn(void *arg)
+{
+       struct radix_tree_iter iter;
+       void **slot;
+
+       while (!test_complete) {
+               rcu_read_lock();
+               radix_tree_for_each_tagged(slot, &tree, &iter, 0, TAG) {
+                       void *entry;
+                       int i;
+
+                       /* busy wait to let removals happen */
+                       for (i = 0; i < 1000000; i++)
+                               ;
+
+                       entry = radix_tree_deref_slot(slot);
+                       if (unlikely(!entry))
+                               continue;
+
+                       if (radix_tree_deref_retry(entry)) {
+                               slot = radix_tree_iter_retry(&iter);
+                               continue;
+                       }
+
+                       if (rand() % 50 == 0)
+                               slot = radix_tree_iter_next(&iter);
+               }
+               rcu_read_unlock();
+       }
+
+       return NULL;
+}
+
+/*
+ * Iterate over the entries, doing a radix_tree_iter_retry() as we find things
+ * that have been removed and randomly resetting our iteration to the next
+ * chunk with radix_tree_iter_next().  Both radix_tree_iter_retry() and
+ * radix_tree_iter_next() cause radix_tree_next_slot() to be called with a
+ * NULL 'slot' variable.
+ */
+static void *untagged_iteration_fn(void *arg)
+{
+       struct radix_tree_iter iter;
+       void **slot;
+
+       while (!test_complete) {
+               rcu_read_lock();
+               radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+                       void *entry;
+                       int i;
+
+                       /* busy wait to let removals happen */
+                       for (i = 0; i < 1000000; i++)
+                               ;
+
+                       entry = radix_tree_deref_slot(slot);
+                       if (unlikely(!entry))
+                               continue;
+
+                       if (radix_tree_deref_retry(entry)) {
+                               slot = radix_tree_iter_retry(&iter);
+                               continue;
+                       }
+
+                       if (rand() % 50 == 0)
+                               slot = radix_tree_iter_next(&iter);
+               }
+               rcu_read_unlock();
+       }
+
+       return NULL;
+}
+
+/*
+ * Randomly remove entries to help induce radix_tree_iter_retry() calls in the
+ * two iteration functions.
+ */
+static void *remove_entries_fn(void *arg)
+{
+       while (!test_complete) {
+               int pgoff;
+
+               pgoff = rand() % 100;
+
+               pthread_mutex_lock(&tree_lock);
+               item_delete(&tree, pgoff);
+               pthread_mutex_unlock(&tree_lock);
+       }
+
+       return NULL;
+}
+
+/* This is a unit test for a bug found by the syzkaller tester */
+void iteration_test(void)
+{
+       int i;
+
+       printf("Running iteration tests for 10 seconds\n");
+
+       srand(time(0));
+       test_complete = false;
+
+       if (pthread_create(&threads[0], NULL, tagged_iteration_fn, NULL)) {
+               perror("pthread_create");
+               exit(1);
+       }
+       if (pthread_create(&threads[1], NULL, untagged_iteration_fn, NULL)) {
+               perror("pthread_create");
+               exit(1);
+       }
+       if (pthread_create(&threads[2], NULL, add_entries_fn, NULL)) {
+               perror("pthread_create");
+               exit(1);
+       }
+       if (pthread_create(&threads[3], NULL, remove_entries_fn, NULL)) {
+               perror("pthread_create");
+               exit(1);
+       }
+
+       sleep(10);
+       test_complete = true;
+
+       for (i = 0; i < NUM_THREADS; i++) {
+               if (pthread_join(threads[i], NULL)) {
+                       perror("pthread_join");
+                       exit(1);
+               }
+       }
+
+       item_kill_tree(&tree);
+}
index b7619ff3b552ba9d6f206a98b4b21731403a83cc..daa9010693e8374148a57481285f7fbc457a1356 100644 (file)
@@ -332,6 +332,7 @@ int main(int argc, char **argv)
        regression1_test();
        regression2_test();
        regression3_test();
+       iteration_test();
        single_thread_tests(long_run);
 
        sleep(1);
index 2d03a63bb79c6777c144296fb18e4e4ebcbd6ab3..0d6813a61b37f904b436e74448c910a350c10bb3 100644 (file)
@@ -43,7 +43,7 @@
 #include "regression.h"
 
 static RADIX_TREE(mt_tree, GFP_KERNEL);
-static pthread_mutex_t mt_lock;
+static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
 
 struct page {
        pthread_mutex_t lock;
index e85131369723c971dbd5f7f198c389b4cd75fc3a..217fb2403f0901c13d55936fa7cfe0c6c9da4157 100644 (file)
@@ -27,6 +27,7 @@ void item_kill_tree(struct radix_tree_root *root);
 
 void tag_check(void);
 void multiorder_checks(void);
+void iteration_test(void);
 
 struct item *
 item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
index a937a9d26b600e6bb0f2caf5134caa06854c3a12..142c565bb3518d0311f0049f135f7de55d336301 100644 (file)
@@ -7,3 +7,4 @@ mlock2-tests
 on-fault-limit
 transhuge-stress
 userfaultfd
+mlock-random-test
index e4bb1de1d526d8717b447ed932e70138e9ddc1d7..bbab7f4664acdc6a0d535b2787c59f00253e127b 100644 (file)
@@ -10,6 +10,7 @@ BINARIES += on-fault-limit
 BINARIES += thuge-gen
 BINARIES += transhuge-stress
 BINARIES += userfaultfd
+BINARIES += mlock-random-test
 
 all: $(BINARIES)
 %: %.c
@@ -17,6 +18,9 @@ all: $(BINARIES)
 userfaultfd: userfaultfd.c ../../../../usr/include/linux/kernel.h
        $(CC) $(CFLAGS) -O2 -o $@ $< -lpthread
 
+mlock-random-test: mlock-random-test.c
+       $(CC) $(CFLAGS) -o $@ $< -lcap
+
 ../../../../usr/include/linux/kernel.h:
        make -C ../../../.. headers_install
 
diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/vm/mlock-random-test.c
new file mode 100644 (file)
index 0000000..83de4f5
--- /dev/null
@@ -0,0 +1,293 @@
+/*
+ * It tests mlock() and mlock2() when they are invoked
+ * on randomly chosen memory regions.
+ */
+#include <unistd.h>
+#include <sys/resource.h>
+#include <sys/capability.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <time.h>
+#include "mlock2.h"
+
+#define CHUNK_UNIT (128 * 1024)
+#define MLOCK_RLIMIT_SIZE (CHUNK_UNIT * 2)
+#define MLOCK_WITHIN_LIMIT_SIZE CHUNK_UNIT
+#define MLOCK_OUTOF_LIMIT_SIZE (CHUNK_UNIT * 3)
+
+#define TEST_LOOP 100
+#define PAGE_ALIGN(size, ps) (((size) + ((ps) - 1)) & ~((ps) - 1))
+
+int set_cap_limits(rlim_t max)
+{
+       struct rlimit new;
+       cap_t cap = cap_init();
+
+       new.rlim_cur = max;
+       new.rlim_max = max;
+       if (setrlimit(RLIMIT_MEMLOCK, &new)) {
+               perror("setrlimit() failed");
+               return -1;
+       }
+
+       /* drop capabilities including CAP_IPC_LOCK */
+       if (cap_set_proc(cap)) {
+               perror("cap_set_proc() failed");
+               return -2;
+       }
+
+       return 0;
+}
+
+int get_proc_locked_vm_size(void)
+{
+       FILE *f;
+       int ret = -1;
+       char line[1024] = {0};
+       unsigned long lock_size = 0;
+
+       f = fopen("/proc/self/status", "r");
+       if (!f) {
+               perror("fopen");
+               return -1;
+       }
+
+       while (fgets(line, 1024, f)) {
+               if (strstr(line, "VmLck")) {
+                       ret = sscanf(line, "VmLck:\t%8lu kB", &lock_size);
+                       if (ret <= 0) {
+                               printf("sscanf() on VmLck error: %s: %d\n",
+                                               line, ret);
+                               fclose(f);
+                               return -1;
+                       }
+                       fclose(f);
+                       return (int)(lock_size << 10);
+               }
+       }
+
+       perror("cannot parse VmLck in /proc/self/status");
+       fclose(f);
+       return -1;
+}
+
+/*
+ * Get the MMUPageSize of the memory region including input
+ * address from proc file.
+ *
+ * return value: on error case, 0 will be returned.
+ * Otherwise the page size(in bytes) is returned.
+ */
+int get_proc_page_size(unsigned long addr)
+{
+       FILE *smaps;
+       char *line = NULL;
+       unsigned long mmupage_size = 0;
+       size_t size = 0;
+
+       smaps = seek_to_smaps_entry(addr);
+       if (!smaps) {
+               printf("Unable to parse /proc/self/smaps\n");
+               return 0;
+       }
+
+       while (getline(&line, &size, smaps) > 0) {
+               if (!strstr(line, "MMUPageSize")) {
+                       free(line);
+                       line = NULL;
+                       size = 0;
+                       continue;
+               }
+
+               /* found the MMUPageSize of this section */
+               if (sscanf(line, "MMUPageSize:    %8lu kB",
+                                       &mmupage_size) < 1) {
+                       printf("Unable to parse smaps entry for MMUPageSize: %s\n",
+                                       line);
+                       break;
+               }
+
+       }
+       free(line);
+       if (smaps)
+               fclose(smaps);
+       return mmupage_size << 10;
+}
+
+/*
+ * Test mlock/mlock2() on the provided memory chunk.
+ * The mlock/mlock2() calls are expected to succeed (within the rlimit).
+ *
+ * Given the allocated memory chunk [p, p + alloc_size), this test
+ * randomly chooses start/len and performs mlock/mlock2 on the
+ * [start, start + len) range, which always lies within the
+ * allocated chunk.
+ *
+ * The chunk size alloc_size is within the rlimit,
+ * so mlock/mlock2 is always expected to succeed.
+ *
+ * VmLck is assumed to be 0 before this test.
+ *
+ *    return value: 0 - success
+ *    else: failure
+ */
+int test_mlock_within_limit(char *p, int alloc_size)
+{
+       int i;
+       int ret = 0;
+       int locked_vm_size = 0;
+       struct rlimit cur;
+       int page_size = 0;
+
+       getrlimit(RLIMIT_MEMLOCK, &cur);
+       if (cur.rlim_cur < alloc_size) {
+               printf("alloc_size[%d] > %u rlimit, leads to mlock failure\n",
+                               alloc_size, (unsigned int)cur.rlim_cur);
+               return -1;
+       }
+
+       srand(time(NULL));
+       for (i = 0; i < TEST_LOOP; i++) {
+               /*
+                * - choose mlock/mlock2 randomly
+                * - choose lock_size randomly but lock_size < alloc_size
+                * - choose start_offset randomly but p+start_offset+lock_size
+                *   < p+alloc_size
+                */
+               int is_mlock = !!(rand() % 2);
+               int lock_size = rand() % alloc_size;
+               int start_offset = rand() % (alloc_size - lock_size);
+
+               if (is_mlock)
+                       ret = mlock(p + start_offset, lock_size);
+               else
+                       ret = mlock2_(p + start_offset, lock_size,
+                                      MLOCK_ONFAULT);
+
+               if (ret) {
+                       printf("%s() failure at |%p(%d)| mlock:|%p(%d)|\n",
+                                       is_mlock ? "mlock" : "mlock2",
+                                       p, alloc_size,
+                                       p + start_offset, lock_size);
+                       return ret;
+               }
+       }
+
+       /*
+        * Check VmLck left by the tests.
+        */
+       locked_vm_size = get_proc_locked_vm_size();
+       page_size = get_proc_page_size((unsigned long)p);
+       if (page_size == 0) {
+               printf("cannot get proc MMUPageSize\n");
+               return -1;
+       }
+
+       if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size) {
+               printf("test_mlock_within_limit() left VmLck:%d on %d chunk\n",
+                               locked_vm_size, alloc_size);
+               return -1;
+       }
+
+       return 0;
+}
+
+
+/*
+ * The mlock/mlock2() calls are expected to fail (beyond the rlimit).
+ *
+ * Given the allocated memory chunk [p, p + alloc_size), this test
+ * randomly chooses start/len and performs mlock/mlock2 on the
+ * [start, start + len) range.
+ *
+ * The chunk size alloc_size is above the rlimit, and the length to
+ * be locked is also higher than the rlimit, so mlock/mlock2 is
+ * always expected to fail.
+ * The number of locked pages must not increase as a side effect.
+ *
+ *    return value: 0 - success
+ *    else: failure
+ */
+int test_mlock_outof_limit(char *p, int alloc_size)
+{
+       int i;
+       int ret = 0;
+       int locked_vm_size = 0, old_locked_vm_size = 0;
+       struct rlimit cur;
+
+       getrlimit(RLIMIT_MEMLOCK, &cur);
+       if (cur.rlim_cur >= alloc_size) {
+               printf("alloc_size[%d] <= %u rlimit, violates test condition\n",
+                               alloc_size, (unsigned int)cur.rlim_cur);
+               return -1;
+       }
+
+       old_locked_vm_size = get_proc_locked_vm_size();
+       srand(time(NULL));
+       for (i = 0; i < TEST_LOOP; i++) {
+               int is_mlock = !!(rand() % 2);
+               int lock_size = (rand() % (alloc_size - cur.rlim_cur))
+                       + cur.rlim_cur;
+               int start_offset = rand() % (alloc_size - lock_size);
+
+               if (is_mlock)
+                       ret = mlock(p + start_offset, lock_size);
+               else
+                       ret = mlock2_(p + start_offset, lock_size,
+                                       MLOCK_ONFAULT);
+               if (ret == 0) {
+                       printf("%s() unexpectedly succeeded at |%p(%d)| mlock:|%p(%d)|\n",
+                                       is_mlock ? "mlock" : "mlock2",
+                                       p, alloc_size,
+                                       p + start_offset, lock_size);
+                       return -1;
+               }
+       }
+
+       locked_vm_size = get_proc_locked_vm_size();
+       if (locked_vm_size != old_locked_vm_size) {
+               printf("test leads to new mlocked pages: old[%d], new[%d]\n",
+                               old_locked_vm_size,
+                               locked_vm_size);
+               return -1;
+       }
+
+       return 0;
+}
+
+int main(int argc, char **argv)
+{
+       char *p = NULL;
+       int ret = 0;
+
+       if (set_cap_limits(MLOCK_RLIMIT_SIZE))
+               return -1;
+
+       p = malloc(MLOCK_WITHIN_LIMIT_SIZE);
+       if (p == NULL) {
+               perror("malloc() failure");
+               return -1;
+       }
+       ret = test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE);
+       if (ret)
+               return ret;
+       munlock(p, MLOCK_WITHIN_LIMIT_SIZE);
+       free(p);
+
+
+       p = malloc(MLOCK_OUTOF_LIMIT_SIZE);
+       if (p == NULL) {
+               perror("malloc() failure");
+               return -1;
+       }
+       ret = test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE);
+       if (ret)
+               return ret;
+       munlock(p, MLOCK_OUTOF_LIMIT_SIZE);
+       free(p);
+
+       return 0;
+}
index 02ca5e0177c539c79b3fbb53d31ed3da7e4ba9a4..ff0cda2b19c97fd0ad7f75906e9c348094934a7b 100644 (file)
@@ -1,33 +1,12 @@
 #define _GNU_SOURCE
 #include <sys/mman.h>
 #include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <unistd.h>
 #include <string.h>
 #include <sys/time.h>
 #include <sys/resource.h>
-#include <syscall.h>
-#include <errno.h>
 #include <stdbool.h>
-
-#ifndef MLOCK_ONFAULT
-#define MLOCK_ONFAULT 1
-#endif
-
-#ifndef MCL_ONFAULT
-#define MCL_ONFAULT (MCL_FUTURE << 1)
-#endif
-
-static int mlock2_(void *start, size_t len, int flags)
-{
-#ifdef __NR_mlock2
-       return syscall(__NR_mlock2, start, len, flags);
-#else
-       errno = ENOSYS;
-       return -1;
-#endif
-}
+#include "mlock2.h"
 
 struct vm_boundaries {
        unsigned long start;
@@ -138,46 +117,6 @@ static uint64_t get_kpageflags(unsigned long pfn)
        return flags;
 }
 
-static FILE *seek_to_smaps_entry(unsigned long addr)
-{
-       FILE *file;
-       char *line = NULL;
-       size_t size = 0;
-       unsigned long start, end;
-       char perms[5];
-       unsigned long offset;
-       char dev[32];
-       unsigned long inode;
-       char path[BUFSIZ];
-
-       file = fopen("/proc/self/smaps", "r");
-       if (!file) {
-               perror("fopen smaps");
-               _exit(1);
-       }
-
-       while (getline(&line, &size, file) > 0) {
-               if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n",
-                          &start, &end, perms, &offset, dev, &inode, path) < 6)
-                       goto next;
-
-               if (start <= addr && addr < end)
-                       goto out;
-
-next:
-               free(line);
-               line = NULL;
-               size = 0;
-       }
-
-       fclose(file);
-       file = NULL;
-
-out:
-       free(line);
-       return file;
-}
-
 #define VMFLAGS "VmFlags:"
 
 static bool is_vmflag_set(unsigned long addr, const char *vmflag)
diff --git a/tools/testing/selftests/vm/mlock2.h b/tools/testing/selftests/vm/mlock2.h
new file mode 100644 (file)
index 0000000..b2c09b4
--- /dev/null
@@ -0,0 +1,63 @@
+#include <syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef MLOCK_ONFAULT
+#define MLOCK_ONFAULT 1
+#endif
+
+#ifndef MCL_ONFAULT
+#define MCL_ONFAULT (MCL_FUTURE << 1)
+#endif
+
+static int mlock2_(void *start, size_t len, int flags)
+{
+#ifdef __NR_mlock2
+       return syscall(__NR_mlock2, start, len, flags);
+#else
+       errno = ENOSYS;
+       return -1;
+#endif
+}
+
+static FILE *seek_to_smaps_entry(unsigned long addr)
+{
+       FILE *file;
+       char *line = NULL;
+       size_t size = 0;
+       unsigned long start, end;
+       char perms[5];
+       unsigned long offset;
+       char dev[32];
+       unsigned long inode;
+       char path[BUFSIZ];
+
+       file = fopen("/proc/self/smaps", "r");
+       if (!file) {
+               perror("fopen smaps");
+               _exit(1);
+       }
+
+       while (getline(&line, &size, file) > 0) {
+               if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n",
+                          &start, &end, perms, &offset, dev, &inode, path) < 6)
+                       goto next;
+
+               if (start <= addr && addr < end)
+                       goto out;
+
+next:
+               free(line);
+               line = NULL;
+               size = 0;
+       }
+
+       fclose(file);
+       file = NULL;
+
+out:
+       free(line);
+       return file;
+}
+
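A hypothetical usage sketch of the shared mlock2_() helper now carried in mlock2.h (illustrative only, not part of the selftests): map an anonymous region and ask for it to be locked on fault.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include "mlock2.h"

int main(void)
{
	size_t len = 4096;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Lock pages in this range only as they are faulted in. */
	if (mlock2_(p, len, MLOCK_ONFAULT))
		perror("mlock2");

	munmap(p, len);
	return 0;
}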