F: include/linux/altera_uart.h
F: include/linux/altera_jtaguart.h
+AMAZON ETHERNET DRIVERS
+M: Netanel Belgazal <netanel@annapurnalabs.com>
+R: Saeed Bishara <saeed@annapurnalabs.com>
+R: Zorik Machulsky <zorik@annapurnalabs.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: Documentation/networking/ena.txt
+F: drivers/net/ethernet/amazon/
+
AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
M: Tom Lendacky <thomas.lendacky@amd.com>
M: Gary Hook <gary.hook@amd.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
N: sun[x456789]i
+F: arch/arm/boot/dts/ntc-gr8*
ARM/Allwinner SoC Clock Support
M: Emilio López <emilio@elopez.com.ar>
F: arch/arm/boot/dts/meson*
F: arch/arm64/boot/dts/amlogic/
F: drivers/pinctrl/meson/
+F: drivers/mmc/host/meson*
N: meson
ARM/Annapurna Labs ALPINE ARCHITECTURE
ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
M: Kukjin Kim <kgene@kernel.org>
M: Krzysztof Kozlowski <krzk@kernel.org>
+R: Javier Martinez Canillas <javier@osg.samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/plat-samsung/s5p-dev-mfc.c
F: drivers/media/platform/s5p-mfc/
-ARM/SAMSUNG S5P SERIES TV SUBSYSTEM SUPPORT
-M: Kyungmin Park <kyungmin.park@samsung.com>
-M: Tomasz Stanislawski <t.stanislaws@samsung.com>
-L: linux-arm-kernel@lists.infradead.org
-L: linux-media@vger.kernel.org
-S: Maintained
-F: drivers/media/platform/s5p-tv/
-
ARM/SAMSUNG S5P SERIES HDMI CEC SUBSYSTEM SUPPORT
M: Kyungmin Park <kyungmin.park@samsung.com>
L: linux-arm-kernel@lists.infradead.org
F: drivers/bus/uniphier-system-bus.c
F: drivers/i2c/busses/i2c-uniphier*
F: drivers/pinctrl/uniphier/
+F: drivers/reset/reset-uniphier.c
F: drivers/tty/serial/8250/8250_uniphier.c
N: uniphier
F: drivers/media/i2c/as3645a.c
F: include/media/i2c/as3645a.h
+ASAHI KASEI AK8974 DRIVER
+M: Linus Walleij <linus.walleij@linaro.org>
+L: linux-iio@vger.kernel.org
+W: http://www.akm.com/
+S: Supported
+F: drivers/iio/magnetometer/ak8974.c
+
ASC7621 HARDWARE MONITOR DRIVER
M: George Joseph <george.joseph@fairview5.com>
L: linux-hwmon@vger.kernel.org
F: drivers/net/wireless/atmel/atmel*
ATMEL MAXTOUCH DRIVER
-M: Nick Dyer <nick.dyer@itdev.co.uk>
-T: git git://github.com/atmel-maxtouch/linux.git
-S: Supported
+M: Nick Dyer <nick@shmanahar.org>
+T: git git://github.com/ndyer/linux.git
+S: Maintained
F: Documentation/devicetree/bindings/input/atmel,maxtouch.txt
F: drivers/input/touchscreen/atmel_mxt_ts.c
F: include/linux/platform_data/atmel_mxt_ts.h
BONDING DRIVER
M: Jay Vosburgh <j.vosburgh@gmail.com>
M: Veaceslav Falico <vfalico@gmail.com>
-M: Andy Gospodarek <gospo@cumulusnetworks.com>
+M: Andy Gospodarek <andy@greyhouse.net>
L: netdev@vger.kernel.org
W: http://sourceforge.net/projects/bonding/
S: Supported
F: drivers/iio/light/cm*
F: Documentation/devicetree/bindings/i2c/trivial-devices.txt
+CAVIUM I2C DRIVER
+M: Jan Glauber <jglauber@cavium.com>
+M: David Daney <david.daney@cavium.com>
+W: http://www.cavium.com
+S: Supported
+F: drivers/i2c/busses/i2c-octeon*
+F: drivers/i2c/busses/i2c-thunderx*
+
CAVIUM LIQUIDIO NETWORK DRIVER
M: Derek Chickles <derek.chickles@caviumnetworks.com>
M: Satanand Burla <satananda.burla@caviumnetworks.com>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git misc
W: http://coccinelle.lip6.fr/
S: Supported
-F: Documentation/coccinelle.txt
+F: Documentation/dev-tools/coccinelle.rst
F: scripts/coccinelle/
F: scripts/coccicheck
M: Michael Turquette <mturquette@baylibre.com>
M: Stephen Boyd <sboyd@codeaurora.org>
L: linux-clk@vger.kernel.org
+Q: http://patchwork.kernel.org/project/linux-clk/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git
S: Maintained
F: Documentation/devicetree/bindings/clock/
F: drivers/net/wan/cosa*
CPMAC ETHERNET DRIVER
-M: Florian Fainelli <florian@openwrt.org>
+M: Florian Fainelli <f.fainelli@gmail.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/ti/cpmac.c
F: drivers/mfd/da903x.c
F: drivers/mfd/da90??-*.c
F: drivers/mfd/da91??-*.c
-F: drivers/power/da9052-battery.c
-F: drivers/power/da91??-*.c
+F: drivers/power/supply/da9052-battery.c
+F: drivers/power/supply/da91??-*.c
F: drivers/regulator/da903x.c
F: drivers/regulator/da9???-regulator.[ch]
F: drivers/rtc/rtc-da90??.c
F: include/sound/da[79]*.h
F: sound/soc/codecs/da[79]*.[ch]
+DIAMOND SYSTEMS GPIO-MM GPIO DRIVER
+M: William Breathitt Gray <vilhelm.gray@gmail.com>
+L: linux-gpio@vger.kernel.org
+S: Maintained
+F: drivers/gpio/gpio-gpio-mm.c
+
DIGI NEO AND CLASSIC PCI PRODUCTS
M: Lidza Louina <lidza.louina@gmail.com>
M: Mark Hounschell <markh@compro.net>
F: drivers/gpu/drm/i810/
F: include/uapi/drm/i810_drm.h
+DRM DRIVERS FOR MEDIATEK
+M: CK Hu <ck.hu@mediatek.com>
+M: Philipp Zabel <p.zabel@pengutronix.de>
+L: dri-devel@lists.freedesktop.org
+S: Supported
+F: drivers/gpu/drm/mediatek/
+F: Documentation/devicetree/bindings/display/mediatek/
+
DRM DRIVER FOR MSM ADRENO GPU
M: Rob Clark <robdclark@gmail.com>
L: linux-arm-msm@vger.kernel.org
F: fs/ecryptfs/
EDAC-CORE
-M: Doug Thompson <dougthompson@xmission.com>
M: Borislav Petkov <bp@alien8.de>
M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
F: include/linux/edac.h
EDAC-AMD64
-M: Doug Thompson <dougthompson@xmission.com>
M: Borislav Petkov <bp@alien8.de>
L: linux-edac@vger.kernel.org
S: Maintained
F: drivers/edac/amd64_edac*
EDAC-CALXEDA
-M: Doug Thompson <dougthompson@xmission.com>
M: Robert Richter <rric@kernel.org>
L: linux-edac@vger.kernel.org
S: Maintained
EDAC-E752X
M: Mark Gross <mark.gross@intel.com>
-M: Doug Thompson <dougthompson@xmission.com>
L: linux-edac@vger.kernel.org
S: Maintained
F: drivers/edac/e752x_edac.c
EDAC-E7XXX
-M: Doug Thompson <dougthompson@xmission.com>
L: linux-edac@vger.kernel.org
S: Maintained
F: drivers/edac/e7xxx_edac.c
+EDAC-FSL_DDR
+M: York Sun <york.sun@nxp.com>
+L: linux-edac@vger.kernel.org
+S: Maintained
+F: drivers/edac/fsl_ddr_edac.*
+
EDAC-GHES
M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
F: drivers/edac/i82443bxgx_edac.c
EDAC-I3000
-M: Jason Uhlenkott <juhlenko@akamai.com>
L: linux-edac@vger.kernel.org
-S: Maintained
+S: Orphan
F: drivers/edac/i3000_edac.c
EDAC-I5000
-M: Doug Thompson <dougthompson@xmission.com>
L: linux-edac@vger.kernel.org
S: Maintained
F: drivers/edac/i5000_edac.c
M: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
S: Maintained
F: kernel/gcov/
-F: Documentation/gcov.txt
+F: Documentation/dev-tools/gcov.rst
GDT SCSI DISK ARRAY CONTROLLER DRIVER
M: Achim Leubner <achim_leubner@adaptec.com>
F: drivers/net/ethernet/hisilicon/
F: Documentation/devicetree/bindings/net/hisilicon*.txt
+HISILICON ROCE DRIVER
+M: Lijun Ou <oulijun@huawei.com>
+M: Wei Hu(Xavier) <xavier.huwei@huawei.com>
+L: linux-rdma@vger.kernel.org
+S: Maintained
+F: drivers/infiniband/hw/hns/
+F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt
+
HISILICON SAS Controller
M: John Garry <john.garry@huawei.com>
W: http://www.hisilicon.com
S: Maintained
F: fs/hugetlbfs/
+HVA ST MEDIA DRIVER
+M: Jean-Christophe Trotin <jean-christophe.trotin@st.com>
+L: linux-media@vger.kernel.org
+T: git git://linuxtv.org/media_tree.git
+W: https://linuxtv.org
+S: Supported
+F: drivers/media/platform/sti/hva/
+
Hyper-V CORE AND DRIVERS
M: "K. Y. Srinivasan" <kys@microsoft.com>
M: Haiyang Zhang <haiyangz@microsoft.com>
F: Documentation/i2c/i2c-topology
F: Documentation/i2c/muxes/
F: Documentation/devicetree/bindings/i2c/i2c-mux*
+F: Documentation/devicetree/bindings/i2c/i2c-arb*
+F: Documentation/devicetree/bindings/i2c/i2c-gate*
F: drivers/i2c/i2c-mux.c
F: drivers/i2c/muxes/
F: include/linux/i2c-mux.h
S: Maintained
F: drivers/dma/dma-jz4780.c
+INGENIC JZ4780 NAND DRIVER
+M: Harvey Hunt <harveyhuntnexus@gmail.com>
+L: linux-mtd@lists.infradead.org
+S: Maintained
+F: drivers/mtd/nand/jz4780_*
+
INTEGRITY MEASUREMENT ARCHITECTURE (IMA)
M: Mimi Zohar <zohar@linux.vnet.ibm.com>
M: Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
S: Supported
F: drivers/idle/intel_idle.c
+INTEL INTEGRATED SENSOR HUB DRIVER
+M: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+M: Jiri Kosina <jikos@kernel.org>
+L: linux-input@vger.kernel.org
+S: Maintained
+F: drivers/hid/intel-ish-hid/
+
INTEL PSTATE DRIVER
M: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
M: Len Brown <lenb@kernel.org>
F: drivers/cpufreq/intel_pstate.c
INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
- M: Maik Broemme <mbroemme@plusserver.de>
+ M: Maik Broemme <mbroemme@libmpq.org>
L: linux-fbdev@vger.kernel.org
S: Maintained
F: Documentation/fb/intelfb.txt
S: Maintained
F: arch/*/include/asm/kasan.h
F: arch/*/mm/kasan_init*
-F: Documentation/kasan.txt
+F: Documentation/dev-tools/kasan.rst
F: include/linux/kasan*.h
F: lib/test_kasan.c
F: mm/kasan/
M: Vegard Nossum <vegardno@ifi.uio.no>
M: Pekka Enberg <penberg@kernel.org>
S: Maintained
-F: Documentation/kmemcheck.txt
+F: Documentation/dev-tools/kmemcheck.rst
F: arch/x86/include/asm/kmemcheck.h
F: arch/x86/mm/kmemcheck/
F: include/linux/kmemcheck.h
KMEMLEAK
M: Catalin Marinas <catalin.marinas@arm.com>
S: Maintained
-F: Documentation/kmemleak.txt
+F: Documentation/dev-tools/kmemleak.rst
F: include/linux/kmemleak.h
F: mm/kmemleak.c
F: mm/kmemleak-test.c
F: drivers/hwmon/max20751.c
MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER
-M: "Hans J. Koch" <hjk@hansjkoch.de>
L: linux-hwmon@vger.kernel.org
-S: Maintained
+S: Orphan
F: Documentation/hwmon/max6650
F: drivers/hwmon/max6650.c
M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
L: linux-pm@vger.kernel.org
S: Supported
-F: drivers/power/max14577_charger.c
-F: drivers/power/max77693_charger.c
+F: drivers/power/supply/max14577_charger.c
+F: drivers/power/supply/max77693_charger.c
MAXIM MAX77802 MULTIFUNCTION PMIC DEVICE DRIVERS
M: Javier Martinez Canillas <javier@osg.samsung.com>
S: Maintained
F: drivers/iio/potentiometer/mcp4531.c
+MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
+M: William Breathitt Gray <vilhelm.gray@gmail.com>
+L: linux-iio@vger.kernel.org
+S: Maintained
+F: drivers/iio/dac/cio-dac.c
+
MEDIA DRIVERS FOR RENESAS - FCP
M: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
L: linux-media@vger.kernel.org
Q: http://patchwork.ozlabs.org/project/netdev/list/
F: drivers/net/ethernet/mellanox/mlxsw/
+MELLANOX MLXCPLD LED DRIVER
+M: Vadim Pasternak <vadimp@mellanox.com>
+L: linux-leds@vger.kernel.org
+S: Supported
+W: http://www.mellanox.com
+F: drivers/leds/leds-mlxcpld.c
+F: Documentation/leds/leds-mlxcpld.txt
+
SOFT-ROCE DRIVER (rxe)
M: Moni Shoua <monis@mellanox.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: arch/microblaze/
+MICROCHIP / ATMEL ISC DRIVER
+M: Songjun Wu <songjun.wu@microchip.com>
+L: linux-media@vger.kernel.org
+S: Supported
+F: drivers/media/platform/atmel/atmel-isc.c
+F: drivers/media/platform/atmel/atmel-isc-regs.h
+F: Documentation/devicetree/bindings/media/atmel-isc.txt
+
MICROSOFT SURFACE PRO 3 BUTTON DRIVER
M: Chen Yu <yu.c.chen@intel.com>
L: platform-driver-x86@vger.kernel.org
S: Supported
F: drivers/iio/temperature/mlx90614.c
+MICROSEMI SMART ARRAY SMARTPQI DRIVER (smartpqi)
+M: Don Brace <don.brace@microsemi.com>
+L: esc.storagedev@microsemi.com
+L: linux-scsi@vger.kernel.org
+S: Supported
+F: drivers/scsi/smartpqi/smartpqi*.[ch]
+F: drivers/scsi/smartpqi/Kconfig
+F: drivers/scsi/smartpqi/Makefile
+F: include/linux/cciss*.h
+F: include/uapi/linux/cciss*.h
+F: Documentation/scsi/smartpqi.txt
+
MN88472 MEDIA DRIVER
M: Antti Palosaari <crope@iki.fi>
L: linux-media@vger.kernel.org
M: Lee Jones <lee.jones@linaro.org>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git
S: Supported
+F: Documentation/devicetree/bindings/mfd/
F: drivers/mfd/
F: include/linux/mfd/
F: include/linux/power/bq2415x_charger.h
F: include/linux/power/bq27xxx_battery.h
F: include/linux/power/isp1704_charger.h
-F: drivers/power/bq2415x_charger.c
-F: drivers/power/bq27xxx_battery.c
-F: drivers/power/bq27xxx_battery_i2c.c
-F: drivers/power/isp1704_charger.c
-F: drivers/power/rx51_battery.c
+F: drivers/power/supply/bq2415x_charger.c
+F: drivers/power/supply/bq27xxx_battery.c
+F: drivers/power/supply/bq27xxx_battery_i2c.c
+F: drivers/power/supply/isp1704_charger.c
+F: drivers/power/supply/rx51_battery.c
NTB DRIVER CORE
M: Jon Mason <jdmason@kudzu.us>
F: Documentation/virtual/paravirt_ops.txt
F: arch/*/kernel/paravirt*
F: arch/*/include/asm/paravirt.h
+F: include/linux/hypervisor.h
PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES
M: Tim Waugh <tim@cyberelk.net>
POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS
M: Sebastian Reichel <sre@kernel.org>
-M: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
-M: David Woodhouse <dwmw2@infradead.org>
L: linux-pm@vger.kernel.org
-T: git git://git.infradead.org/battery-2.6.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
S: Maintained
-F: Documentation/devicetree/bindings/power/
-F: Documentation/devicetree/bindings/power_supply/
+F: Documentation/devicetree/bindings/power/supply/
F: include/linux/power_supply.h
-F: drivers/power/
-X: drivers/power/avs/
+F: drivers/power/supply/
POWER STATE COORDINATION INTERFACE (PSCI)
M: Mark Rutland <mark.rutland@arm.com>
S: Supported
F: drivers/net/wireless/ath/ath10k/
+QUALCOMM EMAC GIGABIT ETHERNET DRIVER
+M: Timur Tabi <timur@codeaurora.org>
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/ethernet/qualcomm/emac/
+
QUALCOMM HEXAGON ARCHITECTURE
M: Richard Kuo <rkuo@codeaurora.org>
L: linux-hexagon@vger.kernel.org
F: Documentation/rpmsg.txt
F: include/linux/rpmsg.h
+RENESAS CLOCK DRIVERS
+M: Geert Uytterhoeven <geert+renesas@glider.be>
+L: linux-renesas-soc@vger.kernel.org
+S: Supported
+F: drivers/clk/renesas/
+
RENESAS ETHERNET DRIVERS
R: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
L: netdev@vger.kernel.org
RHASHTABLE
M: Thomas Graf <tgraf@suug.ch>
+M: Herbert Xu <herbert@gondor.apana.org.au>
L: netdev@vger.kernel.org
S: Maintained
F: lib/rhashtable.c
TI BQ27XXX POWER SUPPLY DRIVER
R: Andrew F. Davis <afd@ti.com>
F: include/linux/power/bq27xxx_battery.h
-F: drivers/power/bq27xxx_battery.c
-F: drivers/power/bq27xxx_battery_i2c.c
+F: drivers/power/supply/bq27xxx_battery.c
+F: drivers/power/supply/bq27xxx_battery_i2c.c
TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
M: John Stultz <john.stultz@linaro.org>
F: drivers/misc/phantom.c
F: include/uapi/linux/phantom.h
-SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER
-M: Jayamohan Kallickal <jayamohan.kallickal@avagotech.com>
-M: Ketan Mukadam <ketan.mukadam@avagotech.com>
-M: John Soni Jose <sony.john@avagotech.com>
+Emulex 10Gbps iSCSI - OneConnect DRIVER
+M: Subbu Seetharaman <subbu.seetharaman@broadcom.com>
+M: Ketan Mukadam <ketan.mukadam@broadcom.com>
+M: Jitendra Bhivare <jitendra.bhivare@broadcom.com>
L: linux-scsi@vger.kernel.org
-W: http://www.avagotech.com
+W: http://www.broadcom.com
S: Supported
F: drivers/scsi/be2iscsi/
F: drivers/spi/
F: include/linux/spi/
F: include/uapi/linux/spi/
+F: tools/spi/
SPIDERNET NETWORK DRIVER for CELL
M: Ishizaki Kou <kou.ishizaki@toshiba.co.jp>
STAGING - LUSTRE PARALLEL FILESYSTEM
M: Oleg Drokin <oleg.drokin@intel.com>
M: Andreas Dilger <andreas.dilger@intel.com>
+M: James Simmons <jsimmons@infradead.org>
L: lustre-devel@lists.lustre.org (moderated for non-subscribers)
W: http://wiki.lustre.org/
S: Maintained
S: Odd Fixes
F: drivers/staging/rtl8712/
-STAGING - REALTEK RTL8723U WIRELESS DRIVER
-M: Larry Finger <Larry.Finger@lwfinger.net>
-M: Jes Sorensen <Jes.Sorensen@redhat.com>
-L: linux-wireless@vger.kernel.org
-S: Maintained
-F: drivers/staging/rtl8723au/
-
STAGING - SILICON MOTION SM750 FRAME BUFFER DRIVER
M: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
M: Teddy Wang <teddy.wang@siliconmotion.com>
S: Supported
F: drivers/mfd/syscon.c
+SYSTEM RESET/SHUTDOWN DRIVERS
+M: Sebastian Reichel <sre@kernel.org>
+L: linux-pm@vger.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
+S: Maintained
+F: Documentation/devicetree/bindings/power/reset/
+F: drivers/power/reset/
+
SYSV FILESYSTEM
M: Christoph Hellwig <hch@infradead.org>
S: Maintained
TI LP8727 CHARGER DRIVER
M: Milo Kim <milo.kim@ti.com>
S: Maintained
-F: drivers/power/lp8727_charger.c
+F: drivers/power/supply/lp8727_charger.c
F: include/linux/platform_data/lp8727.h
TI LP8788 MFD DRIVER
F: drivers/iio/adc/lp8788_adc.c
F: drivers/leds/leds-lp8788.c
F: drivers/mfd/lp8788*.c
-F: drivers/power/lp8788-charger.c
+F: drivers/power/supply/lp8788-charger.c
F: drivers/regulator/lp8788-*.c
F: include/linux/mfd/lp8788*.h
S: Odd fixes
F: drivers/media/usb/tm6000/
+TW5864 VIDEO4LINUX DRIVER
+M: Bluecherry Maintainers <maintainers@bluecherrydvr.com>
+M: Andrey Utkin <andrey.utkin@corp.bluecherry.net>
+M: Andrey Utkin <andrey_utkin@fastmail.com>
+L: linux-media@vger.kernel.org
+S: Supported
+F: drivers/media/pci/tw5864/
+
TW68 VIDEO4LINUX DRIVER
M: Hans Verkuil <hverkuil@xs4all.nl>
L: linux-media@vger.kernel.org
F: drivers/net/usb/lan78xx.*
USB MASS STORAGE DRIVER
-M: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
+M: Alan Stern <stern@rowland.harvard.edu>
L: linux-usb@vger.kernel.org
L: usb-storage@lists.one-eyed-alien.net
S: Maintained
USB SMSC95XX ETHERNET DRIVER
M: Steve Glendinning <steve.glendinning@shawell.net>
+M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/usb/smsc95xx.*
F: fs/hppfs/
USERSPACE I/O (UIO)
-M: "Hans J. Koch" <hjk@hansjkoch.de>
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
F: net/8021q/
VLYNQ BUS
- M: Florian Fainelli <florian@openwrt.org>
+ M: Florian Fainelli <f.fainelli@gmail.com>
L: openwrt-devel@lists.openwrt.org (subscribers-only)
S: Maintained
F: drivers/vlynq/vlynq.c
F: drivers/mfd/arizona*
F: drivers/mfd/wm*.c
F: drivers/mfd/cs47l24*
-F: drivers/power/wm83*.c
+F: drivers/power/supply/wm83*.c
F: drivers/rtc/rtc-wm83*.c
F: drivers/regulator/wm8*.c
F: drivers/video/backlight/wm83*_bl.c
static DECLARE_COMPLETION(cpu_running);
-static struct smp_operations smp_ops;
+static struct smp_operations smp_ops __ro_after_init;
void __init smp_set_ops(const struct smp_operations *ops)
{
static void raise_nmi(cpumask_t *mask)
{
- /*
- * Generate the backtrace directly if we are running in a calling
- * context that is not preemptible by the backtrace IPI. Note
- * that nmi_cpu_backtrace() automatically removes the current cpu
- * from mask.
- */
- if (cpumask_test_cpu(smp_processor_id(), mask) && irqs_disabled())
- nmi_cpu_backtrace(NULL);
-
smp_cross_call(mask, IPI_CPU_BACKTRACE);
}
- void arch_trigger_all_cpu_backtrace(bool include_self)
+ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
- nmi_trigger_all_cpu_backtrace(include_self, raise_nmi);
+ nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_nmi);
}
static void tls_thread_flush(void)
{
- asm ("msr tpidr_el0, xzr");
+ write_sysreg(0, tpidr_el0);
if (is_compat_task()) {
current->thread.tp_value = 0;
* with a stale shadow state during context switch.
*/
barrier();
- asm ("msr tpidrro_el0, xzr");
+ write_sysreg(0, tpidrro_el0);
}
}
* Read the current TLS pointer from tpidr_el0 as it may be
* out-of-sync with the saved value.
*/
- asm("mrs %0, tpidr_el0" : "=r" (*task_user_tls(p)));
+ *task_user_tls(p) = read_sysreg(tpidr_el0);
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
{
unsigned long tpidr, tpidrro;
- asm("mrs %0, tpidr_el0" : "=r" (tpidr));
+ tpidr = read_sysreg(tpidr_el0);
*task_user_tls(current) = tpidr;
tpidr = *task_user_tls(next);
tpidrro = is_compat_thread(task_thread_info(next)) ?
next->thread.tp_value : 0;
- asm(
- " msr tpidr_el0, %0\n"
- " msr tpidrro_el0, %1"
- : : "r" (tpidr), "r" (tpidrro));
+ write_sysreg(tpidr, tpidr_el0);
+ write_sysreg(tpidrro, tpidrro_el0);
}
/* Restore the UAO state depending on next's addr_limit */
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
- unsigned long range_end = mm->brk;
-
if (is_compat_task())
- range_end += 0x02000000;
+ return randomize_page(mm->brk, 0x02000000);
else
- range_end += 0x40000000;
-
- return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+ return randomize_page(mm->brk, 0x40000000);
}
ENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
+ CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
HYPERVISOR_TEXT
_data = .;
_sdata = .;
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+
+ /*
+ * Data written with the MMU off but read with the MMU on requires
+ * cache lines to be invalidated, discarding up to a Cache Writeback
+ * Granule (CWG) of data from the cache. Keep the section that
+ * requires this type of maintenance to be in its own Cache Writeback
+ * Granule (CWG) area so the cache maintenance operations don't
+ * interfere with adjacent data.
+ */
+ .mmuoff.data.write : ALIGN(SZ_2K) {
+ __mmuoff_data_start = .;
+ *(.mmuoff.data.write)
+ }
+ . = ALIGN(SZ_2K);
+ .mmuoff.data.read : {
+ *(.mmuoff.data.read)
+ __mmuoff_data_end = .;
+ }
+
PECOFF_EDATA_PADDING
_edata = .;
extern void pci_console_init(const char *arg);
#endif
-static unsigned long long MAX_MEMORY = 512ull << 20;
+static unsigned long long max_memory = ULLONG_MAX;
+static unsigned long long reserve_low_mem;
DEFINE_SEMAPHORE(octeon_bootbus_sem);
EXPORT_SYMBOL(octeon_bootbus_sem);
struct cvmx_bootinfo *octeon_bootinfo;
EXPORT_SYMBOL(octeon_bootinfo);
-static unsigned long long RESERVE_LOW_MEM = 0ull;
#ifdef CONFIG_KEXEC
#ifdef CONFIG_SMP
/*
bootmem_desc->major_version = CVMX_BOOTMEM_DESC_MAJ_VER;
bootmem_desc->minor_version = CVMX_BOOTMEM_DESC_MIN_VER;
- addr = (OCTEON_DDR0_BASE + RESERVE_LOW_MEM + low_reserved_bytes);
+ addr = (OCTEON_DDR0_BASE + reserve_low_mem + low_reserved_bytes);
bootmem_desc->head_addr = 0;
if (mem_size <= OCTEON_DDR0_SIZE) {
__cvmx_bootmem_phy_free(addr,
- mem_size - RESERVE_LOW_MEM -
+ mem_size - reserve_low_mem -
low_reserved_bytes, 0);
return;
}
__cvmx_bootmem_phy_free(addr,
- OCTEON_DDR0_SIZE - RESERVE_LOW_MEM -
+ OCTEON_DDR0_SIZE - reserve_low_mem -
low_reserved_bytes, 0);
mem_size -= OCTEON_DDR0_SIZE;
default_machine_crash_shutdown(regs);
}
+ #ifdef CONFIG_SMP
+ void octeon_crash_smp_send_stop(void)
+ {
+ int cpu;
+
+ /* disable watchdogs */
+ for_each_online_cpu(cpu)
+ cvmx_write_csr(CVMX_CIU_WDOGX(cpu_logical_map(cpu)), 0);
+ }
+ #endif
+
#endif /* CONFIG_KEXEC */
#ifdef CONFIG_CAVIUM_RESERVE32
/* Default to 64MB in the simulator to speed things up */
if (octeon_is_simulation())
- MAX_MEMORY = 64ull << 20;
+ max_memory = 64ull << 20;
arg = strstr(arcs_cmdline, "mem=");
if (arg) {
- MAX_MEMORY = memparse(arg + 4, &p);
- if (MAX_MEMORY == 0)
- MAX_MEMORY = 32ull << 30;
+ max_memory = memparse(arg + 4, &p);
+ if (max_memory == 0)
+ max_memory = 32ull << 30;
if (*p == '@')
- RESERVE_LOW_MEM = memparse(p + 1, &p);
+ reserve_low_mem = memparse(p + 1, &p);
}
arcs_cmdline[0] = 0;
cvmx_phys_to_ptr(octeon_boot_desc_ptr->argv[i]);
if ((strncmp(arg, "MEM=", 4) == 0) ||
(strncmp(arg, "mem=", 4) == 0)) {
- MAX_MEMORY = memparse(arg + 4, &p);
- if (MAX_MEMORY == 0)
- MAX_MEMORY = 32ull << 30;
+ max_memory = memparse(arg + 4, &p);
+ if (max_memory == 0)
+ max_memory = 32ull << 30;
if (*p == '@')
- RESERVE_LOW_MEM = memparse(p + 1, &p);
+ reserve_low_mem = memparse(p + 1, &p);
#ifdef CONFIG_KEXEC
} else if (strncmp(arg, "crashkernel=", 12) == 0) {
crashk_size = memparse(arg+12, &p);
_machine_kexec_shutdown = octeon_shutdown;
_machine_crash_shutdown = octeon_crash_shutdown;
_machine_kexec_prepare = octeon_kexec_prepare;
+ #ifdef CONFIG_SMP
+ _crash_smp_send_stop = octeon_crash_smp_send_stop;
+ #endif
#endif
octeon_user_io_init();
* to consistently work.
*/
mem_alloc_size = 4 << 20;
- if (mem_alloc_size > MAX_MEMORY)
- mem_alloc_size = MAX_MEMORY;
+ if (mem_alloc_size > max_memory)
+ mem_alloc_size = max_memory;
/* Crashkernel ignores bootmem list. It relies on mem=X@Y option */
#ifdef CONFIG_CRASH_DUMP
- add_memory_region(RESERVE_LOW_MEM, MAX_MEMORY, BOOT_MEM_RAM);
- total += MAX_MEMORY;
+ add_memory_region(reserve_low_mem, max_memory, BOOT_MEM_RAM);
+ total += max_memory;
#else
#ifdef CONFIG_KEXEC
if (crashk_size > 0) {
*/
cvmx_bootmem_lock();
while ((boot_mem_map.nr_map < BOOT_MEM_MAP_MAX)
- && (total < MAX_MEMORY)) {
+ && (total < max_memory)) {
memory = cvmx_bootmem_phy_alloc(mem_alloc_size,
__pa_symbol(&_end), -1,
0x100000,
DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
#endif
-/* Static state in head.S used to set up a CPU */
-extern unsigned long stack_start; /* Initial stack pointer address */
-
struct task_struct;
struct smp_ops {
void (*smp_cpus_done)(unsigned max_cpus);
void (*stop_other_cpus)(int wait);
+ void (*crash_stop_other_cpus)(void);
void (*smp_send_reschedule)(int cpu);
int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
while (nr_map) {
u64 start = biosmap->addr;
u64 size = biosmap->size;
- u64 end = start + size;
+ u64 end = start + size - 1;
u32 type = biosmap->type;
/* Overflow in 64 bits? Ignore the memory map. */
- if (start > end)
+ if (start > end && likely(size))
return -1;
e820_add_region(start, size, type);
nr_free_pages += end_pfn - start_pfn;
}
- set_dma_reserve(nr_pages - nr_free_pages);
+ set_memory_reserve(nr_pages - nr_free_pages, false);
#endif
}
#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/vm86.h>
+#include <asm/switch_to.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
/*
* We use this if we don't have any better idle routine..
*/
- void default_idle(void)
+ void __cpuidle default_idle(void)
{
trace_cpu_idle_rcuidle(1, smp_processor_id());
safe_halt();
* with interrupts enabled and no flags, which is backwards compatible with the
* original MWAIT implementation.
*/
- static void mwait_idle(void)
+ static __cpuidle void mwait_idle(void)
{
if (!current_set_polling_and_test()) {
trace_cpu_idle_rcuidle(1, smp_processor_id());
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
- unsigned long range_end = mm->brk + 0x02000000;
- return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+ return randomize_page(mm->brk, 0x02000000);
}
+/*
+ * Return saved PC of a blocked thread.
+ * What is this good for? It will always be the scheduler or ret_from_fork.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+ struct inactive_task_frame *frame =
+ (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
+ return READ_ONCE_NOCHECK(frame->ret_addr);
+}
+
/*
* Called from fs/proc with a reference on @p to find the function
* which called into schedule(). This needs to be done carefully
if (sp < bottom || sp > top)
return 0;
- fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+ fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
do {
if (fp < bottom || fp > top)
return 0;
jiffies = jiffies_64;
#endif
-#ifndef KERNELOFFSET
-#define KERNELOFFSET 0xd0003000
-#endif
-
/* Note: In the following macros, it would be nice to specify only the
vector name and section kind and construct "sym" and "section" using
CPP concatenation, but that does not work reliably. Concatenating a
VMLINUX_SYMBOL(__sched_text_start) = .;
*(.sched.literal .sched.text)
VMLINUX_SYMBOL(__sched_text_end) = .;
+ VMLINUX_SYMBOL(__cpuidle_text_start) = .;
+ *(.cpuidle.literal .cpuidle.text)
+ VMLINUX_SYMBOL(__cpuidle_text_end) = .;
VMLINUX_SYMBOL(__lock_text_start) = .;
*(.spinlock.literal .spinlock.text)
VMLINUX_SYMBOL(__lock_text_end) = .;
static void crng_reseed(struct crng_state *crng, struct entropy_store *r);
static void push_to_pool(struct work_struct *work);
-static __u32 input_pool_data[INPUT_POOL_WORDS];
-static __u32 blocking_pool_data[OUTPUT_POOL_WORDS];
+static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy;
+static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy;
static struct entropy_store input_pool = {
.poolinfo = &poolinfo_table[0],
}
EXPORT_SYMBOL(get_random_long);
- /*
- * randomize_range() returns a start address such that
+ /**
+ * randomize_page - Generate a random, page aligned address
+ * @start: The smallest acceptable address the caller will take.
+ * @range: The size of the area, starting at @start, within which the
+ * random address must fall.
+ *
+ * If @start + @range would overflow, @range is capped.
*
- * [...... <range> .....]
- * start end
+ * NOTE: Historical use of randomize_range, which this replaces, presumed that
+ * @start was already page aligned. We now align it regardless.
*
- * a <range> with size "len" starting at the return value is inside in the
- * area defined by [start, end], but is otherwise randomized.
+ * Return: A page aligned address within [start, start + range). On error,
+ * @start is returned.
*/
unsigned long
- randomize_range(unsigned long start, unsigned long end, unsigned long len)
+ randomize_page(unsigned long start, unsigned long range)
{
- unsigned long range = end - len - start;
+ if (!PAGE_ALIGNED(start)) {
+ range -= PAGE_ALIGN(start) - start;
+ start = PAGE_ALIGN(start);
+ }
- if (end <= start + len)
- return 0;
- return PAGE_ALIGN(get_random_int() % range + start);
+ if (start > ULONG_MAX - range)
+ range = ULONG_MAX - start;
+
+ range >>= PAGE_SHIFT;
+
+ if (range == 0)
+ return start;
+
+ return start + (get_random_long() % range << PAGE_SHIFT);
}
/* Interface for in-kernel drivers of true hardware RNGs.
int nfs_symlink(struct inode *, struct dentry *, const char *);
int nfs_link(struct dentry *, struct inode *, struct dentry *);
int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
-int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
+int nfs_rename(struct inode *, struct dentry *,
+ struct inode *, struct dentry *, unsigned int);
/* file.c */
int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
loff_t i_size = i_size_read(page_file_mapping(page)->host);
if (i_size > 0) {
- pgoff_t page_index = page_file_index(page);
+ pgoff_t index = page_index(page);
pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT;
- if (page_index < end_index)
+ if (index < end_index)
return PAGE_SIZE;
- if (page_index == end_index)
+ if (index == end_index)
return ((i_size - 1) & ~PAGE_MASK) + 1;
}
return 0;
save_stack_trace_tsk(task, &trace);
for (i = 0; i < trace.nr_entries; i++) {
- seq_printf(m, "[<%pK>] %pS\n",
+ seq_printf(m, "[<%pK>] %pB\n",
(void *)entries[i], (void *)entries[i]);
}
unlock_trace(task);
if (!p)
return -ESRCH;
- if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) {
- task_lock(p);
- if (slack_ns == 0)
- p->timer_slack_ns = p->default_timer_slack_ns;
- else
- p->timer_slack_ns = slack_ns;
- task_unlock(p);
- } else
- count = -EPERM;
+ if (p != current) {
+ if (!capable(CAP_SYS_NICE)) {
+ count = -EPERM;
+ goto out;
+ }
+
+ err = security_task_setscheduler(p);
+ if (err) {
+ count = err;
+ goto out;
+ }
+ }
+
+ task_lock(p);
+ if (slack_ns == 0)
+ p->timer_slack_ns = p->default_timer_slack_ns;
+ else
+ p->timer_slack_ns = slack_ns;
+ task_unlock(p);
+ out:
put_task_struct(p);
return count;
{
struct inode *inode = m->private;
struct task_struct *p;
- int err = 0;
+ int err = 0;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
- if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) {
- task_lock(p);
- seq_printf(m, "%llu\n", p->timer_slack_ns);
- task_unlock(p);
- } else
- err = -EPERM;
+ if (p != current) {
+
+ if (!capable(CAP_SYS_NICE)) {
+ err = -EPERM;
+ goto out;
+ }
+ err = security_task_getscheduler(p);
+ if (err)
+ goto out;
+ }
+ task_lock(p);
+ seq_printf(m, "%llu\n", p->timer_slack_ns);
+ task_unlock(p);
+
+ out:
put_task_struct(p);
return err;
mss->anonymous_thp += HPAGE_PMD_SIZE;
else if (PageSwapBacked(page))
mss->shmem_thp += HPAGE_PMD_SIZE;
+ else if (is_zone_device_page(page))
+ /* pass */;
else
VM_BUG_ON_PAGE(1, page);
smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
}
mmu_notifier_invalidate_range_start(mm, 0, -1);
}
- walk_page_range(0, ~0UL, &clear_refs_walk);
+ walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
if (type == CLEAR_REFS_SOFT_DIRTY)
mmu_notifier_invalidate_range_end(mm, 0, -1);
flush_tlb_mm(mm);
*(.dtb.init.rodata) \
VMLINUX_SYMBOL(__dtb_end) = .;
-/* .data section */
+/*
+ * .data section
+ * -fdata-sections generates .data.identifier sections, which need to be
+ * pulled in with .data, but we don't want to pull in .data..stuff, which has
+ * its own requirements. Same for bss.
+ */
#define DATA_DATA \
- *(.data) \
+ *(.data .data.[0-9a-zA-Z_]*) \
*(.ref.data) \
*(.data..shared_aligned) /* percpu related */ \
MEM_KEEP(init.data) \
/* Kernel symbol table: Normal symbols */ \
__ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab) = .; \
- *(SORT(___ksymtab+*)) \
+ KEEP(*(SORT(___ksymtab+*))) \
VMLINUX_SYMBOL(__stop___ksymtab) = .; \
} \
\
/* Kernel symbol table: GPL-only symbols */ \
__ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_gpl) = .; \
- *(SORT(___ksymtab_gpl+*)) \
+ KEEP(*(SORT(___ksymtab_gpl+*))) \
VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .; \
} \
\
/* Kernel symbol table: Normal unused symbols */ \
__ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_unused) = .; \
- *(SORT(___ksymtab_unused+*)) \
+ KEEP(*(SORT(___ksymtab_unused+*))) \
VMLINUX_SYMBOL(__stop___ksymtab_unused) = .; \
} \
\
/* Kernel symbol table: GPL-only unused symbols */ \
__ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .; \
- *(SORT(___ksymtab_unused_gpl+*)) \
+ KEEP(*(SORT(___ksymtab_unused_gpl+*))) \
VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .; \
} \
\
/* Kernel symbol table: GPL-future-only symbols */ \
__ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .; \
- *(SORT(___ksymtab_gpl_future+*)) \
+ KEEP(*(SORT(___ksymtab_gpl_future+*))) \
VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .; \
} \
\
/* Kernel symbol table: Normal symbols */ \
__kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab) = .; \
- *(SORT(___kcrctab+*)) \
+ KEEP(*(SORT(___kcrctab+*))) \
VMLINUX_SYMBOL(__stop___kcrctab) = .; \
} \
\
/* Kernel symbol table: GPL-only symbols */ \
__kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_gpl) = .; \
- *(SORT(___kcrctab_gpl+*)) \
+ KEEP(*(SORT(___kcrctab_gpl+*))) \
VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .; \
} \
\
/* Kernel symbol table: Normal unused symbols */ \
__kcrctab_unused : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_unused) = .; \
- *(SORT(___kcrctab_unused+*)) \
+ KEEP(*(SORT(___kcrctab_unused+*))) \
VMLINUX_SYMBOL(__stop___kcrctab_unused) = .; \
} \
\
/* Kernel symbol table: GPL-only unused symbols */ \
__kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .; \
- *(SORT(___kcrctab_unused_gpl+*)) \
+ KEEP(*(SORT(___kcrctab_unused_gpl+*))) \
VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .; \
} \
\
/* Kernel symbol table: GPL-future-only symbols */ \
__kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .; \
- *(SORT(___kcrctab_gpl_future+*)) \
+ KEEP(*(SORT(___kcrctab_gpl_future+*))) \
VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \
} \
\
/* Kernel symbol table: strings */ \
__ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
- *(__ksymtab_strings) \
+ KEEP(*(__ksymtab_strings)) \
} \
\
/* __*init sections */ \
#define SECURITY_INIT \
.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__security_initcall_start) = .; \
- *(.security_initcall.init) \
+ KEEP(*(.security_initcall.init)) \
VMLINUX_SYMBOL(__security_initcall_end) = .; \
}
* during second ld run in second ld pass when generating System.map */
#define TEXT_TEXT \
ALIGN_FUNCTION(); \
- *(.text.hot .text .text.fixup .text.unlikely) \
+ *(.text.hot .text .text.fixup .text.unlikely .text.*) \
*(.ref.text) \
MEM_KEEP(init.text) \
MEM_KEEP(exit.text) \
*(.spinlock.text) \
VMLINUX_SYMBOL(__lock_text_end) = .;
+ #define CPUIDLE_TEXT \
+ ALIGN_FUNCTION(); \
+ VMLINUX_SYMBOL(__cpuidle_text_start) = .; \
+ *(.cpuidle.text) \
+ VMLINUX_SYMBOL(__cpuidle_text_end) = .;
+
#define KPROBES_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__kprobes_text_start) = .; \
/* init and exit section handling */
#define INIT_DATA \
+ KEEP(*(SORT(___kentry+*))) \
*(.init.data) \
MEM_DISCARD(init.data) \
KERNEL_CTORS() \
BSS_FIRST_SECTIONS \
*(.bss..page_aligned) \
*(.dynbss) \
- *(.bss) \
+ *(.bss .bss.[0-9a-zA-Z_]*) \
*(COMMON) \
}
#define INIT_CALLS_LEVEL(level) \
VMLINUX_SYMBOL(__initcall##level##_start) = .; \
- *(.initcall##level##.init) \
- *(.initcall##level##s.init) \
+ KEEP(*(.initcall##level##.init)) \
+ KEEP(*(.initcall##level##s.init)) \
#define INIT_CALLS \
VMLINUX_SYMBOL(__initcall_start) = .; \
- *(.initcallearly.init) \
+ KEEP(*(.initcallearly.init)) \
INIT_CALLS_LEVEL(0) \
INIT_CALLS_LEVEL(1) \
INIT_CALLS_LEVEL(2) \
#define CON_INITCALL \
VMLINUX_SYMBOL(__con_initcall_start) = .; \
- *(.con_initcall.init) \
+ KEEP(*(.con_initcall.init)) \
VMLINUX_SYMBOL(__con_initcall_end) = .;
#define SECURITY_INITCALL \
VMLINUX_SYMBOL(__security_initcall_start) = .; \
- *(.security_initcall.init) \
+ KEEP(*(.security_initcall.init)) \
VMLINUX_SYMBOL(__security_initcall_end) = .;
#ifdef CONFIG_BLK_DEV_INITRD
#define INIT_RAM_FS \
. = ALIGN(4); \
VMLINUX_SYMBOL(__initramfs_start) = .; \
- *(.init.ramfs) \
+ KEEP(*(.init.ramfs)) \
. = ALIGN(8); \
- *(.init.ramfs.info)
+ KEEP(*(.init.ramfs.info))
#else
#define INIT_RAM_FS
#endif
#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */
#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
-#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task,
- * not handling interrupts, soon dead.
- * Called on the dying cpu, interrupts
- * are already disabled. Must not
- * sleep, must not fail */
#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug
* lock is dropped */
-#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running.
- * Called on the new cpu, just before
- * enabling interrupts. Must not sleep,
- * must not fail */
#define CPU_BROKEN 0x000B /* CPU (unsigned)v did not die properly,
* perhaps due to preemption. */
#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
-#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN)
-#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN)
-
#ifdef CONFIG_SMP
extern bool cpuhp_tasks_frozen;
#endif /* CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_PM_SLEEP_SMP
-extern int disable_nonboot_cpus(void);
+extern int freeze_secondary_cpus(int primary);
+/*
+ * Inline wrapper that keeps the disable_nonboot_cpus() entry point and
+ * forwards to freeze_secondary_cpus() with 0 as the primary argument.
+ * NOTE(review): presumably "primary" is the CPU that stays online --
+ * confirm against the freeze_secondary_cpus() definition.
+ */
+static inline int disable_nonboot_cpus(void)
+{
+ return freeze_secondary_cpus(0);
+}
extern void enable_nonboot_cpus(void);
#else /* !CONFIG_PM_SLEEP_SMP */
static inline int disable_nonboot_cpus(void) { return 0; }
void cpu_idle_poll_ctrl(bool enable);
+ /* Attach to any functions which should be considered cpuidle. */
+ #define __cpuidle __attribute__((__section__(".cpuidle.text")))
+
+ bool cpu_in_idle(unsigned long pc);
+
void arch_cpu_idle(void);
void arch_cpu_idle_prepare(void);
void arch_cpu_idle_enter(void);
};
extern void add_device_randomness(const void *, unsigned int);
+
+#if defined(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) && !defined(__CHECKER__)
+/*
+ * Hand the current contents of the latent_entropy variable to
+ * add_device_randomness(). Only built when the latent entropy GCC plugin
+ * is configured and __CHECKER__ is not defined.
+ */
+static inline void add_latent_entropy(void)
+{
+ add_device_randomness((const void *)&latent_entropy,
+ sizeof(latent_entropy));
+}
+#else
+/* Stub so callers need no #ifdef when latent_entropy is not available. */
+static inline void add_latent_entropy(void) {}
+#endif
+
extern void add_input_randomness(unsigned int type, unsigned int code,
- unsigned int value);
-extern void add_interrupt_randomness(int irq, int irq_flags);
+ unsigned int value) __latent_entropy;
+extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
extern void get_random_bytes(void *buf, int nbytes);
extern int add_random_ready_callback(struct random_ready_callback *rdy);
unsigned int get_random_int(void);
unsigned long get_random_long(void);
- unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
+ unsigned long randomize_page(unsigned long start, unsigned long range);
u32 prandom_u32(void);
void prandom_bytes(void *buf, size_t nbytes);
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/kref.h>
+#include <linux/percpu.h>
+ #include <linux/irq_work.h>
/*
* Tracks changes to rchan/rchan_buf structs
size_t subbufs_consumed; /* count of sub-buffers consumed */
struct rchan *chan; /* associated channel */
wait_queue_head_t read_wait; /* reader wait queue */
- struct timer_list timer; /* reader wake-up timer */
+ struct irq_work wakeup_work; /* reader wakeup */
struct dentry *dentry; /* channel file dentry */
struct kref kref; /* channel buffer refcount */
struct page **page_array; /* array of current buffer pages */
struct kref kref; /* channel refcount */
void *private_data; /* for user-defined data */
size_t last_toobig; /* tried to log event > subbuf size */
- struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */
+ struct rchan_buf ** __percpu buf; /* per-cpu channel buffers */
int is_global; /* One global buffer ? */
struct list_head list; /* for channel list */
struct dentry *parent; /* parent dentry passed to open */
struct rchan_buf *buf;
local_irq_save(flags);
- buf = chan->buf[smp_processor_id()];
+ buf = *this_cpu_ptr(chan->buf);
if (unlikely(buf->offset + length > chan->subbuf_size))
length = relay_switch_subbuf(buf, length);
memcpy(buf->data + buf->offset, data, length);
{
struct rchan_buf *buf;
- buf = chan->buf[get_cpu()];
+ buf = *get_cpu_ptr(chan->buf);
if (unlikely(buf->offset + length > buf->chan->subbuf_size))
length = relay_switch_subbuf(buf, length);
memcpy(buf->data + buf->offset, data, length);
buf->offset += length;
- put_cpu();
+ put_cpu_ptr(chan->buf);
}
/**
*/
static inline void *relay_reserve(struct rchan *chan, size_t length)
{
- void *reserved;
- struct rchan_buf *buf = chan->buf[smp_processor_id()];
+ void *reserved = NULL;
+ struct rchan_buf *buf = *get_cpu_ptr(chan->buf);
if (unlikely(buf->offset + length > buf->chan->subbuf_size)) {
length = relay_switch_subbuf(buf, length);
if (!length)
- return NULL;
+ goto end;
}
reserved = buf->data + buf->offset;
buf->offset += length;
+end:
+ put_cpu_ptr(chan->buf);
return reserved;
}
*/
extern const struct file_operations relay_file_operations;
+#ifdef CONFIG_RELAY
+int relay_prepare_cpu(unsigned int cpu);
+#else
+#define relay_prepare_cpu NULL
+#endif
+
#endif /* _LINUX_RELAY_H */
#define MMF_HAS_UPROBES 19 /* has uprobes */
#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
- #define MMF_OOM_REAPED 21 /* mm has been already reaped */
- #define MMF_OOM_NOT_REAPABLE 22 /* mm couldn't be reaped */
+ #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */
+ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */
+ #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
atomic_t sigcnt;
atomic_t live;
int nr_threads;
- atomic_t oom_victims; /* # of TIF_MEDIE threads in this thread group */
struct list_head thread_head;
wait_queue_head_t wait_chldexit; /* for wait4() */
short oom_score_adj; /* OOM kill score adjustment */
short oom_score_adj_min; /* OOM kill score adjustment min value.
* Only settable by CAP_SYS_RESOURCE. */
+ struct mm_struct *oom_mm; /* recorded mm when the thread group got
+ * killed by the oom killer */
struct mutex cred_guard_mutex; /* guard against foreign influences on
* credential calculations
#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
-#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu power */
+#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */
+#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */
#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */
#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
#ifdef CONFIG_MMU
struct task_struct *oom_reaper_list;
#endif
+#ifdef CONFIG_VMAP_STACK
+ struct vm_struct *stack_vm_area;
+#endif
/* CPU-specific state of this task */
struct thread_struct thread;
/*
# define arch_task_struct_size (sizeof(struct task_struct))
#endif
+/*
+ * task_stack_vm_area - accessor for the task's cached stack vm_struct.
+ *
+ * With CONFIG_VMAP_STACK this returns t->stack_vm_area; without it the
+ * field does not exist and the accessor always returns NULL, so callers
+ * can test the result instead of using #ifdef.
+ */
+#ifdef CONFIG_VMAP_STACK
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+ return t->stack_vm_area;
+}
+#else
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+ return NULL;
+}
+#endif
+
/* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
__mmdrop(mm);
}
+ /*
+  * Work callback for mmdrop_async(): recover the mm_struct that embeds
+  * @work as its async_put_work member and release it with __mmdrop().
+  */
+ static inline void mmdrop_async_fn(struct work_struct *work)
+ {
+ struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
+ __mmdrop(mm);
+ }
+
+ /*
+  * Drop one mm_count reference on @mm. If that was the last reference,
+  * do not call __mmdrop() here; queue mmdrop_async_fn() with
+  * schedule_work() so the release runs from the work item instead of
+  * the caller's context.
+  */
+ static inline void mmdrop_async(struct mm_struct *mm)
+ {
+ if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
+ INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
+ schedule_work(&mm->async_put_work);
+ }
+ }
+
static inline bool mmget_not_zero(struct mm_struct *mm)
{
return atomic_inc_not_zero(&mm->mm_users);
#endif
}
+/*
+ * Return p->preempt_disable_ip when CONFIG_DEBUG_PREEMPT is enabled.
+ * Without that option the function returns 0, so callers do not need
+ * their own #ifdef.
+ */
+static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
+{
+#ifdef CONFIG_DEBUG_PREEMPT
+ return p->preempt_disable_ip;
+#else
+ return 0;
+#endif
+}
+
/*
* Does a critical section need to be broken due to another
* task waiting?: (technically does not depend on CONFIG_PREEMPT,
return task_rlimit_max(current, limit);
}
+#define SCHED_CPUFREQ_RT (1U << 0)
+#define SCHED_CPUFREQ_DL (1U << 1)
+
+#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
+
#ifdef CONFIG_CPU_FREQ
struct update_util_data {
- void (*func)(struct update_util_data *data,
- u64 time, unsigned long util, unsigned long max);
+ void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
};
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
- void (*func)(struct update_util_data *data, u64 time,
- unsigned long util, unsigned long max));
+ void (*func)(struct update_util_data *data, u64 time,
+ unsigned int flags));
void cpufreq_remove_update_util_hook(int cpu);
#endif /* CONFIG_CPU_FREQ */
#include <linux/pid_namespace.h>
#include <linux/device.h>
#include <linux/kthread.h>
+ #include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/idr.h>
*/
static void __init mm_init(void)
{
+ /* Does address_space.flags still fit into a 32-bit ulong? */
+ BUILD_BUG_ON(AS_LAST_FLAG > 32);
+
/*
* page_ext requires contiguous pages,
* bigger than MAX_ORDER unless SPARSEMEM.
}
WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf);
+ add_latent_entropy();
return ret;
}
* Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
* kmemcache based allocator.
*/
-# if THREAD_SIZE >= PAGE_SIZE
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
- int node)
+# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
{
+#ifdef CONFIG_VMAP_STACK
+ void *stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ THREADINFO_GFP | __GFP_HIGHMEM,
+ PAGE_KERNEL,
+ 0, node,
+ __builtin_return_address(0));
+
+ /*
+ * We can't call find_vm_area() in interrupt context, and
+ * free_thread_stack() can be called in interrupt context,
+ * so cache the vm_struct.
+ */
+ if (stack)
+ tsk->stack_vm_area = find_vm_area(stack);
+ return stack;
+#else
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL;
+#endif
}
-static inline void free_thread_stack(unsigned long *stack)
+static inline void free_thread_stack(struct task_struct *tsk)
{
- __free_pages(virt_to_page(stack), THREAD_SIZE_ORDER);
+ if (task_stack_vm_area(tsk))
+ vfree(tsk->stack);
+ else
+ __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_stack_cache;
return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
}
-static void free_thread_stack(unsigned long *stack)
+static void free_thread_stack(struct task_struct *tsk)
{
- kmem_cache_free(thread_stack_cache, stack);
+ kmem_cache_free(thread_stack_cache, tsk->stack);
}
void thread_stack_cache_init(void)
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
-static void account_kernel_stack(unsigned long *stack, int account)
+static void account_kernel_stack(struct task_struct *tsk, int account)
{
- /* All stack pages are in the same zone and belong to the same memcg. */
- struct page *first_page = virt_to_page(stack);
+ void *stack = task_stack_page(tsk);
+ struct vm_struct *vm = task_stack_vm_area(tsk);
+
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
- mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
- THREAD_SIZE / 1024 * account);
+ if (vm) {
+ int i;
- memcg_kmem_update_page_stat(
- first_page, MEMCG_KERNEL_STACK_KB,
- account * (THREAD_SIZE / 1024));
+ BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+ mod_zone_page_state(page_zone(vm->pages[i]),
+ NR_KERNEL_STACK_KB,
+ PAGE_SIZE / 1024 * account);
+ }
+
+ /* All stack pages belong to the same memcg. */
+ memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB,
+ account * (THREAD_SIZE / 1024));
+ } else {
+ /*
+ * All stack pages are in the same zone and belong to the
+ * same memcg.
+ */
+ struct page *first_page = virt_to_page(stack);
+
+ mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
+ THREAD_SIZE / 1024 * account);
+
+ memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB,
+ account * (THREAD_SIZE / 1024));
+ }
}
void free_task(struct task_struct *tsk)
{
- account_kernel_stack(tsk->stack, -1);
+ account_kernel_stack(tsk, -1);
arch_release_thread_stack(tsk->stack);
- free_thread_stack(tsk->stack);
+ free_thread_stack(tsk);
rt_mutex_debug_task_free(tsk);
ftrace_graph_exit_task(tsk);
put_seccomp_filter(tsk);
{
taskstats_tgid_free(sig);
sched_autogroup_exit(sig);
+ /*
+ * __mmdrop is not safe to call from softirq context on x86 due to
+ * pgd_dtor so postpone it to the async context
+ */
+ if (sig->oom_mm)
+ mmdrop_async(sig->oom_mm);
kmem_cache_free(signal_cachep, sig);
}
{
struct task_struct *tsk;
unsigned long *stack;
+ struct vm_struct *stack_vm_area;
int err;
if (node == NUMA_NO_NODE)
if (!stack)
goto free_tsk;
+ stack_vm_area = task_stack_vm_area(tsk);
+
err = arch_dup_task_struct(tsk, orig);
+
+ /*
+ * arch_dup_task_struct() clobbers the stack-related fields. Make
+ * sure they're properly initialized before using any stack-related
+ * functions again.
+ */
+ tsk->stack = stack;
+#ifdef CONFIG_VMAP_STACK
+ tsk->stack_vm_area = stack_vm_area;
+#endif
+
if (err)
goto free_stack;
- tsk->stack = stack;
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
tsk->task_frag.page = NULL;
tsk->wake_q.next = NULL;
- account_kernel_stack(stack, 1);
+ account_kernel_stack(tsk, 1);
kcov_task_init(tsk);
return tsk;
free_stack:
- free_thread_stack(stack);
+ free_thread_stack(tsk);
free_tsk:
free_task_struct(tsk);
return NULL;
}
#ifdef CONFIG_MMU
-static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+static __latent_entropy int dup_mmap(struct mm_struct *mm,
+ struct mm_struct *oldmm)
{
struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
struct rb_node **rb_link, *rb_parent;
ksm_exit(mm);
khugepaged_exit(mm); /* must run before exit_mmap */
exit_mmap(mm);
+ mm_put_huge_zero_page(mm);
set_mm_exe_file(mm, NULL);
if (!list_empty(&mm->mmlist)) {
spin_lock(&mmlist_lock);
}
if (mm->binfmt)
module_put(mm->binfmt->module);
+ set_bit(MMF_OOM_SKIP, &mm->flags);
mmdrop(mm);
}
* parts of the process environment (as per the clone
* flags). The actual kick-off is left to the caller.
*/
-static struct task_struct *copy_process(unsigned long clone_flags,
+static __latent_entropy struct task_struct *copy_process(
+ unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *child_tidptr,
p = copy_process(clone_flags, stack_start, stack_size,
child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
+ add_latent_entropy();
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
__free_page(buf->page_array[i]);
relay_free_page_array(buf->page_array);
}
- chan->buf[buf->cpu] = NULL;
+ *per_cpu_ptr(chan->buf, buf->cpu) = NULL;
kfree(buf->padding);
kfree(buf);
kref_put(&chan->kref, relay_destroy_channel);
/**
* wakeup_readers - wake up readers waiting on a channel
- * @data: contains the channel buffer
+ * @work: contains the channel buffer
*
- * This is the timer function used to defer reader waking.
+ * This is the function used to defer reader waking
*/
- static void wakeup_readers(unsigned long data)
+ static void wakeup_readers(struct irq_work *work)
{
- struct rchan_buf *buf = (struct rchan_buf *)data;
+ struct rchan_buf *buf;
+
+ buf = container_of(work, struct rchan_buf, wakeup_work);
wake_up_interruptible(&buf->read_wait);
}
if (init) {
init_waitqueue_head(&buf->read_wait);
kref_init(&buf->kref);
- setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
- } else
- del_timer_sync(&buf->timer);
+ init_irq_work(&buf->wakeup_work, wakeup_readers);
+ } else {
+ irq_work_sync(&buf->wakeup_work);
+ }
buf->subbufs_produced = 0;
buf->subbufs_consumed = 0;
*/
void relay_reset(struct rchan *chan)
{
+ struct rchan_buf *buf;
unsigned int i;
if (!chan)
return;
- if (chan->is_global && chan->buf[0]) {
- __relay_reset(chan->buf[0], 0);
+ if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) {
+ __relay_reset(buf, 0);
return;
}
mutex_lock(&relay_channels_mutex);
for_each_possible_cpu(i)
- if (chan->buf[i])
- __relay_reset(chan->buf[i], 0);
+ if ((buf = *per_cpu_ptr(chan->buf, i)))
+ __relay_reset(buf, 0);
mutex_unlock(&relay_channels_mutex);
}
EXPORT_SYMBOL_GPL(relay_reset);
struct dentry *dentry;
if (chan->is_global)
- return chan->buf[0];
+ return *per_cpu_ptr(chan->buf, 0);
buf = relay_create_buf(chan);
if (!buf)
__relay_reset(buf, 1);
if(chan->is_global) {
- chan->buf[0] = buf;
+ *per_cpu_ptr(chan->buf, 0) = buf;
buf->cpu = 0;
}
static void relay_close_buf(struct rchan_buf *buf)
{
buf->finalized = 1;
- del_timer_sync(&buf->timer);
+ irq_work_sync(&buf->wakeup_work);
buf->chan->cb->remove_buf_file(buf->dentry);
kref_put(&buf->kref, relay_remove_buf);
}
chan->cb = cb;
}
-/**
- * relay_hotcpu_callback - CPU hotplug callback
- * @nb: notifier block
- * @action: hotplug action to take
- * @hcpu: CPU number
- *
- * Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
- */
-static int relay_hotcpu_callback(struct notifier_block *nb,
- unsigned long action,
- void *hcpu)
+int relay_prepare_cpu(unsigned int cpu)
{
- unsigned int hotcpu = (unsigned long)hcpu;
struct rchan *chan;
+ struct rchan_buf *buf;
- switch(action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- mutex_lock(&relay_channels_mutex);
- list_for_each_entry(chan, &relay_channels, list) {
- if (chan->buf[hotcpu])
- continue;
- chan->buf[hotcpu] = relay_open_buf(chan, hotcpu);
- if(!chan->buf[hotcpu]) {
- printk(KERN_ERR
- "relay_hotcpu_callback: cpu %d buffer "
- "creation failed\n", hotcpu);
- mutex_unlock(&relay_channels_mutex);
- return notifier_from_errno(-ENOMEM);
- }
+ mutex_lock(&relay_channels_mutex);
+ list_for_each_entry(chan, &relay_channels, list) {
+ if ((buf = *per_cpu_ptr(chan->buf, cpu)))
+ continue;
+ buf = relay_open_buf(chan, cpu);
+ if (!buf) {
+ pr_err("relay: cpu %d buffer creation failed\n", cpu);
+ mutex_unlock(&relay_channels_mutex);
+ return -ENOMEM;
}
- mutex_unlock(&relay_channels_mutex);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- /* No need to flush the cpu : will be flushed upon
- * final relay_flush() call. */
- break;
+ *per_cpu_ptr(chan->buf, cpu) = buf;
}
- return NOTIFY_OK;
+ mutex_unlock(&relay_channels_mutex);
+ return 0;
}
/**
{
unsigned int i;
struct rchan *chan;
+ struct rchan_buf *buf;
if (!(subbuf_size && n_subbufs))
return NULL;
if (!chan)
return NULL;
+ chan->buf = alloc_percpu(struct rchan_buf *);
+ if (!chan->buf) {
+ /*
+  * Without the per-cpu pointer slots every later
+  * *per_cpu_ptr(chan->buf, i) would go through a NULL base.
+  * Nothing else has been set up on chan at this point, so
+  * release it and report failure like the !chan case above.
+  * NOTE(review): assumes chan comes from the kmalloc family
+  * (allocation is outside this hunk) -- confirm before merging.
+  */
+ kfree(chan);
+ return NULL;
+ }
chan->version = RELAYFS_CHANNEL_VERSION;
chan->n_subbufs = n_subbufs;
chan->subbuf_size = subbuf_size;
mutex_lock(&relay_channels_mutex);
for_each_online_cpu(i) {
- chan->buf[i] = relay_open_buf(chan, i);
- if (!chan->buf[i])
+ buf = relay_open_buf(chan, i);
+ if (!buf)
goto free_bufs;
+ *per_cpu_ptr(chan->buf, i) = buf;
}
list_add(&chan->list, &relay_channels);
mutex_unlock(&relay_channels_mutex);
free_bufs:
for_each_possible_cpu(i) {
- if (chan->buf[i])
- relay_close_buf(chan->buf[i]);
+ if ((buf = *per_cpu_ptr(chan->buf, i)))
+ relay_close_buf(buf);
}
kref_put(&chan->kref, relay_destroy_channel);
unsigned int i, curr_cpu;
unsigned long flags;
struct dentry *dentry;
+ struct rchan_buf *buf;
struct rchan_percpu_buf_dispatcher disp;
if (!chan || !base_filename)
if (chan->is_global) {
err = -EINVAL;
- if (!WARN_ON_ONCE(!chan->buf[0])) {
- dentry = relay_create_buf_file(chan, chan->buf[0], 0);
+ buf = *per_cpu_ptr(chan->buf, 0);
+ if (!WARN_ON_ONCE(!buf)) {
+ dentry = relay_create_buf_file(chan, buf, 0);
if (dentry && !WARN_ON_ONCE(!chan->is_global)) {
- relay_set_buf_dentry(chan->buf[0], dentry);
+ relay_set_buf_dentry(buf, dentry);
err = 0;
}
}
* on all currently online CPUs.
*/
for_each_online_cpu(i) {
- if (unlikely(!chan->buf[i])) {
+ buf = *per_cpu_ptr(chan->buf, i);
+ if (unlikely(!buf)) {
WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
err = -EINVAL;
break;
}
- dentry = relay_create_buf_file(chan, chan->buf[i], i);
+ dentry = relay_create_buf_file(chan, buf, i);
if (unlikely(!dentry)) {
err = -EINVAL;
break;
if (curr_cpu == i) {
local_irq_save(flags);
- relay_set_buf_dentry(chan->buf[i], dentry);
+ relay_set_buf_dentry(buf, dentry);
local_irq_restore(flags);
} else {
- disp.buf = chan->buf[i];
+ disp.buf = buf;
disp.dentry = dentry;
smp_mb();
/* relay_channels_mutex must be held, so wait. */
buf->early_bytes += buf->chan->subbuf_size -
buf->padding[old_subbuf];
smp_mb();
- if (waitqueue_active(&buf->read_wait))
+ if (waitqueue_active(&buf->read_wait)) {
/*
* Calling wake_up_interruptible() from here
* will deadlock if we happen to be logging
* from the scheduler (trying to re-grab
* rq->lock), so defer it.
*/
- mod_timer(&buf->timer, jiffies + 1);
+ irq_work_queue(&buf->wakeup_work);
+ }
}
old = buf->data;
if (!chan)
return;
- if (cpu >= NR_CPUS || !chan->buf[cpu] ||
- subbufs_consumed > chan->n_subbufs)
+ /*
+  * Validate cpu first so that per_cpu_ptr() is never evaluated with
+  * an out-of-range cpu; only then look up this cpu's buffer.
+  */
+ if (cpu >= NR_CPUS)
+ return;
+
+ buf = *per_cpu_ptr(chan->buf, cpu);
+ if (!buf || subbufs_consumed > chan->n_subbufs)
return;
- buf = chan->buf[cpu];
if (subbufs_consumed > buf->subbufs_produced - buf->subbufs_consumed)
buf->subbufs_consumed = buf->subbufs_produced;
else
*/
void relay_close(struct rchan *chan)
{
+ struct rchan_buf *buf;
unsigned int i;
if (!chan)
return;
mutex_lock(&relay_channels_mutex);
- if (chan->is_global && chan->buf[0])
- relay_close_buf(chan->buf[0]);
+ if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0)))
+ relay_close_buf(buf);
else
for_each_possible_cpu(i)
- if (chan->buf[i])
- relay_close_buf(chan->buf[i]);
+ if ((buf = *per_cpu_ptr(chan->buf, i)))
+ relay_close_buf(buf);
if (chan->last_toobig)
printk(KERN_WARNING "relay: one or more items not logged "
*/
void relay_flush(struct rchan *chan)
{
+ struct rchan_buf *buf;
unsigned int i;
if (!chan)
return;
- if (chan->is_global && chan->buf[0]) {
- relay_switch_subbuf(chan->buf[0], 0);
+ if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) {
+ relay_switch_subbuf(buf, 0);
return;
}
mutex_lock(&relay_channels_mutex);
for_each_possible_cpu(i)
- if (chan->buf[i])
- relay_switch_subbuf(chan->buf[i], 0);
+ if ((buf = *per_cpu_ptr(chan->buf, i)))
+ relay_switch_subbuf(buf, 0);
mutex_unlock(&relay_channels_mutex);
}
EXPORT_SYMBOL_GPL(relay_flush);
.splice_read = relay_file_splice_read,
};
EXPORT_SYMBOL_GPL(relay_file_operations);
-
-static __init int relay_init(void)
-{
-
- hotcpu_notifier(relay_hotcpu_callback, 0);
- return 0;
-}
-
-early_initcall(relay_init);
static atomic_t huge_zero_refcount;
struct page *huge_zero_page __read_mostly;
- struct page *get_huge_zero_page(void)
+ static struct page *get_huge_zero_page(void)
{
struct page *zero_page;
retry:
return READ_ONCE(huge_zero_page);
}
- void put_huge_zero_page(void)
+ static void put_huge_zero_page(void)
{
/*
* Counter should never go to zero here. Only shrinker can put
BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
}
+ /*
+  * mm_get_huge_zero_page - get the huge zero page on behalf of @mm.
+  *
+  * At most one reference is held per mm, recorded by MMF_HUGE_ZERO_PAGE
+  * in mm->flags. If the flag is already set, the page is returned
+  * without taking another reference. Otherwise a reference is taken via
+  * get_huge_zero_page(); NULL is returned if that fails.
+  */
+ struct page *mm_get_huge_zero_page(struct mm_struct *mm)
+ {
+ if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
+ return READ_ONCE(huge_zero_page);
+
+ if (!get_huge_zero_page())
+ return NULL;
+
+ /* Another thread set the flag meanwhile: drop our extra reference. */
+ if (test_and_set_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
+ put_huge_zero_page();
+
+ return READ_ONCE(huge_zero_page);
+ }
+
+ /*
+  * Drop the huge zero page reference held on behalf of @mm, if
+  * MMF_HUGE_ZERO_PAGE shows that one was taken. The flag itself is
+  * left set.
+  */
+ void mm_put_huge_zero_page(struct mm_struct *mm)
+ {
+ if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
+ put_huge_zero_page();
+ }
+
static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink,
struct shrink_control *sc)
{
set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
}
+ /*
+  * __thp_get_unmapped_area - look for an unmapped range whose start
+  * address has the same offset within a @size block as the file
+  * offset @off.
+  *
+  * Returns the adjusted address, or 0 when no such placement is made:
+  * the range [off, off + len) holds no complete @size-aligned block,
+  * the padded length overflows, or the mm's get_unmapped_area() fails.
+  * NOTE(review): the mask arithmetic assumes @size is a power of two.
+  */
+ unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len,
+ loff_t off, unsigned long flags, unsigned long size)
+ {
+ unsigned long addr;
+ loff_t off_end = off + len;
+ loff_t off_align = round_up(off, size);
+ unsigned long len_pad;
+
+ /* Nothing to gain unless a whole aligned block fits in the range. */
+ if (off_end <= off_align || (off_end - off_align) < size)
+ return 0;
+
+ /* Ask for @size extra bytes so the start can be shifted; reject wrap. */
+ len_pad = len + size;
+ if (len_pad < len || (off + len_pad) < off)
+ return 0;
+
+ addr = current->mm->get_unmapped_area(filp, 0, len_pad,
+ off >> PAGE_SHIFT, flags);
+ if (IS_ERR_VALUE(addr))
+ return 0;
+
+ /* Advance addr until (addr % size) equals (off % size). */
+ addr += (off - addr) & (size - 1);
+ return addr;
+ }
+
+ /*
+  * thp_get_unmapped_area - get_unmapped_area variant that first tries
+  * a PMD_SIZE-aligned placement.
+  *
+  * The aligned attempt is made only when the caller gave no address
+  * hint, the file's mapping host is DAX, and CONFIG_FS_DAX_PMD is
+  * enabled. In every other case, or if the aligned attempt returns 0,
+  * the mm's regular get_unmapped_area() result is returned.
+  */
+ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+ {
+ loff_t off = (loff_t)pgoff << PAGE_SHIFT;
+
+ if (addr)
+ goto out;
+ if (!IS_DAX(filp->f_mapping->host) || !IS_ENABLED(CONFIG_FS_DAX_PMD))
+ goto out;
+
+ addr = __thp_get_unmapped_area(filp, len, off, flags, PMD_SIZE);
+ if (addr)
+ return addr;
+
+ out:
+ return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+ }
+ EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
+
static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
gfp_t gfp)
{
pgtable = pte_alloc_one(vma->vm_mm, haddr);
if (unlikely(!pgtable))
return VM_FAULT_OOM;
- zero_page = get_huge_zero_page();
+ zero_page = mm_get_huge_zero_page(vma->vm_mm);
if (unlikely(!zero_page)) {
pte_free(vma->vm_mm, pgtable);
count_vm_event(THP_FAULT_FALLBACK);
}
} else
spin_unlock(fe->ptl);
- if (!set) {
+ if (!set)
pte_free(vma->vm_mm, pgtable);
- put_huge_zero_page();
- }
return ret;
}
gfp = alloc_hugepage_direct_gfpmask(vma);
* since we already have a zero page to copy. It just takes a
* reference.
*/
- zero_page = get_huge_zero_page();
+ zero_page = mm_get_huge_zero_page(dst_mm);
set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
zero_page);
ret = 0;
update_mmu_cache_pmd(vma, fe->address, fe->pmd);
if (!page) {
add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
- put_huge_zero_page();
} else {
VM_BUG_ON_PAGE(!PageHead(page), page);
page_remove_rmap(page, true);
goto out;
page = pmd_page(*pmd);
- VM_BUG_ON_PAGE(!PageHead(page), page);
+ VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
if (flags & FOLL_TOUCH)
touch_pmd(vma, addr, pmd);
if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
}
skip_mlock:
page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
- VM_BUG_ON_PAGE(!PageCompound(page), page);
+ VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
if (flags & FOLL_GET)
get_page(page);
}
smp_wmb(); /* make pte visible before pmd */
pmd_populate(mm, pmd, pgtable);
- put_huge_zero_page();
}
static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (!vma_is_anonymous(vma)) {
_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
- if (is_huge_zero_pmd(_pmd))
- put_huge_zero_page();
if (vma_is_dax(vma))
return;
page = pmd_page(_pmd);
return false;
}
- void throttle_vm_writeout(gfp_t gfp_mask)
- {
- unsigned long background_thresh;
- unsigned long dirty_thresh;
-
- for ( ; ; ) {
- global_dirty_limits(&background_thresh, &dirty_thresh);
- dirty_thresh = hard_dirty_limit(&global_wb_domain, dirty_thresh);
-
- /*
- * Boost the allowable dirty threshold a bit for page
- * allocators so they don't get DoS'ed by heavy writers
- */
- dirty_thresh += dirty_thresh / 10; /* wheeee... */
-
- if (global_node_page_state(NR_UNSTABLE_NFS) +
- global_node_page_state(NR_WRITEBACK) <= dirty_thresh)
- break;
- congestion_wait(BLK_RW_ASYNC, HZ/10);
-
- /*
- * The caller might hold locks which can prevent IO completion
- * or progress in the filesystem. So we cannot just sit here
- * waiting for IO to complete.
- */
- if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
- break;
- }
- }
-
/*
* sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
*/
ratelimit_pages = 16;
}
-static int
-ratelimit_handler(struct notifier_block *self, unsigned long action,
- void *hcpu)
+static int page_writeback_cpu_online(unsigned int cpu)
{
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- case CPU_DEAD:
- writeback_set_ratelimit();
- return NOTIFY_OK;
- default:
- return NOTIFY_DONE;
- }
+ writeback_set_ratelimit(); /* same action for CPU online and CPU dead, as in the old notifier */
+ return 0;
}
-static struct notifier_block ratelimit_nb = {
- .notifier_call = ratelimit_handler,
- .next = NULL,
-};
-
/*
* Called early on to tune the page writeback dirty limits.
*
{
BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
- writeback_set_ratelimit();
- register_cpu_notifier(&ratelimit_nb);
+ cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
+ page_writeback_cpu_online, NULL);
+ cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
+ page_writeback_cpu_online);
}
/**
int ret;
lock_page_memcg(page);
- if (mapping) {
+ if (mapping && mapping_use_writeback_tags(mapping)) {
struct inode *inode = mapping->host;
struct backing_dev_info *bdi = inode_to_bdi(inode);
unsigned long flags;
int ret;
lock_page_memcg(page);
- if (mapping) {
+ if (mapping && mapping_use_writeback_tags(mapping)) {
struct inode *inode = mapping->host;
struct backing_dev_info *bdi = inode_to_bdi(inode);
unsigned long flags;
#include <linux/page_owner.h>
#include <linux/kthread.h>
#include <linux/memcontrol.h>
+#include <linux/random.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
int _node_numa_mem_[MAX_NUMNODES];
#endif
+#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
+volatile u64 latent_entropy __latent_entropy;
+EXPORT_SYMBOL(latent_entropy);
+#endif
+
/*
* Array of node states.
*/
static unsigned long __meminitdata nr_kernel_pages;
static unsigned long __meminitdata nr_all_pages;
- static unsigned long __meminitdata dma_reserve;
+ static unsigned long __meminitdata nr_memory_reserve;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
if (!debug_pagealloc_enabled())
return false;
+ if (!debug_guardpage_minorder())
+ return false;
+
return true;
}
if (!debug_pagealloc_enabled())
return;
+ if (!debug_guardpage_minorder())
+ return;
+
_debug_guardpage_enabled = true;
}
pr_info("Setting debug_guardpage_minorder to %lu\n", res);
return 0;
}
- __setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
+ early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
- static inline void set_page_guard(struct zone *zone, struct page *page,
+ static inline bool set_page_guard(struct zone *zone, struct page *page,
unsigned int order, int migratetype)
{
struct page_ext *page_ext;
if (!debug_guardpage_enabled())
- return;
+ return false; /* guard pages disabled: caller keeps the page */
+
+ if (order >= debug_guardpage_minorder())
+ return false; /* only orders below debug_guardpage_minorder() are guarded */
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
- return;
+ return false; /* no page_ext to record the guard flag in */
__set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
set_page_private(page, order);
/* Guard pages are not available for any usage */
__mod_zone_freepage_state(zone, -(1 << order), migratetype);
+
+ return true; /* page is now flagged PAGE_EXT_DEBUG_GUARD */
}
static inline void clear_page_guard(struct zone *zone, struct page *page,
__mod_zone_freepage_state(zone, (1 << order), migratetype);
}
#else
- struct page_ext_operations debug_guardpage_ops = { NULL, };
- static inline void set_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype) {}
+ struct page_ext_operations debug_guardpage_ops;
+ static inline bool set_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype) { return false; }
static inline void clear_page_guard(struct zone *zone, struct page *page,
unsigned int order, int migratetype) {}
#endif
local_irq_restore(flags);
}
+bool __meminitdata ram_latent_entropy; /* set by the ram_latent_entropy early parameter */
+
+static int __init setup_ram_latent_entropy(char *str)
+{
+ ram_latent_entropy = true; /* presence of the parameter is enough; str is ignored */
+ return 0;
+}
+early_param("ram_latent_entropy", setup_ram_latent_entropy);
+
static void __init __free_pages_boot_core(struct page *page, unsigned int order)
{
unsigned int nr_pages = 1 << order;
__ClearPageReserved(p);
set_page_count(p, 0);
+ if (ram_latent_entropy && !PageHighMem(page) &&
+ page_to_pfn(page) < 0x100000) {
+ u64 hash = 0;
+ size_t index, end = PAGE_SIZE * nr_pages / sizeof(hash);
+ const u64 *data = lowmem_page_address(page);
+
+ for (index = 0; index < end; index++)
+ hash ^= hash + data[index];
+ add_device_randomness((const void *)&hash, sizeof(hash));
+ }
+
page_zone(page)->managed_pages += nr_pages;
set_page_refcounted(page);
__free_pages(page, order);
return;
/* Free a large naturally-aligned chunk if possible */
- if (nr_pages == MAX_ORDER_NR_PAGES &&
- (pfn & (MAX_ORDER_NR_PAGES-1)) == 0) {
+ if (nr_pages == pageblock_nr_pages &&
+ (pfn & (pageblock_nr_pages - 1)) == 0) {
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
- __free_pages_boot_core(page, MAX_ORDER-1);
+ __free_pages_boot_core(page, pageblock_order);
return;
}
- for (i = 0; i < nr_pages; i++, page++)
+ for (i = 0; i < nr_pages; i++, page++, pfn++) {
+ if ((pfn & (pageblock_nr_pages - 1)) == 0)
+ set_pageblock_migratetype(page, MIGRATE_MOVABLE);
__free_pages_boot_core(page, 0);
+ }
}
/* Completion tracking for deferred_init_memmap() threads */
/*
* Ensure pfn_valid is checked every
- * MAX_ORDER_NR_PAGES for memory holes
+ * pageblock_nr_pages for memory holes
*/
- if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) {
+ if ((pfn & (pageblock_nr_pages - 1)) == 0) {
if (!pfn_valid(pfn)) {
page = NULL;
goto free_range;
}
/* Minimise pfn page lookups and scheduler checks */
- if (page && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0) {
+ if (page && (pfn & (pageblock_nr_pages - 1)) != 0) {
page++;
} else {
nr_pages += nr_to_free;
free_base_page = NULL;
free_base_pfn = nr_to_free = 0;
}
+ /* Free the last block of pages to allocator */
+ nr_pages += nr_to_free;
+ deferred_free_range(free_base_page, free_base_pfn, nr_to_free);
first_init_pfn = max(end_pfn, first_init_pfn);
}
size >>= 1;
VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
- if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) &&
- debug_guardpage_enabled() &&
- high < debug_guardpage_minorder()) {
- /*
- * Mark as guard pages (or page), that will allow to
- * merge back to allocator when buddy will be freed.
- * Corresponding page table entries will not be touched,
- * pages will stay not present in virtual address space
- */
- set_page_guard(zone, &page[size], high, migratetype);
+ /*
+ * Mark the pages (or page) as guard pages so that they can be
+ * merged back into the allocator when the buddy is freed.
+ * The corresponding page table entries are not touched; the
+ * pages stay not-present in the virtual address space.
+ */
+ if (set_page_guard(zone, &page[size], high, migratetype))
continue;
- }
+
list_add(&page[size].lru, &area->free_list[migratetype]);
area->nr_free++;
set_page_order(&page[size], high);
mt = get_pageblock_migratetype(page);
if (!is_migrate_isolate(mt)) {
- /* Obey watermarks as if the page was being allocated */
- watermark = low_wmark_pages(zone) + (1 << order);
- if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+ /*
+ * Obey watermarks as if the page was being allocated. We can
+ * emulate a high-order watermark check with a raised order-0
+ * watermark, because we already know our high-order page
+ * exists.
+ */
+ watermark = min_wmark_pages(zone) + (1UL << order);
+ if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
return 0;
__mod_zone_freepage_state(zone, -(1UL << order), mt);
return NULL;
}
+ static inline bool
+ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
+ enum compact_result compact_result,
+ enum compact_priority *compact_priority,
+ int compaction_retries)
+ {
+ int max_retries = MAX_COMPACT_RETRIES;
+ int min_priority;
+
+ if (!order) /* no compaction retry for order-0 requests */
+ return false;
+
+ /*
+ * Compaction failed outright: it considers all zones as desperately
+ * out of memory, so retrying only makes sense if the failure could
+ * have been caused by insufficient priority.
+ */
+ if (compaction_failed(compact_result))
+ goto check_priority;
+
+ /*
+ * Make sure the compaction was not deferred and did not bail out early
+ * due to lock contention before we declare that we should give up.
+ * But do not retry if the given zonelist is not suitable for
+ * compaction.
+ */
+ if (compaction_withdrawn(compact_result))
+ return compaction_zonelist_suitable(ac, order, alloc_flags);
+
+ /*
+ * !costly requests are much more important than __GFP_REPEAT
+ * costly ones because they are de facto nofail and invoke the OOM
+ * killer to move on, while costly ones can fail and users are ready
+ * to cope with that. Allowing costly requests 1/4 of the retries is
+ * rather arbitrary, but we would need much more detailed feedback
+ * from compaction to make a better decision.
+ */
+ if (order > PAGE_ALLOC_COSTLY_ORDER)
+ max_retries /= 4;
+ if (compaction_retries <= max_retries)
+ return true;
+
+ /*
+ * Retries exhausted or compaction failed: step *compact_priority down
+ * towards min_priority so the highest priority gets at least one attempt.
+ */
+ check_priority:
+ min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
+ MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
+ if (*compact_priority > min_priority) {
+ (*compact_priority)--;
+ return true;
+ }
+ return false;
+ }
#else
static inline struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
return NULL;
}
- #endif /* CONFIG_COMPACTION */
-
static inline bool
should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
enum compact_result compact_result,
}
return false;
}
+ #endif /* CONFIG_COMPACTION */
/* Perform direct synchronous page reclaim */
static int
int j;
struct zonelist *zonelist;
- zonelist = &pgdat->node_zonelists[0];
+ zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
;
j = build_zonelists_node(NODE_DATA(node), zonelist, j);
int j;
struct zonelist *zonelist;
- zonelist = &pgdat->node_zonelists[1];
+ zonelist = &pgdat->node_zonelists[ZONELIST_NOFALLBACK];
j = build_zonelists_node(pgdat, zonelist, 0);
zonelist->_zonerefs[j].zone = NULL;
zonelist->_zonerefs[j].zone_idx = 0;
struct zone *z;
struct zonelist *zonelist;
- zonelist = &pgdat->node_zonelists[0];
+ zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
pos = 0;
for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) {
for (j = 0; j < nr_nodes; j++) {
local_node = pgdat->node_id;
- zonelist = &pgdat->node_zonelists[0];
+ zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
j = build_zonelists_node(pgdat, zonelist, 0);
/*
break;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
- /*
- * If not mirrored_kernelcore and ZONE_MOVABLE exists, range
- * from zone_movable_pfn[nid] to end of each node should be
- * ZONE_MOVABLE not ZONE_NORMAL. skip it.
- */
- if (!mirrored_kernelcore && zone_movable_pfn[nid])
- if (zone == ZONE_NORMAL && pfn >= zone_movable_pfn[nid])
- continue;
-
/*
* Check given memblock attribute by firmware which can affect
* kernel memory layout. If zone==ZONE_MOVABLE but memory is
*zone_end_pfn = min(node_end_pfn,
arch_zone_highest_possible_pfn[movable_zone]);
+ /* Adjust for ZONE_MOVABLE starting within this range */
+ } else if (!mirrored_kernelcore &&
+ *zone_start_pfn < zone_movable_pfn[nid] &&
+ *zone_end_pfn > zone_movable_pfn[nid]) {
+ *zone_end_pfn = zone_movable_pfn[nid];
+
/* Check if this whole range is within ZONE_MOVABLE */
} else if (*zone_start_pfn >= zone_movable_pfn[nid])
*zone_start_pfn = *zone_end_pfn;
* Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
* and vice versa.
*/
- if (zone_movable_pfn[nid]) {
- if (mirrored_kernelcore) {
- unsigned long start_pfn, end_pfn;
- struct memblock_region *r;
-
- for_each_memblock(memory, r) {
- start_pfn = clamp(memblock_region_memory_base_pfn(r),
- zone_start_pfn, zone_end_pfn);
- end_pfn = clamp(memblock_region_memory_end_pfn(r),
- zone_start_pfn, zone_end_pfn);
-
- if (zone_type == ZONE_MOVABLE &&
- memblock_is_mirror(r))
- nr_absent += end_pfn - start_pfn;
-
- if (zone_type == ZONE_NORMAL &&
- !memblock_is_mirror(r))
- nr_absent += end_pfn - start_pfn;
- }
- } else {
- if (zone_type == ZONE_NORMAL)
- nr_absent += node_end_pfn - zone_movable_pfn[nid];
+ if (mirrored_kernelcore && zone_movable_pfn[nid]) {
+ unsigned long start_pfn, end_pfn;
+ struct memblock_region *r;
+
+ for_each_memblock(memory, r) {
+ start_pfn = clamp(memblock_region_memory_base_pfn(r),
+ zone_start_pfn, zone_end_pfn);
+ end_pfn = clamp(memblock_region_memory_end_pfn(r),
+ zone_start_pfn, zone_end_pfn);
+
+ if (zone_type == ZONE_MOVABLE &&
+ memblock_is_mirror(r))
+ nr_absent += end_pfn - start_pfn;
+
+ if (zone_type == ZONE_NORMAL &&
+ !memblock_is_mirror(r))
+ nr_absent += end_pfn - start_pfn;
}
}
}
/* Account for reserved pages */
- if (j == 0 && freesize > dma_reserve) {
- freesize -= dma_reserve;
+ if (j == 0 && freesize > nr_memory_reserve) {
+ freesize -= nr_memory_reserve;
printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
- zone_names[0], dma_reserve);
+ zone_names[0], nr_memory_reserve);
}
if (!is_highmem_idx(j))
}
/**
- * set_dma_reserve - set the specified number of pages reserved in the first zone
- * @new_dma_reserve: The number of pages to mark reserved
+ * set_memory_reserve - set or increase the number of pages reserved in the first zone
+ * @nr_reserve: The number of pages to mark reserved
+ * @inc: if true, add @nr_reserve to the existing value; if false, replace it.
*
* The per-cpu batchsize and zone watermarks are determined by managed_pages.
* In the DMA zone, a significant percentage may be consumed by kernel image
* first zone (e.g., ZONE_DMA). The effect will be lower watermarks and
* smaller per-cpu batchsize.
*/
- void __init set_dma_reserve(unsigned long new_dma_reserve)
+ void __init set_memory_reserve(unsigned long nr_reserve, bool inc)
{
- dma_reserve = new_dma_reserve;
+ if (inc)
+ nr_memory_reserve += nr_reserve;
+ else
+ nr_memory_reserve = nr_reserve;
}
void __init free_area_init(unsigned long *zones_size)
__setup("hashdist=", set_hashdist);
#endif
+ #ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
+ /*
+ * Returns the number of pages the arch has reserved that are not known
+ * to alloc_large_system_hash(). This generic fallback reports none.
+ */
+ static unsigned long __init arch_reserved_kernel_pages(void)
+ {
+ return 0;
+ }
+ #endif
+
/*
* allocate a large system hash table from bootmem
* - it is assumed that the hash table must contain an exact power-of-2
if (!numentries) {
/* round applicable memory size up to nearest megabyte */
numentries = nr_kernel_pages;
+ numentries -= arch_reserved_kernel_pages();
/* It isn't necessary when PAGE_SIZE >= 1MB */
if (PAGE_SHIFT < 20)
spin_lock_init(&parent->list_lock);
parent->free_objects = 0;
parent->free_touched = 0;
+ parent->num_slabs = 0;
}
#define MAKE_LIST(cachep, listp, slab, nodeid) \
return 0;
}
+#if (defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)) || defined(CONFIG_SMP)
/*
* Allocates and initializes node for a node on each slab cache, used for
* either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
return 0;
}
+#endif
static int setup_kmem_cache_node(struct kmem_cache *cachep,
int node, gfp_t gfp, bool force_change)
return ret;
}
+#ifdef CONFIG_SMP
+
static void cpuup_canceled(long cpu)
{
struct kmem_cache *cachep;
return -ENOMEM;
}
-static int cpuup_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+int slab_prepare_cpu(unsigned int cpu)
{
- long cpu = (long)hcpu;
- int err = 0;
+ int err;
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- mutex_lock(&slab_mutex);
- err = cpuup_prepare(cpu);
- mutex_unlock(&slab_mutex);
- break;
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- start_cpu_timer(cpu);
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- /*
- * Shutdown cache reaper. Note that the slab_mutex is
- * held so that if cache_reap() is invoked it cannot do
- * anything expensive but will only modify reap_work
- * and reschedule the timer.
- */
- cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
- /* Now the cache_reaper is guaranteed to be not running. */
- per_cpu(slab_reap_work, cpu).work.func = NULL;
- break;
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- start_cpu_timer(cpu);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- /*
- * Even if all the cpus of a node are down, we don't free the
- * kmem_cache_node of any cache. This to avoid a race between
- * cpu_down, and a kmalloc allocation from another cpu for
- * memory from the node of the cpu going down. The node
- * structure is usually allocated from kmem_cache_create() and
- * gets destroyed at kmem_cache_destroy().
- */
- /* fall through */
+ mutex_lock(&slab_mutex);
+ err = cpuup_prepare(cpu);
+ mutex_unlock(&slab_mutex);
+ return err;
+}
+
+/*
+ * This is called for a failed online attempt and for a successful
+ * offline.
+ *
+ * Even if all the cpus of a node are down, we do not free the
+ * kmem_cache_node of any cache. This is to avoid a race between
+ * cpu_down and a kmalloc allocation from another cpu for memory from
+ * the node of the cpu going down. The node structure is usually allocated
+ * from kmem_cache_create() and gets destroyed at kmem_cache_destroy().
+ */
+int slab_dead_cpu(unsigned int cpu)
+{
+ mutex_lock(&slab_mutex);
+ cpuup_canceled(cpu);
+ mutex_unlock(&slab_mutex);
+ return 0;
+}
#endif
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- mutex_lock(&slab_mutex);
- cpuup_canceled(cpu);
- mutex_unlock(&slab_mutex);
- break;
- }
- return notifier_from_errno(err);
+
+static int slab_online_cpu(unsigned int cpu)
+{
+ start_cpu_timer(cpu); /* the old notifier did this for CPU_ONLINE and CPU_DOWN_FAILED */
+ return 0;
}
-static struct notifier_block cpucache_notifier = {
- &cpuup_callback, NULL, 0
-};
+static int slab_offline_cpu(unsigned int cpu)
+{
+ /*
+ * Shut down the cache reaper. The original note says slab_mutex is
+ * held so that a concurrent cache_reap() does nothing expensive and
+ * only modifies reap_work and reschedules the timer.
+ * NOTE(review): slab_mutex is not taken in this function - confirm.
+ */
+ cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
+ /* The cache reaper is now guaranteed not to be running. */
+ per_cpu(slab_reap_work, cpu).work.func = NULL;
+ return 0;
+}
#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
/*
/* Done! */
slab_state = FULL;
- /*
- * Register a cpu startup notifier callback that initializes
- * cpu_cache_get for all new cpus
- */
- register_cpu_notifier(&cpucache_notifier);
-
#ifdef CONFIG_NUMA
/*
* Register a memory hotplug callback that initializes and frees
static int __init cpucache_init(void)
{
- int cpu;
+ int ret;
/*
* Register the timers that return unneeded pages to the page allocator
*/
- for_each_online_cpu(cpu)
- start_cpu_timer(cpu);
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SLAB online",
+ slab_online_cpu, slab_offline_cpu);
+ WARN_ON(ret < 0);
/* Done! */
slab_state = FULL;
for_each_kmem_cache_node(cachep, node, n) {
unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
unsigned long active_slabs = 0, num_slabs = 0;
+ unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+ unsigned long num_slabs_full;
spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->slabs_full, lru) {
- active_objs += cachep->num;
- active_slabs++;
- }
+ num_slabs = n->num_slabs;
list_for_each_entry(page, &n->slabs_partial, lru) {
active_objs += page->active;
- active_slabs++;
+ num_slabs_partial++;
}
list_for_each_entry(page, &n->slabs_free, lru)
- num_slabs++;
+ num_slabs_free++;
free_objects += n->free_objects;
spin_unlock_irqrestore(&n->list_lock, flags);
- num_slabs += active_slabs;
num_objs = num_slabs * cachep->num;
+ active_slabs = num_slabs - num_slabs_free;
+ num_slabs_full = num_slabs -
+ (num_slabs_partial + num_slabs_free);
+ active_objs += (num_slabs_full * cachep->num);
+
pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
node, active_slabs, num_slabs, active_objs, num_objs,
free_objects);
page = list_entry(p, struct page, lru);
list_del(&page->lru);
+ n->num_slabs--;
/*
* Safe to drop the lock. The slab is no longer linked
* to the cache.
list_add_tail(&page->lru, &(n->slabs_free));
else
fixup_slab_list(cachep, n, page, &list);
+
+ n->num_slabs++;
STATS_INC_GROWN(cachep);
n->free_objects += cachep->num - page->active;
spin_unlock(&n->list_lock);
page = list_last_entry(&n->slabs_free, struct page, lru);
list_move(&page->lru, list);
+ n->num_slabs--;
}
}
unsigned long num_objs;
unsigned long active_slabs = 0;
unsigned long num_slabs, free_objects = 0, shared_avail = 0;
+ unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+ unsigned long num_slabs_full = 0;
const char *name;
char *error = NULL;
int node;
check_irq_on();
spin_lock_irq(&n->list_lock);
- list_for_each_entry(page, &n->slabs_full, lru) {
- if (page->active != cachep->num && !error)
- error = "slabs_full accounting error";
- active_objs += cachep->num;
- active_slabs++;
- }
+ num_slabs += n->num_slabs;
+
list_for_each_entry(page, &n->slabs_partial, lru) {
if (page->active == cachep->num && !error)
error = "slabs_partial accounting error";
if (!page->active && !error)
error = "slabs_partial accounting error";
active_objs += page->active;
- active_slabs++;
+ num_slabs_partial++;
}
+
list_for_each_entry(page, &n->slabs_free, lru) {
if (page->active && !error)
error = "slabs_free accounting error";
- num_slabs++;
+ num_slabs_free++;
}
+
free_objects += n->free_objects;
if (n->shared)
shared_avail += n->shared->avail;
spin_unlock_irq(&n->list_lock);
}
- num_slabs += active_slabs;
num_objs = num_slabs * cachep->num;
+ active_slabs = num_slabs - num_slabs_free;
+ num_slabs_full = num_slabs - (num_slabs_partial + num_slabs_free);
+ active_objs += (num_slabs_full * cachep->num);
+
if (num_objs - active_objs != free_objects && !error)
error = "free_objects accounting error";