ARM/Annapurna Labs ALPINE ARCHITECTURE
M: Tsahee Zidenberg <tsahee@annapurnalabs.com>
M: Antoine Tenart <antoine.tenart@free-electrons.com>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/mach-alpine/
F: arch/arm/boot/dts/alpine*
platdirs := $(patsubst %,arch/arm/plat-%/,$(sort $(plat-y)))
ifneq ($(CONFIG_ARCH_MULTIPLATFORM),y)
+ifneq ($(CONFIG_ARM_SINGLE_ARMV7M),y)
ifeq ($(KBUILD_SRC),)
KBUILD_CPPFLAGS += $(patsubst %,-I%include,$(machdirs) $(platdirs))
else
KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(machdirs) $(platdirs))
endif
endif
+endif
export TEXT_OFFSET GZFLAGS MMUEXT
* associativity as these may be erroneously set
* up by boot loader(s).
*/
- cache-size = <1048576>; // 1MB
- cache-sets = <4096>;
+ cache-size = <131072>; // 128KB
+ cache-sets = <512>;
cache-line-size = <32>;
arm,parity-disable;
- arm,tag-latency = <1>;
- arm,data-latency = <1 1>;
- arm,dirty-latency = <1>;
+ arm,tag-latency = <1 1 1>;
+ arm,data-latency = <1 1 1>;
};
scu: scu@1f000000 {
};
syscon {
- compatible = "arm,integrator-ap-syscon";
+ compatible = "arm,integrator-ap-syscon", "syscon";
reg = <0x11000000 0x100>;
interrupt-parent = <&pic>;
/* These are the logical module IRQs */
};
syscon {
- compatible = "arm,integrator-cp-syscon";
+ compatible = "arm,integrator-cp-syscon", "syscon";
reg = <0xcb000000 0x100>;
};
cpu_on = <0x84000003>;
};
- psci {
- compatible = "arm,psci";
- method = "smc";
- cpu_suspend = <0x84000001>;
- cpu_off = <0x84000002>;
- cpu_on = <0x84000003>;
- };
-
soc {
#address-cells = <1>;
#size-cells = <1>;
* Pin 41: BR_UART1_TXD
* Pin 44: BR_UART1_RXD
*/
- serial@70006000 {
+ serial@0,70006000 {
compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
status = "okay";
};
* Pin 71: UART2_CTS_L
* Pin 74: UART2_RTS_L
*/
- serial@70006040 {
+ serial@0,70006040 {
compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
status = "okay";
};
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_FIRMWARE_MEMMAP=y
CONFIG_FANOTIFY=y
-CONFIG_PRINTK_TIME=1
+CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_PAGE_POISONING=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_FIRMWARE_MEMMAP=y
CONFIG_FANOTIFY=y
-CONFIG_PRINTK_TIME=1
+CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_PAGE_POISONING=y
mm_segment_t fs;
long ret, err, i;
- if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
+ if (maxevents <= 0 ||
+ maxevents > (INT_MAX/sizeof(*kbuf)) ||
+ maxevents > (INT_MAX/sizeof(*events)))
return -EINVAL;
+ if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
+ return -EFAULT;
kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
if (nsops < 1 || nsops > SEMOPM)
return -EINVAL;
+ if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
+ return -EFAULT;
sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
if (!sops)
return -ENOMEM;
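Both hunks above apply the same defensive pattern: bound the caller-supplied element count against INT_MAX divided by the element size before it can overflow the allocation size, and verify the user-space range with access_ok() so a bad pointer fails early with -EFAULT. A minimal userspace sketch of the overflow check (hypothetical names, not kernel code):

#include <stdint.h>
#include <stdlib.h>

struct event { uint32_t mask; uint64_t data; };	/* stand-in element type */

static struct event *checked_alloc(long count)
{
	/* Reject non-positive counts and counts so large that
	 * count * sizeof(struct event) would overflow. */
	if (count <= 0 || (size_t)count > SIZE_MAX / sizeof(struct event))
		return NULL;
	return malloc(sizeof(struct event) * (size_t)count);
}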
menuconfig ARCH_CLPS711X
bool "Cirrus Logic EP721x/EP731x-based"
depends on ARCH_MULTI_V4T
- select ARCH_REQUIRE_GPIOLIB
select AUTO_ZRELADDR
select CLKSRC_OF
select CLPS711X_TIMER
select COMMON_CLK
select CPU_ARM720T
select GENERIC_CLOCKEVENTS
+ select GPIOLIB
select MFD_SYSCON
select OF_IRQ
select USE_OF
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
- -I$(srctree)/arch/arm/plat-orion/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-orion/include
AFLAGS_coherency_ll.o := -Wa,-march=armv7-a
CFLAGS_pmsu.o := -march=armv7-a
config MACH_OX810SE
bool "Support OX810SE Based Products"
+ select ARCH_HAS_RESET_CONTROLLER
select COMMON_CLK_OXNAS
select CPU_ARM926T
select MFD_SYSCON
select OXNAS_RPS_TIMER
select PINCTRL_OXNAS
+ select RESET_CONTROLLER
select RESET_OXNAS
select VERSATILE_FPGA_IRQ
help
*/
#include <linux/kernel.h>
+#include <linux/module.h> /* symbol_get ; symbol_put */
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/major.h>
*/
#include <linux/kernel.h>
+#include <linux/module.h> /* symbol_get ; symbol_put */
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/gpio_keys.h>
#
# Makefile for the linux kernel.
#
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
- -I$(srctree)/arch/arm/plat-versatile/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-versatile/include
obj-y := core.o
obj-$(CONFIG_REALVIEW_DT) += realview-dt.o
#
# Licensed under GPLv2
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/$(src)/include -I$(srctree)/arch/arm/plat-samsung/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/arch/arm/plat-samsung/include
# Core
bool __init shmobile_smp_init_fallback_ops(void)
{
/* fallback on PSCI/smp_ops if no other DT based method is detected */
+ if (!IS_ENABLED(CONFIG_SMP))
+ return false;
+
return platform_can_secondary_boot() ? true : false;
}
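The IS_ENABLED() test above becomes a compile-time constant, so when CONFIG_SMP is off the rest of the function is dead code the compiler discards while still type-checking it. A simplified userspace stand-in for that pattern (the FAKE_* names are made up; this is not the kernel's real IS_ENABLED() implementation):

#include <stdbool.h>
#include <stdio.h>

#define FAKE_CONFIG_SMP 0		/* pretend the option is disabled */
#define FAKE_IS_ENABLED(opt) (opt)	/* hypothetical, much simpler than the kernel macro */

static bool platform_can_boot_secondaries(void) { return true; }	/* stand-in */

static bool smp_init_fallback(void)
{
	if (!FAKE_IS_ENABLED(FAKE_CONFIG_SMP))
		return false;	/* constant condition: the code below is eliminated */
	return platform_can_boot_secondaries();
}

int main(void)
{
	printf("fallback ops: %s\n", smp_init_fallback() ? "yes" : "no");
	return 0;
}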
config ARCH_ALPINE
bool "Annapurna Labs Alpine platform"
- select ALPINE_MSI
+ select ALPINE_MSI if PCI
help
This enables support for the Annapurna Labs Alpine
SoC family.
config ARCH_HISI
bool "Hisilicon SoC Family"
select ARM_TIMER_SP804
- select HISILICON_IRQ_MBIGEN
+ select HISILICON_IRQ_MBIGEN if PCI
help
This enables support for Hisilicon ARMv8 SoC family
/dts-v1/;
#include "exynos7.dtsi"
#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/clock/samsung,s2mps11.h>
/ {
model = "Samsung Exynos7 Espresso board based on EXYNOS7";
&rtc {
status = "okay";
+ clocks = <&clock_ccore PCLK_RTC>, <&s2mps15_osc S2MPS11_CLK_AP>;
+ clock-names = "rtc", "rtc_src";
};
&watchdog {
free_all_bootmem();
mem_init_print_info(NULL);
- show_mem(0);
}
void free_initmem(void)
UTS_MACHINE := $(OLDARCH)
ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override CC += -mlittle-endian
-ifneq ($(cc-name),clang)
-override CC += -mno-strict-align
-endif
-override AS += -mlittle-endian
override LD += -EL
-override CROSS32CC += -mlittle-endian
override CROSS32AS += -mlittle-endian
LDEMULATION := lppc
GNUTARGET := powerpcle
MULTIPLEWORD := -mno-multiple
KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
else
-ifeq ($(call cc-option-yn,-mbig-endian),y)
-override CC += -mbig-endian
-override AS += -mbig-endian
-endif
override LD += -EB
LDEMULATION := ppc
GNUTARGET := powerpc
MULTIPLEWORD := -mmultiple
endif
+cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian)
+cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
+ifneq ($(cc-name),clang)
+ cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align
+endif
+
+aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
+
ifeq ($(HAS_BIARCH),y)
override AS += -a$(CONFIG_WORD_SIZE)
override LD += -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION)
KBUILD_AFLAGS += $(cpu-as-y)
KBUILD_CFLAGS += $(cpu-as-y)
+KBUILD_AFLAGS += $(aflags-y)
+KBUILD_CFLAGS += $(cflags-y)
+
head-y := arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o
head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o
head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
+#include <linux/cpufeature.h>
#include <asm/switch_to.h>
#define CHKSUM_BLOCK_SIZE 1
crypto_unregister_shash(&alg);
}
-module_init(crc32c_vpmsum_mod_init);
+module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init);
module_exit(crc32c_vpmsum_mod_fini);
MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");
#endif
+/* Idle state entry routines */
+#ifdef CONFIG_PPC_P7_NAP
+#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
+ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
+ std r0,0(r1); \
+ ptesync; \
+ ld r0,0(r1); \
+1: cmp cr0,r0,r0; \
+ bne 1b; \
+ IDLE_INST; \
+ b .
+#endif /* CONFIG_PPC_P7_NAP */
+
#endif
#ifndef __ASSEMBLY__
void apply_feature_fixups(void);
+void setup_feature_keys(void);
#endif
#endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
static inline void __giveup_spe(struct task_struct *t) { }
#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-extern void flush_tmregs_to_thread(struct task_struct *);
-#else
-static inline void flush_tmregs_to_thread(struct task_struct *t)
-{
-}
-#endif
-
static inline void clear_task_ebb(struct task_struct *t)
{
#ifdef CONFIG_PPC_BOOK3S_64
extern void xics_kexec_teardown_cpu(int secondary);
extern void xics_migrate_irqs_away(void);
extern void icp_native_eoi(struct irq_data *d);
+extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type);
+extern int xics_retrigger(struct irq_data *data);
#ifdef CONFIG_SMP
extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
unsigned int strict_check);
int n = 0, l = 0;
char buffer[128];
- n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
+ n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
edev->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
- pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
+ pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
edev->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
* vector
*/
SET_SCRATCH0(r13) /* save r13 */
-#ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
- /* Running native on arch 2.06 or later, check if we are
- * waking up from nap. We only handle no state loss and
- * supervisor state loss. We do -not- handle hypervisor
- * state loss at this time.
+ /*
+ * Running native on arch 2.06 or later, we may wake up from winkle
+ * inside a machine check. If so, the last bit of HSPRG0 will be set
+ * to 1. Hence clear it unconditionally.
*/
- mfspr r13,SPRN_SRR1
- rlwinm. r13,r13,47-31,30,31
- OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
- beq 9f
-
- mfspr r13,SPRN_SRR1
- rlwinm. r13,r13,47-31,30,31
- /* waking up from powersave (nap) state */
- cmpwi cr1,r13,2
- /* Total loss of HV state is fatal. let's just stay stuck here */
- OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
- bgt cr1,.
-9:
- OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
+ GET_PACA(r13)
+ clrrdi r13,r13,1
+ SET_PACA(r13)
EXCEPTION_PROLOG_0(PACA_EXMC)
BEGIN_FTR_SECTION
b machine_check_powernv_early
* Check if thread was in power saving mode. We come here when any
* of the following is true:
* a. thread wasn't in power saving mode
- * b. thread was in power saving mode with no state loss or
- * supervisor state loss
+ * b. thread was in power saving mode with no state loss,
+ * supervisor state loss or hypervisor state loss.
*
- * Go back to nap again if (b) is true.
+ * Go back to nap/sleep/winkle mode again if (b) is true.
*/
rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
beq 4f /* No, it wasn't */
/* Thread was in power saving mode. Go back to nap again. */
cmpwi r11,2
- bne 3f
- /* Supervisor state loss */
+ blt 3f
+ /* Supervisor/Hypervisor state loss */
li r0,1
stb r0,PACA_NAPSTATELOST(r13)
3: bl machine_check_queue_event
MACHINE_CHECK_HANDLER_WINDUP
GET_PACA(r13)
ld r1,PACAR1(r13)
- li r3,PNV_THREAD_NAP
- b pnv_enter_arch207_idle_mode
+ /*
+ * Check what idle state this CPU was in and go back to the same mode
+ * again.
+ */
+ lbz r3,PACA_THREAD_IDLE_STATE(r13)
+ cmpwi r3,PNV_THREAD_NAP
+ bgt 10f
+ IDLE_STATE_ENTER_SEQ(PPC_NAP)
+ /* No return */
+10:
+ cmpwi r3,PNV_THREAD_SLEEP
+ bgt 2f
+ IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+ /* No return */
+
+2:
+ /*
+ * Go back to winkle. Note that this thread was woken up in the
+ * machine check handler from winkle and has not restored the per-subcore
+ * state. Hence, before going back to winkle, set the last bit of HSPRG0
+ * to 1. This makes sure that if this thread gets woken up again at
+ * reset vector 0x100, it will get a chance to restore the subcore
+ * state.
+ */
+ ori r13,r13,1
+ SET_PACA(r13)
+ IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
+ /* No return */
4:
#endif
/*
PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
PSSCR_MTL_MASK
-/* Idle state entry routines */
-
-#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
- /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r0,0(r1); \
- ptesync; \
- ld r0,0(r1); \
-1: cmp cr0,r0,r0; \
- bne 1b; \
- IDLE_INST; \
- b .
-
.text
/*
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
*/
_GLOBAL(pnv_restore_hyp_resource)
- ld r2,PACATOC(r13);
BEGIN_FTR_SECTION
+ ld r2,PACATOC(r13);
/*
* POWER ISA 3. Use PSSCR to determine if we
* are waking up from deep idle state
*/
clrldi r5,r13,63
clrrdi r13,r13,1
+
+ /* Now that we are sure r13 is corrected, load TOC */
+ ld r2,PACATOC(r13);
cmpwi cr4,r5,1
mtspr SPRN_HSPRG0,r13
mce->in_use = 1;
mce->initiator = MCE_INITIATOR_CPU;
- if (handled)
+ /* Mark it recovered if we have handled it and MSR(RI=1). */
+ if (handled && (regs->msr & MSR_RI))
mce->disposition = MCE_DISPOSITION_RECOVERED;
else
mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
static int get_phb_number(struct device_node *dn)
{
int ret, phb_id = -1;
+ u32 prop_32;
u64 prop;
/*
* reading "ibm,opal-phbid", only present in OPAL environment.
*/
ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
- if (ret)
- ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop);
+ if (ret) {
+ ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
+ prop = prop_32;
+ }
if (!ret)
phb_id = (int)(prop & (MAX_PHBS - 1));
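The prop_32 temporary added above matters on big-endian machines: writing a 32-bit value through a cast pointer into a 64-bit variable lands in the high-order half there, so the low bits masked afterwards never see it. A small illustration of the difference (plain userspace C, not the kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t prop = 0;
	uint32_t prop_32 = 0x1234;

	*(uint32_t *)&prop = 0x1234;	/* what the old cast effectively did */
	/* little-endian: prop == 0x1234; big-endian: prop == 0x1234 << 32 */
	printf("via cast:  0x%llx\n", (unsigned long long)prop);

	prop = prop_32;			/* the fixed approach: widen explicitly */
	printf("via widen: 0x%llx\n", (unsigned long long)prop);
	return 0;
}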
#endif
}
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void flush_tmregs_to_thread(struct task_struct *tsk)
-{
- /*
- * Process self tracing is not yet supported through
- * ptrace interface. Ptrace generic code should have
- * prevented this from happening in the first place.
- * Warn once here with the message, if some how it
- * is attempted.
- */
- WARN_ONCE(tsk == current,
- "Not expecting ptrace on self: TM regs may be incorrect\n");
-
- /*
- * If task is not current, it should have been flushed
- * already to it's thread_struct during __switch_to().
- */
-}
-#endif
-
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
{
/* Don't print anything after quiesce under OPAL, it crashes OFW */
if (of_platform != PLATFORM_OPAL) {
- prom_printf("Booting Linux via __start() ...\n");
+ prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
prom_debug("->dt_header_start=0x%x\n", hdr);
}
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
+#include <asm/tm.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
REG_OFFSET_END,
};
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+ /*
+ * If task is not current, it will have been flushed already to
+ * its thread_struct during __switch_to().
+ *
+ * A reclaim flushes ALL the state.
+ */
+
+ if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
+ tm_reclaim_current(TM_CAUSE_SIGNAL);
+
+}
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
+#endif
+
/**
* regs_query_register_offset() - query register offset from its name
* @name: the name of a register
* and we are running with enough of the MMU enabled to have our
* proper kernel virtual addresses
*
- * Find out what kind of machine we're on and save any data we need
- * from the early boot process (devtree is copied on pmac by prom_init()).
- * This is called very early on the boot process, after a minimal
- * MMU environment has been set up but before MMU_init is called.
+ * We do the initial parsing of the flat device-tree and prepare
+ * for the MMU to be fully initialized.
*/
extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
notrace void __init machine_init(u64 dt_ptr)
{
+ /* Configure static keys first, now that we're relocated. */
+ setup_feature_keys();
+
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
/* Apply all the dynamic patching */
apply_feature_fixups();
+ setup_feature_keys();
/* Initialize the hash table or TLB handling */
early_init_mmu();
#include <linux/security.h>
#include <linux/memblock.h>
+#include <asm/cpu_has_feature.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mmu.h>
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
$(call if_changed,vdso32ld)
# strip rule for the .so file
$(call if_changed,objcopy)
# assembly rules for the .S files
-$(obj-vdso32): %.o: %.S
+$(obj-vdso32): %.o: %.S FORCE
$(call if_changed_dep,vdso32as)
# actual build commands
quiet_cmd_vdso32ld = VDSO32L $@
- cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+ cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
quiet_cmd_vdso32as = VDSO32A $@
cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
$(call if_changed,vdso64ld)
# strip rule for the .so file
$(call if_changed,objcopy)
# assembly rules for the .S files
-$(obj-vdso64): %.o: %.S
+$(obj-vdso64): %.o: %.S FORCE
$(call if_changed_dep,vdso64as)
# actual build commands
quiet_cmd_vdso64ld = VDSO64L $@
- cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+ cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
quiet_cmd_vdso64as = VDSO64A $@
cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
stw r7,12(r1)
stw r8,8(r1)
- andi. r0,r4,1 /* is destination address even ? */
- cmplwi cr7,r0,0
+ rlwinm r0,r4,3,0x8
+ rlwnm r6,r6,r0,0,31 /* odd destination address: rotate one byte */
+ cmplwi cr7,r0,0 /* is destination address even ? */
addic r12,r6,0
addi r6,r4,-4
neg r0,r4
66: addze r3,r12
addi r1,r1,16
beqlr+ cr7
- rlwinm r3,r3,8,0,31 /* swap bytes for odd destination */
+ rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */
blr
/* read fault */
&__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
#endif
do_final_fixups();
+}
+void __init setup_feature_keys(void)
+{
/*
* Initialise jump label. This causes all the cpu/mmu_has_feature()
* checks to take on their correct polarity based on the current set of
gang = alloc_spu_gang();
SPUFS_I(inode)->i_ctx = NULL;
SPUFS_I(inode)->i_gang = gang;
- if (!gang)
+ if (!gang) {
+ ret = -ENOMEM;
goto out_iput;
+ }
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
if (dev->vendor == 0x1959 && dev->device == 0xa007 &&
!firmware_has_feature(FW_FEATURE_LPAR)) {
dev->dev.archdata.dma_ops = &dma_direct_ops;
+ /*
+ * Set the coherent DMA mask to prevent the iommu
+ * from being used unnecessarily
+ */
+ dev->dev.coherent_dma_mask = DMA_BIT_MASK(44);
return;
}
#endif
}
/* Install interrupt handler */
- rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
+ rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW,
+ "opal", NULL);
if (rc) {
irq_dispose_mapping(virq);
pr_warn("Error %d requesting irq %d (0x%x)\n",
if (!(regs->msr & MSR_RI)) {
/* If MSR_RI isn't set, we cannot recover */
+ pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
recovered = 0;
} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
/* Platform corrected itself */
}
early_param("iommu", iommu_setup);
-static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
+static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
{
- return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
- (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+ /*
+ * WARNING: We cannot rely on the resource flags. The Linux PCI
+ * allocation code sometimes decides to put a 64-bit prefetchable
+ * BAR in the 32-bit window, so we have to compare the addresses.
+ *
+ * For simplicity we only test resource start.
+ */
+ return (r->start >= phb->ioda.m64_base &&
+ r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
}
static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
sgsz = phb->ioda.m64_segsize;
for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
r = &pdev->resource[i];
- if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags))
+ if (!r->parent || !pnv_pci_is_m64(phb, r))
continue;
start = _ALIGN_DOWN(r->start - base, sgsz);
unsigned shift, unsigned long index,
unsigned long npages)
{
- __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false);
+ __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
unsigned long start, end, inc;
/* We'll invalidate DMA address in PE scope */
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags)) {
+ if (!pnv_pci_is_m64(phb, res)) {
dev_warn(&pdev->dev, "Don't support SR-IOV with"
" non M64 VF BAR%d: %pR. \n",
i, res);
index++;
}
} else if ((res->flags & IORESOURCE_MEM) &&
- !pnv_pci_is_mem_pref_64(res->flags)) {
+ !pnv_pci_is_m64(phb, res)) {
region.start = res->start -
phb->hose->mem_offset[0] -
phb->ioda.m32_pci_base;
bridge = bridge->bus->self;
}
- /* We fail back to M32 if M64 isn't supported */
- if (phb->ioda.m64_segsize &&
- pnv_pci_is_mem_pref_64(type))
+ /*
+ * We fall back to M32 if M64 isn't supported. We enforce the M64
+ * alignment for any 64-bit resource, PCIe doesn't care and
+ * bridges only do 64-bit prefetchable anyway.
+ */
+ if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64))
return phb->ioda.m64_segsize;
if (type & IORESOURCE_MEM)
return phb->ioda.m32_segsize;
w = NULL;
if (r->flags & type & IORESOURCE_IO)
w = &hose->io_resource;
- else if (pnv_pci_is_mem_pref_64(r->flags) &&
+ else if (pnv_pci_is_m64(phb, r) &&
(type & IORESOURCE_PREFETCH) &&
phb->ioda.m64_segsize)
w = &hose->mem_resources[1];
return dlpar_update_device_tree_lmb(lmb);
}
-static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
-{
- unsigned long section_nr;
- struct mem_section *mem_sect;
- struct memory_block *mem_block;
-
- section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
- mem_sect = __nr_to_section(section_nr);
-
- mem_block = find_memory_block(mem_sect);
- return mem_block;
-}
-
#ifdef CONFIG_MEMORY_HOTREMOVE
static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
{
static int dlpar_add_lmb(struct of_drconf_cell *);
+static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
+{
+ unsigned long section_nr;
+ struct mem_section *mem_sect;
+ struct memory_block *mem_block;
+
+ section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
+ mem_sect = __nr_to_section(section_nr);
+
+ mem_block = find_memory_block(mem_sect);
+ return mem_block;
+}
+
static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
{
struct memory_block *mem_block;
config PPC_XICS
def_bool n
select PPC_SMP_MUXED_IPI
+ select HARDIRQS_SW_RESEND
config PPC_ICP_NATIVE
def_bool n
.irq_mask = ics_opal_mask_irq,
.irq_unmask = ics_opal_unmask_irq,
.irq_eoi = NULL, /* Patched at init time */
- .irq_set_affinity = ics_opal_set_affinity
+ .irq_set_affinity = ics_opal_set_affinity,
+ .irq_set_type = xics_set_irq_type,
+ .irq_retrigger = xics_retrigger,
};
static int ics_opal_map(struct ics *ics, unsigned int virq);
.irq_mask = ics_rtas_mask_irq,
.irq_unmask = ics_rtas_unmask_irq,
.irq_eoi = NULL, /* Patched at init time */
- .irq_set_affinity = ics_rtas_set_affinity
+ .irq_set_affinity = ics_rtas_set_affinity,
+ .irq_set_type = xics_set_irq_type,
+ .irq_retrigger = xics_retrigger,
};
static int ics_rtas_map(struct ics *ics, unsigned int virq)
pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
- /* They aren't all level sensitive but we just don't really know */
- irq_set_status_flags(virq, IRQ_LEVEL);
+ /*
+ * Mark interrupts as edge sensitive by default so that resend
+ * actually works. The device-tree parsing will turn the LSIs
+ * back to level.
+ */
+ irq_clear_status_flags(virq, IRQ_LEVEL);
/* Don't call into ICS for IPIs */
if (hw == XICS_IPI) {
irq_hw_number_t *out_hwirq, unsigned int *out_flags)
{
- /* Current xics implementation translates everything
- * to level. It is not technically right for MSIs but this
- * is irrelevant at this point. We might get smarter in the future
- */
*out_hwirq = intspec[0];
- *out_flags = IRQ_TYPE_LEVEL_LOW;
+ /*
+ * If intsize is at least 2, we look for the type in the second cell;
+ * we assume the LSB indicates a level interrupt.
+ */
+ if (intsize > 1) {
+ if (intspec[1] & 1)
+ *out_flags = IRQ_TYPE_LEVEL_LOW;
+ else
+ *out_flags = IRQ_TYPE_EDGE_RISING;
+ } else
+ *out_flags = IRQ_TYPE_LEVEL_LOW;
+
+ return 0;
+}
+
+int xics_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+ /*
+ * We only support these. This has really no effect other than setting
+ * the corresponding descriptor bits, mind you, but those will in turn
+ * affect the resend function when re-enabling an edge interrupt.
+ *
+ * Set the default to edge as explained in map().
+ */
+ if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
+ flow_type = IRQ_TYPE_EDGE_RISING;
+
+ if (flow_type != IRQ_TYPE_EDGE_RISING &&
+ flow_type != IRQ_TYPE_LEVEL_LOW)
+ return -EINVAL;
+
+ irqd_set_trigger_type(d, flow_type);
+
+ return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+int xics_retrigger(struct irq_data *data)
+{
+ /*
+ * We need to push a dummy CPPR when retriggering, since the subsequent
+ * EOI will try to pop it. Passing 0 works, as the function hard codes
+ * the priority value anyway.
+ */
+ xics_push_cppr(0);
+
+ /* Tell the core to do a soft retrigger */
return 0;
}
Select this option if you want to run the kernel as a guest under
the KVM hypervisor.
+config S390_GUEST_OLD_TRANSPORT
+ def_bool y
+ prompt "Guest support for old s390 virtio transport (DEPRECATED)"
+ depends on S390_GUEST
+ help
+ Enable this option to add support for the old s390-virtio
+ transport (i.e. virtio devices NOT based on virtio-ccw). This
+ type of virtio devices is only available on the experimental
+ kuli userspace or with old (< 2.6) qemu. If you are running
+ with a modern version of qemu (which supports virtio-ccw since
+ 1.4 and uses it by default since version 2.4), you probably won't
+ need this.
+
endmenu
}
}
+static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
static struct intel_uncore_ops snb_uncore_msr_ops = {
.init_box = snb_uncore_msr_init_box,
+ .enable_box = snb_uncore_msr_enable_box,
.exit_box = snb_uncore_msr_exit_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
}
}
+static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
static struct intel_uncore_ops skl_uncore_msr_ops = {
.init_box = skl_uncore_msr_init_box,
+ .enable_box = skl_uncore_msr_enable_box,
.exit_box = skl_uncore_msr_exit_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
static struct intel_uncore_type hswep_uncore_ha = {
.name = "ha",
- .num_counters = 5,
+ .num_counters = 4,
.num_boxes = 2,
.perf_ctr_bits = 48,
SNBEP_UNCORE_PCI_COMMON_INIT(),
static struct intel_uncore_type hswep_uncore_imc = {
.name = "imc",
- .num_counters = 5,
+ .num_counters = 4,
.num_boxes = 8,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,
static struct intel_uncore_type hswep_uncore_qpi = {
.name = "qpi",
- .num_counters = 5,
+ .num_counters = 4,
.num_boxes = 3,
.perf_ctr_bits = 48,
.perf_ctr = SNBEP_PCI_PMON_CTR0,
static struct intel_uncore_type hswep_uncore_r3qpi = {
.name = "r3qpi",
- .num_counters = 4,
+ .num_counters = 3,
.num_boxes = 3,
.perf_ctr_bits = 44,
.constraints = hswep_uncore_r3qpi_constraints,
static struct intel_uncore_type bdx_uncore_imc = {
.name = "imc",
- .num_counters = 5,
+ .num_counters = 4,
.num_boxes = 8,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,
*cursor &= 0xfe;
}
/*
- * Similar treatment for VEX3 prefix.
- * TODO: add XOP/EVEX treatment when insn decoder supports them
+ * Similar treatment for VEX3/EVEX prefix.
+ * TODO: add XOP treatment when insn decoder supports them
*/
- if (insn->vex_prefix.nbytes == 3) {
+ if (insn->vex_prefix.nbytes >= 3) {
/*
* vex2: c5 rvvvvLpp (has no b bit)
* vex3/xop: c4/8f rxbmmmmm wvvvvLpp
* evex: 62 rxbR00mm wvvvv1pp zllBVaaa
- * (evex will need setting of both b and x since
- * in non-sib encoding evex.x is 4th bit of MODRM.rm)
- * Setting VEX3.b (setting because it has inverted meaning):
+ * Setting VEX3.b (setting because it has inverted meaning).
+ * Setting EVEX.x since (in non-SIB encoding) EVEX.x
+ * is the 4th bit of MODRM.rm, and needs the same treatment.
+ * For VEX3-encoded insns, VEX3.x value has no effect in
+ * non-SIB encoding, the change is superfluous but harmless.
*/
cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
- *cursor |= 0x20;
+ *cursor |= 0x60;
}
/*
reg = MODRM_REG(insn); /* Fetch modrm.reg */
reg2 = 0xff; /* Fetch vex.vvvv */
- if (insn->vex_prefix.nbytes == 2)
- reg2 = insn->vex_prefix.bytes[1];
- else if (insn->vex_prefix.nbytes == 3)
+ if (insn->vex_prefix.nbytes)
reg2 = insn->vex_prefix.bytes[2];
/*
- * TODO: add XOP, EXEV vvvv reading.
+ * TODO: add XOP vvvv reading.
*
* vex.vvvv field is in bits 6-3, bits are inverted.
* But in 32-bit mode, high-order bit may be ignored.
void uv_bios_init(void)
{
uv_systab = NULL;
- if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) {
+ if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+ !efi.uv_systab || efi_runtime_disabled()) {
pr_crit("UV: UVsystab: missing\n");
return;
}
{
struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
u64 offset = nfit_blk->stat_offset + mmio->size * bw;
+ const u32 STATUS_MASK = 0x80000037;
if (mmio->num_lines)
offset = to_interleave_offset(offset, mmio);
- return readl(mmio->addr.base + offset);
+ return readl(mmio->addr.base + offset) & STATUS_MASK;
}
static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
bool need_put = !!rbd_dev->opts;
ceph_oid_destroy(&rbd_dev->header_oid);
+ ceph_oloc_destroy(&rbd_dev->header_oloc);
rbd_put_client(rbd_dev->rbd_client);
rbd_spec_put(rbd_dev->spec);
}
spec->pool_id = (u64)rc;
- /* The ceph file layout needs to fit pool id in 32 bits */
-
- if (spec->pool_id > (u64)U32_MAX) {
- rbd_warn(NULL, "pool id too large (%llu > %u)",
- (unsigned long long)spec->pool_id, U32_MAX);
- rc = -EIO;
- goto err_out_client;
- }
-
rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
if (!rbd_dev) {
rc = -ENOMEM;
num_vqs = 1;
vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
- if (!vblk->vqs) {
- err = -ENOMEM;
- goto out;
- }
+ if (!vblk->vqs)
+ return -ENOMEM;
names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
- if (!names)
- goto err_names;
-
callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
- if (!callbacks)
- goto err_callbacks;
-
vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
- if (!vqs)
- goto err_vqs;
+ if (!names || !callbacks || !vqs) {
+ err = -ENOMEM;
+ goto out;
+ }
for (i = 0; i < num_vqs; i++) {
callbacks[i] = virtblk_done;
/* Discover virtqueues and write information to configuration. */
err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
if (err)
- goto err_find_vqs;
+ goto out;
for (i = 0; i < num_vqs; i++) {
spin_lock_init(&vblk->vqs[i].lock);
}
vblk->num_vqs = num_vqs;
- err_find_vqs:
+out:
kfree(vqs);
- err_vqs:
kfree(callbacks);
- err_callbacks:
kfree(names);
- err_names:
if (err)
kfree(vblk->vqs);
- out:
return err;
}
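The reworked error handling above leans on kfree(NULL) being a no-op, so the temporary arrays can be freed unconditionally at one exit label and only vblk->vqs needs the extra error check. A userspace sketch of the same idiom (free(NULL) is likewise a no-op; the names are hypothetical):

#include <stdlib.h>

static int make_buffers(size_t n, char **keep_out)
{
	char *keep, *tmp1, *tmp2;
	int err = 0;

	keep = malloc(n);		/* survives on success, like vblk->vqs */
	if (!keep)
		return -1;

	tmp1 = malloc(n);		/* scratch, like names/callbacks/vqs */
	tmp2 = malloc(n);
	if (!tmp1 || !tmp2) {
		err = -1;
		goto out;
	}

	/* ... real initialisation work would go here ... */
	*keep_out = keep;
out:
	free(tmp1);			/* unconditional: freeing NULL is safe */
	free(tmp2);
	if (err)
		free(keep);		/* discard the kept buffer only on failure */
	return err;
}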
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/efi.h>
+#include <linux/vmalloc.h>
#define NO_FURTHER_WRITE_ACTION -1
int ret;
void *cap_hdr_temp;
- cap_hdr_temp = kmap(cap_info->pages[0]);
+ cap_hdr_temp = vmap(cap_info->pages, cap_info->index,
+ VM_MAP, PAGE_KERNEL);
if (!cap_hdr_temp) {
- pr_debug("%s: kmap() failed\n", __func__);
+ pr_debug("%s: vmap() failed\n", __func__);
return -EFAULT;
}
ret = efi_capsule_update(cap_hdr_temp, cap_info->pages);
- kunmap(cap_info->pages[0]);
+ vunmap(cap_hdr_temp);
if (ret) {
pr_err("%s: efi_capsule_update() failed\n", __func__);
return ret;
* map the capsule described by @capsule with its data in @pages and
* send it to the firmware via the UpdateCapsule() runtime service.
*
- * @capsule must be a virtual mapping of the first page in @pages
- * (@pages[0]) in the kernel address space. That is, a
- * capsule_header_t that describes the entire contents of the capsule
+ * @capsule must be a virtual mapping of the complete capsule update in the
+ * kernel address space, as the capsule can be consumed immediately.
+ * A capsule_header_t that describes the entire contents of the capsule
* must be at the start of the first data page.
*
* Even though this function will validate that the firmware supports
*/
mutex_lock(&afu->contexts_lock);
idr_preload(GFP_KERNEL);
- i = idr_alloc(&ctx->afu->contexts_idr, ctx,
- ctx->afu->adapter->native->sl_ops->min_pe,
+ i = idr_alloc(&ctx->afu->contexts_idr, ctx, ctx->afu->adapter->min_pe,
ctx->afu->num_procs, GFP_NOWAIT);
idr_preload_end();
mutex_unlock(&afu->contexts_lock);
u64 (*timebase_read)(struct cxl *adapter);
int capi_mode;
bool needs_reset_before_disable;
- int min_pe;
};
struct cxl_native {
struct bin_attribute cxl_attr;
int adapter_num;
int user_irqs;
+ int min_pe;
u64 ps_size;
u16 psl_rev;
u16 base_image;
return fail_psl_irq(afu, &irq_info);
}
-void native_irq_wait(struct cxl_context *ctx)
+static void native_irq_wait(struct cxl_context *ctx)
{
u64 dsisr;
int timeout = 1000;
static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_dev *dev)
{
- u64 psl_dsnctl;
+ u64 psl_dsnctl, psl_fircntl;
u64 chipid;
u64 capp_unit_id;
int rc;
cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL);
/* snoop write mask */
cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL);
- /* set fir_accum */
- cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL);
+ /* set fir_cntl to recommended value for production env */
+ psl_fircntl = (0x2ULL << (63-3)); /* ce_report */
+ psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */
+ psl_fircntl |= 0x1ULL; /* ce_thresh */
+ cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl);
/* for debugging with trace arrays */
cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL);
.write_timebase_ctrl = write_timebase_ctrl_xsl,
.timebase_read = timebase_read_xsl,
.capi_mode = OPAL_PHB_CAPI_MODE_DMA,
- .min_pe = 1, /* Workaround for Mellanox CX4 HW bug */
};
static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev)
{
if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) {
+ /* Mellanox CX-4 */
dev_info(&adapter->dev, "Device uses an XSL\n");
adapter->native->sl_ops = &xsl_ops;
+ adapter->min_pe = 1; /* Workaround for CX-4 hardware bug */
} else {
dev_info(&adapter->dev, "Device uses a PSL\n");
adapter->native->sl_ops = &psl_ops;
/* Setup the PHB using arch provided callback */
phb->ops = &cxl_pcie_pci_ops;
phb->cfg_addr = NULL;
- phb->cfg_data = 0;
+ phb->cfg_data = NULL;
phb->private_data = afu;
phb->controller_ops = cxl_pci_controller_ops;
}
}
set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
+ btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
revalidate_disk(btt->btt_disk);
return 0;
}
static DEVICE_ATTR_RW(namespace);
+static ssize_t size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_btt *nd_btt = to_nd_btt(dev);
+ ssize_t rc;
+
+ device_lock(dev);
+ if (dev->driver)
+ rc = sprintf(buf, "%llu\n", nd_btt->size);
+ else {
+ /* no size to convey if the btt instance is disabled */
+ rc = -ENXIO;
+ }
+ device_unlock(dev);
+
+ return rc;
+}
+static DEVICE_ATTR_RO(size);
+
static struct attribute *nd_btt_attributes[] = {
&dev_attr_sector_size.attr,
&dev_attr_namespace.attr,
&dev_attr_uuid.attr,
+ &dev_attr_size.attr,
NULL,
};
struct nd_namespace_common *ndns;
struct btt *btt;
unsigned long lbasize;
+ u64 size;
u8 *uuid;
int id;
};
if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
pci_msi_domain_update_chip_ops(info);
+ info->flags |= MSI_FLAG_ACTIVATE_EARLY;
+
domain = msi_create_irq_domain(fwnode, info, parent);
if (!domain)
return NULL;
static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
long timeout)
{
- struct rio_channel *ch = NULL;
- struct rio_channel *new_ch = NULL;
+ struct rio_channel *ch;
+ struct rio_channel *new_ch;
struct conn_req *req;
struct cm_peer *peer;
int found = 0;
spin_unlock_bh(&ch->lock);
riocm_put_channel(ch);
+ ch = NULL;
kfree(req);
down_read(&rdev_sem);
if (!found) {
/* If peer device object not found, simply ignore the request */
err = -ENODEV;
- goto err_nodev;
+ goto err_put_new_ch;
}
new_ch->rdev = peer->rdev;
*new_ch_id = new_ch->id;
return new_ch;
+
+err_put_new_ch:
+ spin_lock_bh(&idr_lock);
+ idr_remove(&ch_idr, new_ch->id);
+ spin_unlock_bh(&idr_lock);
+ riocm_put_channel(new_ch);
+
err_put:
- riocm_put_channel(ch);
-err_nodev:
- if (new_ch) {
- spin_lock_bh(&idr_lock);
- idr_remove(&ch_idr, new_ch->id);
- spin_unlock_bh(&idr_lock);
- riocm_put_channel(new_ch);
- }
+ if (ch)
+ riocm_put_channel(ch);
*new_ch_id = 0;
return ERR_PTR(err);
}
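The relabelled exit paths above restore the usual unwind ordering: each error label releases exactly what was acquired before the jump, and the now-conditional riocm_put_channel(ch) avoids putting the same channel reference twice. A generic sketch of that goto-unwind shape (hypothetical acquire/release helpers):

#include <stdlib.h>

struct res { int dummy; };
static struct res *acquire(void) { return malloc(sizeof(struct res)); }
static void release(struct res *r) { free(r); }

static int do_setup(void)
{
	struct res *a, *b;
	int err;

	a = acquire();
	if (!a)
		return -1;

	b = acquire();
	if (!b) {
		err = -1;
		goto err_release_a;	/* only 'a' exists at this point */
	}

	/* ... work using both resources ... */
	release(b);
	release(a);
	return 0;

err_release_a:
	release(a);
	return err;
}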
# it under the terms of the GNU General Public License (version 2 only)
# as published by the Free Software Foundation.
-obj-$(CONFIG_S390_GUEST) += kvm_virtio.o virtio_ccw.o
+s390-virtio-objs := virtio_ccw.o
+ifdef CONFIG_S390_GUEST_OLD_TRANSPORT
+s390-virtio-objs += kvm_virtio.o
+endif
+obj-$(CONFIG_S390_GUEST) += $(s390-virtio-objs)
if (test_devices_support(total_memory_size) < 0)
return -ENODEV;
+ pr_warn("The s390-virtio transport is deprecated. Please switch to a modern host providing virtio-ccw.\n");
+
rc = vmem_add_mapping(total_memory_size, PAGE_SIZE);
if (rc)
return rc;
}
/* code for early console output with virtio_console */
-static __init int early_put_chars(u32 vtermno, const char *buf, int count)
+static int early_put_chars(u32 vtermno, const char *buf, int count)
{
char scratch[17];
unsigned int len = count;
vhost_disable_notify(&vsock->dev, vq);
for (;;) {
+ u32 len;
+
if (!vhost_vsock_more_replies(vsock)) {
/* Stop tx until the device processes already
* pending replies. Leave tx virtqueue
continue;
}
+ len = pkt->len;
+
/* Only accept correctly addressed packets */
if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
virtio_transport_recv_pkt(pkt);
else
virtio_transport_free_pkt(pkt);
- vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+ vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
added = true;
}
* host should service the ring ASAP. */
if (out_sgs)
vq->notify(&vq->vq);
+ if (indirect)
+ kfree(desc);
END_USE(vq);
return -ENOSPC;
}
if (indirect)
kfree(desc);
+ END_USE(vq);
return -EIO;
}
return 0;
}
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 ref_root, u64 bytenr, u64 num_bytes)
-{
- struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_delayed_ref_head *ref_head;
- int ret = 0;
-
- if (!fs_info->quota_enabled || !is_fstree(ref_root))
- return 0;
-
- delayed_refs = &trans->transaction->delayed_refs;
-
- spin_lock(&delayed_refs->lock);
- ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
- if (!ref_head) {
- ret = -ENOENT;
- goto out;
- }
- WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
- ref_head->qgroup_ref_root = ref_root;
- ref_head->qgroup_reserved = num_bytes;
-out:
- spin_unlock(&delayed_refs->lock);
- return ret;
-}
-
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, u64 reserved, int action,
struct btrfs_delayed_extent_op *extent_op);
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 ref_root, u64 bytenr, u64 num_bytes);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
*/
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
+ /*
+ * An ordered extent might have started before and completed
+ * already with io errors, in which case the inode was not
+ * updated and we end up here. So check the inode's mapping
+ * flags for any errors that might have happened while doing
+ * writeback of file data.
+ */
+ ret = btrfs_inode_check_errors(inode);
inode_unlock(inode);
goto out;
}
found_key.offset = 0;
inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
ret = PTR_ERR_OR_ZERO(inode);
- if (ret && ret != -ESTALE)
+ if (ret && ret != -ENOENT)
goto out;
- if (ret == -ESTALE && root == root->fs_info->tree_root) {
+ if (ret == -ENOENT && root == root->fs_info->tree_root) {
struct btrfs_root *dead_root;
struct btrfs_fs_info *fs_info = root->fs_info;
int is_dead_root = 0;
* Inode is already gone but the orphan item is still there,
* kill the orphan item.
*/
- if (ret == -ESTALE) {
+ if (ret == -ENOENT) {
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
/*
* read an inode from the btree into the in-memory inode
*/
-static void btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode)
{
struct btrfs_path *path;
struct extent_buffer *leaf;
filled = true;
path = btrfs_alloc_path();
- if (!path)
+ if (!path) {
+ ret = -ENOMEM;
goto make_bad;
+ }
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
- if (ret)
+ if (ret) {
+ if (ret > 0)
+ ret = -ENOENT;
goto make_bad;
+ }
leaf = path->nodes[0];
}
btrfs_update_iflags(inode);
- return;
+ return 0;
make_bad:
btrfs_free_path(path);
make_bad_inode(inode);
+ return ret;
}
/*
int err = 0;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_trans_handle *trans;
+ u64 last_unlink_trans;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
if (err)
goto out;
+ last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+
/* now the directory is empty */
err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
dentry->d_name.name, dentry->d_name.len);
- if (!err)
+ if (!err) {
btrfs_i_size_write(inode, 0);
+ /*
+ * Propagate the last_unlink_trans value of the deleted dir to
+ * its parent directory. This is to prevent an unrecoverable
+ * log tree in the case we do something like this:
+ * 1) create dir foo
+ * 2) create snapshot under dir foo
+ * 3) delete the snapshot
+ * 4) rmdir foo
+ * 5) mkdir foo
+ * 6) fsync foo or some file inside foo
+ */
+ if (last_unlink_trans >= trans->transid)
+ BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
+ }
out:
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
- btrfs_read_locked_inode(inode);
+ int ret;
+
+ ret = btrfs_read_locked_inode(inode);
if (!is_bad_inode(inode)) {
inode_tree_add(inode);
unlock_new_inode(inode);
} else {
unlock_new_inode(inode);
iput(inode);
- inode = ERR_PTR(-ESTALE);
+ ASSERT(ret < 0);
+ inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
}
}
u64 parent_ino;
u64 ino;
u64 gen;
- bool is_orphan;
struct list_head update_refs;
};
char name[];
};
+static void inconsistent_snapshot_error(struct send_ctx *sctx,
+ enum btrfs_compare_tree_result result,
+ const char *what)
+{
+ const char *result_string;
+
+ switch (result) {
+ case BTRFS_COMPARE_TREE_NEW:
+ result_string = "new";
+ break;
+ case BTRFS_COMPARE_TREE_DELETED:
+ result_string = "deleted";
+ break;
+ case BTRFS_COMPARE_TREE_CHANGED:
+ result_string = "updated";
+ break;
+ case BTRFS_COMPARE_TREE_SAME:
+ ASSERT(0);
+ result_string = "unchanged";
+ break;
+ default:
+ ASSERT(0);
+ result_string = "unexpected";
+ }
+
+ btrfs_err(sctx->send_root->fs_info,
+ "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
+ result_string, what, sctx->cmp_key->objectid,
+ sctx->send_root->root_key.objectid,
+ (sctx->parent_root ?
+ sctx->parent_root->root_key.objectid : 0));
+}
+
static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
static struct waiting_dir_move *
* was already unlinked/moved, so we can safely assume that we will not
* overwrite anything at this point in time.
*/
- if (other_inode > sctx->send_progress) {
+ if (other_inode > sctx->send_progress ||
+ is_waiting_for_move(sctx, other_inode)) {
ret = get_inode_info(sctx->parent_root, other_inode, NULL,
who_gen, NULL, NULL, NULL, NULL);
if (ret < 0)
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
+ if (ret > 0)
+ ret = -ENOENT;
if (ret < 0)
goto out;
}
if (loc.objectid > send_progress) {
+ struct orphan_dir_info *odi;
+
+ odi = get_orphan_dir_info(sctx, dir);
+ free_orphan_dir_info(sctx, odi);
ret = 0;
goto out;
}
pm->parent_ino = parent_ino;
pm->ino = ino;
pm->gen = ino_gen;
- pm->is_orphan = is_orphan;
INIT_LIST_HEAD(&pm->list);
INIT_LIST_HEAD(&pm->update_refs);
RB_CLEAR_NODE(&pm->node);
return NULL;
}
+static int path_loop(struct send_ctx *sctx, struct fs_path *name,
+ u64 ino, u64 gen, u64 *ancestor_ino)
+{
+ int ret = 0;
+ u64 parent_inode = 0;
+ u64 parent_gen = 0;
+ u64 start_ino = ino;
+
+ *ancestor_ino = 0;
+ while (ino != BTRFS_FIRST_FREE_OBJECTID) {
+ fs_path_reset(name);
+
+ if (is_waiting_for_rm(sctx, ino))
+ break;
+ if (is_waiting_for_move(sctx, ino)) {
+ if (*ancestor_ino == 0)
+ *ancestor_ino = ino;
+ ret = get_first_ref(sctx->parent_root, ino,
+ &parent_inode, &parent_gen, name);
+ } else {
+ ret = __get_cur_name_and_parent(sctx, ino, gen,
+ &parent_inode,
+ &parent_gen, name);
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ }
+ if (ret < 0)
+ break;
+ if (parent_inode == start_ino) {
+ ret = 1;
+ if (*ancestor_ino == 0)
+ *ancestor_ino = ino;
+ break;
+ }
+ ino = parent_inode;
+ gen = parent_gen;
+ }
+ return ret;
+}
+
static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
{
struct fs_path *from_path = NULL;
u64 parent_ino, parent_gen;
struct waiting_dir_move *dm = NULL;
u64 rmdir_ino = 0;
+ u64 ancestor;
+ bool is_orphan;
int ret;
name = fs_path_alloc();
dm = get_waiting_dir_move(sctx, pm->ino);
ASSERT(dm);
rmdir_ino = dm->rmdir_ino;
+ is_orphan = dm->orphanized;
free_waiting_dir_move(sctx, dm);
- if (pm->is_orphan) {
+ if (is_orphan) {
ret = gen_unique_name(sctx, pm->ino,
pm->gen, from_path);
} else {
goto out;
sctx->send_progress = sctx->cur_ino + 1;
+ ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
+ if (ret < 0)
+ goto out;
+ if (ret) {
+ LIST_HEAD(deleted_refs);
+ ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
+ ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
+ &pm->update_refs, &deleted_refs,
+ is_orphan);
+ if (ret < 0)
+ goto out;
+ if (rmdir_ino) {
+ dm = get_waiting_dir_move(sctx, pm->ino);
+ ASSERT(dm);
+ dm->rmdir_ino = rmdir_ino;
+ }
+ goto out;
+ }
fs_path_reset(name);
to_path = name;
name = NULL;
/* already deleted */
goto finish;
}
- ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
+ ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
if (ret < 0)
goto out;
if (!ret)
* and old parent(s).
*/
list_for_each_entry(cur, &pm->update_refs, list) {
- if (cur->dir == rmdir_ino)
+ /*
+ * The parent inode might have been deleted in the send snapshot
+ */
+ ret = get_inode_info(sctx->send_root, cur->dir, NULL,
+ NULL, NULL, NULL, NULL, NULL);
+ if (ret == -ENOENT) {
+ ret = 0;
continue;
+ }
+ if (ret < 0)
+ goto out;
+
ret = send_utimes(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
u64 left_gen;
u64 right_gen;
int ret = 0;
+ struct waiting_dir_move *wdm;
if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
return 0;
goto out;
}
- if (is_waiting_for_move(sctx, di_key.objectid)) {
+ wdm = get_waiting_dir_move(sctx, di_key.objectid);
+ if (wdm && !wdm->orphanized) {
ret = add_pending_dir_move(sctx,
sctx->cur_ino,
sctx->cur_inode_gen,
ret = is_ancestor(sctx->parent_root,
sctx->cur_ino, sctx->cur_inode_gen,
ino, path_before);
- break;
+ if (ret)
+ break;
}
fs_path_reset(path_before);
goto out;
if (ret) {
struct name_cache_entry *nce;
+ struct waiting_dir_move *wdm;
ret = orphanize_inode(sctx, ow_inode, ow_gen,
cur->full_path);
if (ret < 0)
goto out;
+
+ /*
+ * If ow_inode has its rename operation delayed,
+ * make sure that its orphanized name is used in
+ * the source path when performing its rename
+ * operation.
+ */
+ if (is_waiting_for_move(sctx, ow_inode)) {
+ wdm = get_waiting_dir_move(sctx,
+ ow_inode);
+ ASSERT(wdm);
+ wdm->orphanized = true;
+ }
+
/*
* Make sure we clear our orphanized inode's
* name from the name cache. This is because the
name_cache_delete(sctx, nce);
kfree(nce);
}
+
+ /*
+ * ow_inode might currently be an ancestor of
+ * cur_ino, therefore compute valid_path (the
+ * current path of cur_ino) again because it
+ * might contain the pre-orphanization name of
+ * ow_inode, which is no longer valid.
+ */
+ fs_path_reset(valid_path);
+ ret = get_cur_path(sctx, sctx->cur_ino,
+ sctx->cur_inode_gen, valid_path);
+ if (ret < 0)
+ goto out;
} else {
ret = send_unlink(sctx, cur->full_path);
if (ret < 0)
{
int ret = 0;
- BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+ if (sctx->cur_ino != sctx->cmp_key->objectid) {
+ inconsistent_snapshot_error(sctx, result, "reference");
+ return -EIO;
+ }
if (!sctx->cur_inode_new_gen &&
sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
{
int ret = 0;
- BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+ if (sctx->cur_ino != sctx->cmp_key->objectid) {
+ inconsistent_snapshot_error(sctx, result, "xattr");
+ return -EIO;
+ }
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result == BTRFS_COMPARE_TREE_NEW)
{
int ret = 0;
- BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+ if (sctx->cur_ino != sctx->cmp_key->objectid) {
+ inconsistent_snapshot_error(sctx, result, "extent");
+ return -EIO;
+ }
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result != BTRFS_COMPARE_TREE_DELETED)
static int btrfs_check_ref_name_override(struct extent_buffer *eb,
const int slot,
const struct btrfs_key *key,
- struct inode *inode)
+ struct inode *inode,
+ u64 *other_ino)
{
int ret;
struct btrfs_path *search_path;
search_path, parent,
name, this_name_len, 0);
if (di && !IS_ERR(di)) {
- ret = 1;
+ struct btrfs_key di_key;
+
+ btrfs_dir_item_key_to_cpu(search_path->nodes[0],
+ di, &di_key);
+ if (di_key.type == BTRFS_INODE_ITEM_KEY) {
+ ret = 1;
+ *other_ino = di_key.objectid;
+ } else {
+ ret = -EAGAIN;
+ }
goto out;
} else if (IS_ERR(di)) {
ret = PTR_ERR(di);
if ((min_key.type == BTRFS_INODE_REF_KEY ||
min_key.type == BTRFS_INODE_EXTREF_KEY) &&
BTRFS_I(inode)->generation == trans->transid) {
+ u64 other_ino = 0;
+
ret = btrfs_check_ref_name_override(path->nodes[0],
path->slots[0],
- &min_key, inode);
+ &min_key, inode,
+ &other_ino);
if (ret < 0) {
err = ret;
goto out_unlock;
} else if (ret > 0) {
- err = 1;
- btrfs_set_log_full_commit(root->fs_info, trans);
- goto out_unlock;
+ struct btrfs_key inode_key;
+ struct inode *other_inode;
+
+ if (ins_nr > 0) {
+ ins_nr++;
+ } else {
+ ins_nr = 1;
+ ins_start_slot = path->slots[0];
+ }
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, ins_start_slot,
+ ins_nr, inode_only,
+ logged_isize);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ }
+ ins_nr = 0;
+ btrfs_release_path(path);
+ inode_key.objectid = other_ino;
+ inode_key.type = BTRFS_INODE_ITEM_KEY;
+ inode_key.offset = 0;
+ other_inode = btrfs_iget(root->fs_info->sb,
+ &inode_key, root,
+ NULL);
+ /*
+ * If the other inode that had a conflicting dir
+ * entry was deleted in the current transaction,
+ * we don't need to do more work nor fallback to
+ * a transaction commit.
+ */
+ if (IS_ERR(other_inode) &&
+ PTR_ERR(other_inode) == -ENOENT) {
+ goto next_key;
+ } else if (IS_ERR(other_inode)) {
+ err = PTR_ERR(other_inode);
+ goto out_unlock;
+ }
+ /*
+ * We are safe logging the other inode without
+ * acquiring its i_mutex as long as we log with
+ * the LOG_INODE_EXISTS mode. We're safe against
+ * concurrent renames of the other inode as well
+ * because during a rename we pin the log and
+ * update the log with the new name before we
+ * unpin it.
+ */
+ err = btrfs_log_inode(trans, root, other_inode,
+ LOG_INODE_EXISTS,
+ 0, LLONG_MAX, ctx);
+ iput(other_inode);
+ if (err)
+ goto out_unlock;
+ else
+ goto next_key;
}
}
ins_nr = 0;
}
btrfs_release_path(path);
-
+next_key:
if (min_key.offset < (u64)-1) {
min_key.offset++;
} else if (min_key.type < max_key.type) {
if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
break;
- if (IS_ROOT(parent))
+ if (IS_ROOT(parent)) {
+ inode = d_inode(parent);
+ if (btrfs_must_commit_transaction(trans, inode))
+ ret = 1;
break;
+ }
parent = dget_parent(parent);
dput(old_parent);
{
struct inode *inode = &ci->vfs_inode;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
- struct ceph_mds_session *session = *psession;
+ struct ceph_mds_session *session = NULL;
int mds;
+
dout("ceph_flush_snaps %p\n", inode);
+ if (psession)
+ session = *psession;
retry:
spin_lock(&ci->i_ceph_lock);
if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) {
} else {
path = NULL;
pathlen = 0;
+ pathbase = 0;
}
spin_lock(&ci->i_ceph_lock);
case 0:
break;
case -NFS4ERR_EXPIRED:
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_BAD_STATEID:
extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
extern void nfs4_kill_renewd(struct nfs_client *);
extern void nfs4_renew_state(struct work_struct *);
+extern void nfs4_set_lease_period(struct nfs_client *clp,
+ unsigned long lease,
+ unsigned long lastrenewed);
+
/* nfs4state.c */
struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp);
err = _nfs4_do_fsinfo(server, fhandle, fsinfo);
trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err);
if (err == 0) {
- struct nfs_client *clp = server->nfs_client;
-
- spin_lock(&clp->cl_lock);
- clp->cl_lease_time = fsinfo->lease_time * HZ;
- clp->cl_last_renewal = now;
- spin_unlock(&clp->cl_lock);
+ nfs4_set_lease_period(server->nfs_client,
+ fsinfo->lease_time * HZ,
+ now);
break;
}
err = nfs4_handle_exception(server, err, &exception);
cancel_delayed_work_sync(&clp->cl_renewd);
}
+/**
+ * nfs4_set_lease_period - Sets the lease period on a nfs_client
+ *
+ * @clp: pointer to nfs_client
+ * @lease: new value for lease period
+ * @lastrenewed: time at which lease was last renewed
+ */
+void nfs4_set_lease_period(struct nfs_client *clp,
+ unsigned long lease,
+ unsigned long lastrenewed)
+{
+ spin_lock(&clp->cl_lock);
+ clp->cl_lease_time = lease;
+ clp->cl_last_renewal = lastrenewed;
+ spin_unlock(&clp->cl_lock);
+
+ /* Cap maximum reconnect timeout at 1/2 lease period */
+ rpc_cap_max_reconnect_timeout(clp->cl_rpcclient, lease >> 1);
+}
+
/*
* Local variables:
* c-basic-offset: 8
{
int status;
struct nfs_fsinfo fsinfo;
+ unsigned long now;
if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
nfs4_schedule_state_renewal(clp);
return 0;
}
+ now = jiffies;
status = nfs4_proc_get_lease_time(clp, &fsinfo);
if (status == 0) {
- /* Update lease time and schedule renewal */
- spin_lock(&clp->cl_lock);
- clp->cl_lease_time = fsinfo.lease_time * HZ;
- clp->cl_last_renewal = jiffies;
- spin_unlock(&clp->cl_lock);
-
+ nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now);
nfs4_schedule_state_renewal(clp);
}
cached = 0;
for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
- pages[lru] = global_page_state(NR_LRU_BASE + lru);
+ pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
available = si_mem_available();
#include <asm-generic/qrwlock_types.h>
/*
- * Writer states & reader shift and bias
+ * Writer states & reader shift and bias.
+ *
+ * | +0 | +1 | +2 | +3 |
+ * ----+----+----+----+----+
+ * LE | 78 | 56 | 34 | 12 | 0x12345678
+ * ----+----+----+----+----+
+ * | wr | rd |
+ * +----+----+----+----+
+ *
+ * ----+----+----+----+----+
+ * BE | 12 | 34 | 56 | 78 | 0x12345678
+ * ----+----+----+----+----+
+ * | rd | wr |
+ * +----+----+----+----+
*/
#define _QW_WAITING 1 /* A writer is waiting */
#define _QW_LOCKED 0xff /* A writer holds the lock */
(void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
}
+/**
+ * __qrwlock_write_byte - retrieve the write byte address of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: the write byte address of a queue rwlock
+ */
+static inline u8 *__qrwlock_write_byte(struct qrwlock *lock)
+{
+ return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
+}
+
/**
* queued_write_unlock - release write lock of a queue rwlock
* @lock : Pointer to queue rwlock structure
*/
static inline void queued_write_unlock(struct qrwlock *lock)
{
- smp_store_release((u8 *)&lock->cnts, 0);
+ smp_store_release(__qrwlock_write_byte(lock), 0);
}
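As the layout comment above shows, the writer state lives in the least-significant byte of lock->cnts, which sits at byte offset 0 on little-endian and offset 3 on big-endian; that is exactly what the 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN) term computes. A standalone illustration of that byte position (userspace C, not kernel code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint32_t cnts = 0xff;		/* _QW_LOCKED occupies the low-order byte */
	unsigned char bytes[4];
	int i;

	memcpy(bytes, &cnts, sizeof(bytes));
	/* little-endian hosts print byte 0 as 0xff, big-endian hosts byte 3 */
	for (i = 0; i < 4; i++)
		printf("byte %d = 0x%02x\n", i, bytes[i]);
	return 0;
}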
/*
MSI_FLAG_MULTI_PCI_MSI = (1 << 2),
/* Support PCI MSIX interrupts */
MSI_FLAG_PCI_MSIX = (1 << 3),
+ /* Needs early activate, required for PCI */
+ MSI_FLAG_ACTIVATE_EARLY = (1 << 4),
};
int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
u64 parent_gen;
u64 generation;
int pin_count;
+#ifdef CONFIG_CGROUP_PERF
int nr_cgroups; /* cgroup evts */
+#endif
void *task_ctx_data; /* pmu specific data */
struct rcu_head rcu_head;
};
unsigned int hrtimer_active;
struct pmu *unique_pmu;
+#ifdef CONFIG_CGROUP_PERF
struct perf_cgroup *cgrp;
+#endif
};
struct perf_output_handle {
struct rpc_xprt *,
void *),
void *data);
+void rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+ unsigned long timeo);
const char *rpc_proc_name(const struct rpc_task *task);
#endif /* __KERNEL__ */
struct work_struct task_cleanup;
struct timer_list timer;
unsigned long last_used,
- idle_timeout;
+ idle_timeout,
+ max_reconnect_timeout;
/*
* Send stuff
*/
#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H
-#define _UAPI_LINUX_VIRTIO_VOSCK_H
+#define _UAPI_LINUX_VIRTIO_VSOCK_H
#include <linux/types.h>
#include <linux/virtio_ids.h>
*
 * Of course the contents will be ABI, but that's up to the AFU driver.
*/
- size_t data_size;
- u8 data[];
+ __u32 data_size;
+ __u8 data[];
};
struct cxl_event {
}
}
}
+
+/*
+ * Update cpuctx->cgrp so that it is set when first cgroup event is added and
+ * cleared when last cgroup event is removed.
+ */
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+ struct perf_event_context *ctx, bool add)
+{
+ struct perf_cpu_context *cpuctx;
+
+ if (!is_cgroup_event(event))
+ return;
+
+ if (add && ctx->nr_cgroups++)
+ return;
+ else if (!add && --ctx->nr_cgroups)
+ return;
+ /*
+ * Because cgroup events are always per-cpu events,
+ * this will always be called from the right CPU.
+ */
+ cpuctx = __get_cpu_context(ctx);
+ cpuctx->cgrp = add ? event->cgrp : NULL;
+}
+
#else /* !CONFIG_CGROUP_PERF */
static inline bool
struct perf_event_context *ctx)
{
}
+
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+ struct perf_event_context *ctx, bool add)
+{
+}
+
#endif
/*
static void
list_add_event(struct perf_event *event, struct perf_event_context *ctx)
{
+
lockdep_assert_held(&ctx->lock);
WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
list_add_tail(&event->group_entry, list);
}
- if (is_cgroup_event(event))
- ctx->nr_cgroups++;
+ list_update_cgroup_event(event, ctx, true);
list_add_rcu(&event->event_entry, &ctx->event_list);
ctx->nr_events++;
static void
list_del_event(struct perf_event *event, struct perf_event_context *ctx)
{
- struct perf_cpu_context *cpuctx;
-
WARN_ON_ONCE(event->ctx != ctx);
lockdep_assert_held(&ctx->lock);
event->attach_state &= ~PERF_ATTACH_CONTEXT;
- if (is_cgroup_event(event)) {
- ctx->nr_cgroups--;
- /*
- * Because cgroup events are always per-cpu events, this will
- * always be called from the right CPU.
- */
- cpuctx = __get_cpu_context(ctx);
- /*
- * If there are no more cgroup events then clear cgrp to avoid
- * stale pointer in update_cgrp_time_from_cpuctx().
- */
- if (!ctx->nr_cgroups)
- cpuctx->cgrp = NULL;
- }
+ list_update_cgroup_event(event, ctx, false);
ctx->nr_events--;
if (event->attr.inherit_stat)
static inline int
event_filter_match(struct perf_event *event)
{
- return (event->cpu == -1 || event->cpu == smp_processor_id())
- && perf_cgroup_match(event) && pmu_filter_match(event);
+ return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
+ perf_cgroup_match(event) && pmu_filter_match(event);
}
static void
 * maintained, otherwise bogus information is returned
* via read() for time_enabled, time_running:
*/
- if (event->state == PERF_EVENT_STATE_INACTIVE
- && !event_filter_match(event)) {
+ if (event->state == PERF_EVENT_STATE_INACTIVE &&
+ !event_filter_match(event)) {
delta = tstamp - event->tstamp_stopped;
event->tstamp_running += delta;
event->tstamp_stopped = tstamp;
lockdep_assert_held(&ctx->mutex);
- event->ctx = ctx;
if (event->cpu != -1)
event->cpu = cpu;
+ /*
+ * Ensures that if we can observe event->ctx, both the event and ctx
+ * will be 'complete'. See perf_iterate_sb_cpu().
+ */
+ smp_store_release(&event->ctx, ctx);
+
if (!task) {
cpu_function_call(cpu, __perf_install_in_context, event);
return;
struct perf_event *event;
list_for_each_entry_rcu(event, &pel->list, sb_list) {
+ /*
+ * Skip events that are not fully formed yet; ensure that
+ * if we observe event->ctx, both event and ctx will be
+ * complete enough. See perf_install_in_context().
+ */
+ if (!smp_load_acquire(&event->ctx))
+ continue;
+
if (event->state < PERF_EVENT_STATE_INACTIVE)
continue;
if (!event_filter_match(event))
* Futex flags used to encode options to functions and preserve them across
* restarts.
*/
-#define FLAGS_SHARED 0x01
+#ifdef CONFIG_MMU
+# define FLAGS_SHARED 0x01
+#else
+/*
+ * NOMMU does not have a per-process address space. Define FLAGS_SHARED as 0
+ * so the compiler can optimize the shared-futex code away.
+ */
+# define FLAGS_SHARED 0x00
+#endif
#define FLAGS_CLOCKRT 0x02
#define FLAGS_HAS_TIMEOUT 0x04
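The point of defining FLAGS_SHARED as 0x00 on NOMMU is that every `flags & FLAGS_SHARED` test constant-folds to zero, so the compiler can drop the shared-futex paths entirely. A stand-alone sketch of that effect (user-space C with purely illustrative names, not the kernel futex code):

#include <stdio.h>

#define FLAGS_SHARED 0x00	/* as defined for NOMMU in the hunk above */

static void shared_path(void)  { puts("shared futex path"); }
static void private_path(void) { puts("private futex path"); }

static void do_futex(unsigned int flags)
{
	if (flags & FLAGS_SHARED)	/* constant-folds to if (0) */
		shared_path();		/* dead code once the compiler optimizes */
	else
		private_path();
}

int main(void)
{
	do_futex(0);
	return 0;
}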
if (!key->both.ptr)
return;
+ /*
+ * On MMU-less systems futexes are always "private" as there is no
+ * per-process address space. We need the smp wmb nevertheless - yes,
+ * arch/blackfin has MMU-less SMP ...
+ */
+ if (!IS_ENABLED(CONFIG_MMU)) {
+ smp_mb(); /* explicit smp_mb(); (B) */
+ return;
+ }
+
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
ihold(key->shared.inode); /* implies smp_mb(); (B) */
return;
}
+ if (!IS_ENABLED(CONFIG_MMU))
+ return;
+
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
iput(key->shared.inode);
else
dev_dbg(dev, "irq [%d-%d] for MSI\n",
virq, virq + desc->nvec_used - 1);
+ /*
+ * This flag is set by the PCI layer as we need to activate
+ * the MSI entries before the PCI layer enables MSI in the
+ * card. Otherwise the card latches a random MSI message.
+ */
+ if (info->flags & MSI_FLAG_ACTIVATE_EARLY) {
+ struct irq_data *irq_data;
+
+ irq_data = irq_domain_get_irq_data(domain, desc->irq);
+ irq_domain_activate_irq(irq_data);
+ }
}
return 0;
goto gotlock;
}
}
- WRITE_ONCE(pn->state, vcpu_halted);
+ WRITE_ONCE(pn->state, vcpu_hashed);
qstat_inc(qstat_pv_wait_head, true);
qstat_inc(qstat_pv_wait_again, waitcnt);
pv_wait(&l->locked, _Q_SLOW_VAL);
*/
if ((counter == qstat_pv_latency_kick) ||
(counter == qstat_pv_latency_wake)) {
- stat = 0;
if (kicks)
stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
}
#include <linux/context_tracking.h>
#include <linux/compiler.h>
#include <linux/frame.h>
+#include <linux/prefetch.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
EXPORT_PER_CPU_SYMBOL(kstat);
EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
+/*
+ * The function fair_sched_class.update_curr accesses the struct curr
+ * and its field curr->exec_start; when called from task_sched_runtime(),
+ * we observe a high rate of cache misses in practice.
+ * Prefetching this data results in improved performance.
+ */
+static inline void prefetch_curr_exec_start(struct task_struct *p)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+#else
+ struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
+#endif
+ prefetch(curr);
+ prefetch(&curr->exec_start);
+}
+
/*
* Return accounted runtime for the task.
* In case the task is currently running, return the runtime plus current's
* thread, breaking clock_gettime().
*/
if (task_current(rq, p) && task_on_rq_queued(p)) {
+ prefetch_curr_exec_start(p);
update_rq_clock(rq);
p->sched_class->update_curr(rq);
}
if (old_idx == IDX_INVALID) {
cp->size++;
- cp->elements[cp->size - 1].dl = 0;
+ cp->elements[cp->size - 1].dl = dl;
cp->elements[cp->size - 1].cpu = cpu;
cp->elements[cpu].idx = cp->size - 1;
cpudl_change_key(cp, cp->size - 1, dl);
*/
void account_idle_ticks(unsigned long ticks)
{
+ cputime_t cputime, steal;
if (sched_clock_irqtime) {
irqtime_account_idle_ticks(ticks);
return;
}
- account_idle_time(jiffies_to_cputime(ticks));
+ cputime = jiffies_to_cputime(ticks);
+ steal = steal_account_process_time(cputime);
+
+ if (steal >= cputime)
+ return;
+
+ cputime -= steal;
+ account_idle_time(cputime);
}
/*
*
* XXX figure out if select_task_rq_dl() deals with offline cpus.
*/
- if (unlikely(!rq->online))
+ if (unlikely(!rq->online)) {
+ lockdep_unpin_lock(&rq->lock, rf.cookie);
rq = dl_task_offline_migration(rq, p);
+ rf.cookie = lockdep_pin_lock(&rq->lock);
+ }
/*
* Queueing this task back might have overloaded rq, check if we need
pcfs_rq = tg->parent->cfs_rq[cpu];
cfs_rq->throttle_count = pcfs_rq->throttle_count;
- pcfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
+ cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
}
/* conditionally throttle active cfs_rq's from put_prev_entity() */
list_del(&page->lru);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
+ h->max_huge_pages--;
update_and_free_page(h, page);
}
spin_unlock(&hugetlb_lock);
new_quarantine_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
QUARANTINE_FRACTION;
percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus();
- if (WARN_ONCE(new_quarantine_size < percpu_quarantines,
- "Too little memory, disabling global KASAN quarantine.\n"))
- new_quarantine_size = 0;
- else
- new_quarantine_size -= percpu_quarantines;
+ new_quarantine_size = (new_quarantine_size < percpu_quarantines) ?
+ 0 : new_quarantine_size - percpu_quarantines;
WRITE_ONCE(quarantine_size, new_quarantine_size);
last = global_quarantine.head;
static DEFINE_IDR(mem_cgroup_idr);
-static void mem_cgroup_id_get(struct mem_cgroup *memcg)
+static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
{
- atomic_inc(&memcg->id.ref);
+ atomic_add(n, &memcg->id.ref);
}
-static void mem_cgroup_id_put(struct mem_cgroup *memcg)
+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
{
- if (atomic_dec_and_test(&memcg->id.ref)) {
+ while (!atomic_inc_not_zero(&memcg->id.ref)) {
+ /*
+ * The root cgroup cannot be destroyed, so its refcount must
+ * always be >= 1.
+ */
+ if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+ VM_BUG_ON(1);
+ break;
+ }
+ memcg = parent_mem_cgroup(memcg);
+ if (!memcg)
+ memcg = root_mem_cgroup;
+ }
+ return memcg;
+}
+
+static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
+{
+ if (atomic_sub_and_test(n, &memcg->id.ref)) {
idr_remove(&mem_cgroup_idr, memcg->id.id);
memcg->id.id = 0;
}
}
+static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
+{
+ mem_cgroup_id_get_many(memcg, 1);
+}
+
+static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
+{
+ mem_cgroup_id_put_many(memcg, 1);
+}
+
/**
* mem_cgroup_from_id - look up a memcg from a memcg id
* @id: the memcg id to look up
if (!mem_cgroup_is_root(mc.from))
page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
+ mem_cgroup_id_put_many(mc.from, mc.moved_swap);
+
/*
* we charged both to->memory and to->memsw, so we
* should uncharge to->memory.
if (!mem_cgroup_is_root(mc.to))
page_counter_uncharge(&mc.to->memory, mc.moved_swap);
- css_put_many(&mc.from->css, mc.moved_swap);
+ mem_cgroup_id_get_many(mc.to, mc.moved_swap);
+ css_put_many(&mc.to->css, mc.moved_swap);
- /* we've already done css_get(mc.to) */
mc.moved_swap = 0;
}
memcg_oom_recover(from);
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
- struct mem_cgroup *memcg;
+ struct mem_cgroup *memcg, *swap_memcg;
unsigned short oldid;
VM_BUG_ON_PAGE(PageLRU(page), page);
if (!memcg)
return;
- mem_cgroup_id_get(memcg);
- oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+ /*
+ * In case the memcg owning these pages has been offlined and doesn't
+ * have an ID allocated to it anymore, charge the closest online
+ * ancestor for the swap instead and transfer the memory+swap charge.
+ */
+ swap_memcg = mem_cgroup_id_get_online(memcg);
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
VM_BUG_ON_PAGE(oldid, page);
- mem_cgroup_swap_statistics(memcg, true);
+ mem_cgroup_swap_statistics(swap_memcg, true);
page->mem_cgroup = NULL;
if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memory, 1);
+ if (memcg != swap_memcg) {
+ if (!mem_cgroup_is_root(swap_memcg))
+ page_counter_charge(&swap_memcg->memsw, 1);
+ page_counter_uncharge(&memcg->memsw, 1);
+ }
+
/*
* Interrupts should be disabled here because the caller holds the
* mapping->tree_lock lock which is taken with interrupts-off. It is
if (!memcg)
return 0;
+ memcg = mem_cgroup_id_get_online(memcg);
+
if (!mem_cgroup_is_root(memcg) &&
- !page_counter_try_charge(&memcg->swap, 1, &counter))
+ !page_counter_try_charge(&memcg->swap, 1, &counter)) {
+ mem_cgroup_id_put(memcg);
return -ENOMEM;
+ }
- mem_cgroup_id_get(memcg);
oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
mem_cgroup_swap_statistics(memcg, true);
/* init node's zones as empty zones, we don't have any present pages.*/
free_area_init_node(nid, zones_size, start_pfn, zholes_size);
+ pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
/*
* The node we allocated has no zone fallback lists. For avoiding
static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
{
arch_refresh_nodedata(nid, NULL);
+ free_percpu(pgdat->per_cpu_nodestats);
arch_free_nodedata(pgdat);
return;
}
{
struct mm_struct *mm = task->mm;
struct task_struct *p;
- bool ret;
+ bool ret = true;
/*
* Skip tasks without mm because it might have passed its exit_mm and
int lru;
for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
- pages[lru] = global_page_state(NR_LRU_BASE + lru);
+ pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
for_each_zone(zone)
wmark_low += zone->watermark[WMARK_LOW];
}
#endif
+static void setup_min_unmapped_ratio(void);
+static void setup_min_slab_ratio(void);
#else /* CONFIG_NUMA */
static void set_zonelist_order(void)
zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
#ifdef CONFIG_NUMA
zone->node = nid;
- pgdat->min_unmapped_pages += (freesize*sysctl_min_unmapped_ratio)
- / 100;
- pgdat->min_slab_pages += (freesize * sysctl_min_slab_ratio) / 100;
#endif
zone->name = zone_names[j];
zone->zone_pgdat = pgdat;
setup_per_zone_wmarks();
refresh_zone_stat_thresholds();
setup_per_zone_lowmem_reserve();
+
+#ifdef CONFIG_NUMA
+ setup_min_unmapped_ratio();
+ setup_min_slab_ratio();
+#endif
+
return 0;
}
core_initcall(init_per_zone_wmark_min)
}
#ifdef CONFIG_NUMA
+static void setup_min_unmapped_ratio(void)
+{
+ pg_data_t *pgdat;
+ struct zone *zone;
+
+ for_each_online_pgdat(pgdat)
+ pgdat->min_unmapped_pages = 0;
+
+ for_each_zone(zone)
+ zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
+ sysctl_min_unmapped_ratio) / 100;
+}
+
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
- struct pglist_data *pgdat;
- struct zone *zone;
int rc;
rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;
+ setup_min_unmapped_ratio();
+
+ return 0;
+}
+
+static void setup_min_slab_ratio(void)
+{
+ pg_data_t *pgdat;
+ struct zone *zone;
+
for_each_online_pgdat(pgdat)
pgdat->min_slab_pages = 0;
for_each_zone(zone)
- zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
- sysctl_min_unmapped_ratio) / 100;
- return 0;
+ zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
+ sysctl_min_slab_ratio) / 100;
}
int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
- struct pglist_data *pgdat;
- struct zone *zone;
int rc;
rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;
- for_each_online_pgdat(pgdat)
- pgdat->min_slab_pages = 0;
+ setup_min_slab_ratio();
- for_each_zone(zone)
- zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
- sysctl_min_slab_ratio) / 100;
return 0;
}
#endif
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
} else {
- if (PageTransCompound(page)) {
- VM_BUG_ON_PAGE(!PageLocked(page), page);
+ if (PageTransCompound(page) && page_mapping(page)) {
+ VM_WARN_ON_ONCE(!PageLocked(page));
+
SetPageDoubleMap(compound_head(page));
if (PageMlocked(page))
clear_page_mlock(compound_head(page));
{
int i, nr = 1;
- VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
+ VM_BUG_ON_PAGE(compound && !PageHead(page), page);
lock_page_memcg(page);
/* Hugepages are not counted in NR_FILE_MAPPED for now. */
struct kobj_attribute shmem_enabled_attr =
__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
bool shmem_huge_enabled(struct vm_area_struct *vma)
{
struct inode *inode = file_inode(vma->vm_file);
return false;
}
}
-#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
#else /* !CONFIG_SHMEM */
*/
static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
+ LIST_HEAD(discard);
struct page *page, *h;
BUG_ON(irqs_disabled());
list_for_each_entry_safe(page, h, &n->partial, lru) {
if (!page->inuse) {
remove_partial(n, page);
- discard_slab(s, page);
+ list_add(&page->lru, &discard);
} else {
list_slab_objects(s, page,
"Objects remaining in %s on __kmem_cache_shutdown()");
}
}
spin_unlock_irq(&n->list_lock);
+
+ list_for_each_entry_safe(page, h, &discard, lru)
+ discard_slab(s, page);
}
/*
/* wakeup anybody waiting for slots to pin pages */
wake_up(&vp_wq);
}
- kfree(in_pages);
- kfree(out_pages);
+ kvfree(in_pages);
+ kvfree(out_pages);
return err;
}
put_generic_request(req);
}
-void cancel_generic_request(struct ceph_mon_generic_request *req)
+static void cancel_generic_request(struct ceph_mon_generic_request *req)
{
struct ceph_mon_client *monc = req->monc;
struct ceph_mon_generic_request *lookup_req;
pages = ceph_alloc_page_vector(calc_pages_for(0, data_len),
GFP_NOIO);
- if (!pages) {
+ if (IS_ERR(pages)) {
ceph_msg_put(m);
return NULL;
}
}
EXPORT_SYMBOL(ceph_find_or_create_string);
-static void ceph_free_string(struct rcu_head *head)
-{
- struct ceph_string *cs = container_of(head, struct ceph_string, rcu);
- kfree(cs);
-}
-
void ceph_release_string(struct kref *ref)
{
struct ceph_string *cs = container_of(ref, struct ceph_string, kref);
}
spin_unlock(&string_tree_lock);
- call_rcu(&cs->rcu, ceph_free_string);
+ kfree_rcu(cs, rcu);
}
EXPORT_SYMBOL(ceph_release_string);
}
static struct gss_upcall_msg *
-__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid)
+__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth)
{
struct gss_upcall_msg *pos;
list_for_each_entry(pos, &pipe->in_downcall, list) {
if (!uid_eq(pos->uid, uid))
continue;
+ if (auth && pos->auth->service != auth->service)
+ continue;
atomic_inc(&pos->count);
dprintk("RPC: %s found msg %p\n", __func__, pos);
return pos;
struct gss_upcall_msg *old;
spin_lock(&pipe->lock);
- old = __gss_find_upcall(pipe, gss_msg->uid);
+ old = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth);
if (old == NULL) {
atomic_inc(&gss_msg->count);
list_add(&gss_msg->list, &pipe->in_downcall);
err = -ENOENT;
/* Find a matching upcall */
spin_lock(&pipe->lock);
- gss_msg = __gss_find_upcall(pipe, uid);
+ gss_msg = __gss_find_upcall(pipe, uid, NULL);
if (gss_msg == NULL) {
spin_unlock(&pipe->lock);
goto err_put_ctx;
{
struct rpc_xprt_switch *xps;
struct rpc_xprt *xprt;
+ unsigned long reconnect_timeout;
unsigned char resvport;
int ret = 0;
return -EAGAIN;
}
resvport = xprt->resvport;
+ reconnect_timeout = xprt->max_reconnect_timeout;
rcu_read_unlock();
xprt = xprt_create_transport(xprtargs);
goto out_put_switch;
}
xprt->resvport = resvport;
+ xprt->max_reconnect_timeout = reconnect_timeout;
rpc_xprt_switch_set_roundrobin(xps);
if (setup) {
}
EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
+static int
+rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt,
+ void *data)
+{
+ unsigned long timeout = *((unsigned long *)data);
+
+ if (timeout < xprt->max_reconnect_timeout)
+ xprt->max_reconnect_timeout = timeout;
+ return 0;
+}
+
+void
+rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo)
+{
+ rpc_clnt_iterate_for_each_xprt(clnt,
+ rpc_xprt_cap_max_reconnect_timeout,
+ &timeo);
+}
+EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static void rpc_show_header(void)
{
spin_unlock_bh(&xprt->transport_lock);
}
+static bool
+xprt_has_timer(const struct rpc_xprt *xprt)
+{
+ return xprt->idle_timeout != 0;
+}
+
+static void
+xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
+ __must_hold(&xprt->transport_lock)
+{
+ if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
+ mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
+}
+
static void
xprt_init_autodisconnect(unsigned long data)
{
spin_lock(&xprt->transport_lock);
if (!list_empty(&xprt->recv))
goto out_abort;
+ /* Reset xprt->last_used to avoid connect/autodisconnect cycling */
+ xprt->last_used = jiffies;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
goto out_abort;
spin_unlock(&xprt->transport_lock);
goto out;
xprt->snd_task = NULL;
xprt->ops->release_xprt(xprt, NULL);
+ xprt_schedule_autodisconnect(xprt);
out:
spin_unlock_bh(&xprt->transport_lock);
wake_up_bit(&xprt->state, XPRT_LOCKED);
spin_unlock_bh(&xprt->transport_lock);
}
-static inline int xprt_has_timer(struct rpc_xprt *xprt)
-{
- return xprt->idle_timeout != 0;
-}
-
/**
* xprt_prepare_transmit - reserve the transport before sending a request
* @task: RPC task about to send a request
if (!list_empty(&req->rq_list))
list_del(&req->rq_list);
xprt->last_used = jiffies;
- if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
- mod_timer(&xprt->timer,
- xprt->last_used + xprt->idle_timeout);
+ xprt_schedule_autodisconnect(xprt);
spin_unlock_bh(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(req->rq_buffer);
* increase over time if the server is down or not responding.
*/
#define XS_TCP_INIT_REEST_TO (3U * HZ)
-#define XS_TCP_MAX_REEST_TO (5U * 60 * HZ)
/*
* TCP idle timeout; client drops the transport socket if it is idle
write_unlock_bh(&sk->sk_callback_lock);
}
xs_udp_do_set_buffer_size(xprt);
+
+ xprt->stat.connect_start = jiffies;
}
static void xs_udp_setup_socket(struct work_struct *work)
unsigned int keepcnt = xprt->timeout->to_retries + 1;
unsigned int opt_on = 1;
unsigned int timeo;
+ unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
/* TCP Keepalive options */
kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
(char *)&keepcnt, sizeof(keepcnt));
+ /* Avoid temporary addresses; they are bad for long-lived
+ * connections such as NFS mounts.
+ * RFC4941, section 3.6 suggests that:
+ * Individual applications, which have specific
+ * knowledge about the normal duration of connections,
+ * MAY override this as appropriate.
+ */
+ kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
+ (char *)&addr_pref, sizeof(addr_pref));
+
/* TCP user timeout (see RFC5482) */
timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
(xprt->timeout->to_retries + 1);
/* SYN_SENT! */
if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+ break;
+ case -EADDRNOTAVAIL:
+ /* Source port number is unavailable. Try a new one! */
+ transport->srcport = 0;
}
out:
return ret;
xprt_wake_pending_tasks(xprt, status);
}
+static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt)
+{
+ unsigned long start, now = jiffies;
+
+ start = xprt->stat.connect_start + xprt->reestablish_timeout;
+ if (time_after(start, now))
+ return start - now;
+ return 0;
+}
+
+static void xs_reconnect_backoff(struct rpc_xprt *xprt)
+{
+ xprt->reestablish_timeout <<= 1;
+ if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
+ xprt->reestablish_timeout = xprt->max_reconnect_timeout;
+ if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+ xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+}
+
/**
* xs_connect - connect a socket to a remote endpoint
* @xprt: pointer to transport structure
static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ unsigned long delay = 0;
WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
/* Start by resetting any existing state */
xs_reset_transport(transport);
- queue_delayed_work(xprtiod_workqueue,
- &transport->connect_worker,
- xprt->reestablish_timeout);
- xprt->reestablish_timeout <<= 1;
- if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
- xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
- if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
- xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
- } else {
+ delay = xs_reconnect_delay(xprt);
+ xs_reconnect_backoff(xprt);
+
+ } else
dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
- queue_delayed_work(xprtiod_workqueue,
- &transport->connect_worker, 0);
- }
+
+ queue_delayed_work(xprtiod_workqueue,
+ &transport->connect_worker,
+ delay);
}
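Taken together, xs_connect() now waits xs_reconnect_delay() (the time remaining since the last connect attempt) and xs_reconnect_backoff() doubles the reestablish timeout on every attempt, bounded below by XS_TCP_INIT_REEST_TO and above by xprt->max_reconnect_timeout, which nfs4_set_lease_period() caps at half the server's lease period. A stand-alone sketch of the resulting wait schedule (user-space C; HZ and the 90-second lease are assumed values for illustration only):

#include <stdio.h>

#define HZ			1000UL
#define XS_TCP_INIT_REEST_TO	(3UL * HZ)

static unsigned long reestablish_timeout = XS_TCP_INIT_REEST_TO;
static unsigned long max_reconnect_timeout = 45UL * HZ;	/* 90s lease / 2 */

/* Mirrors the doubling-and-clamping logic of xs_reconnect_backoff(). */
static void reconnect_backoff(void)
{
	reestablish_timeout <<= 1;
	if (reestablish_timeout > max_reconnect_timeout)
		reestablish_timeout = max_reconnect_timeout;
	if (reestablish_timeout < XS_TCP_INIT_REEST_TO)
		reestablish_timeout = XS_TCP_INIT_REEST_TO;
}

int main(void)
{
	for (int attempt = 1; attempt <= 6; attempt++) {
		reconnect_backoff();
		printf("attempt %d: wait up to %lus\n",
		       attempt, reestablish_timeout / HZ);
	}
	return 0;	/* prints 6, 12, 24, 45, 45, 45 seconds */
}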
/**
xprt->ops = &xs_tcp_ops;
xprt->timeout = &xs_tcp_default_timeout;
+ xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+
INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
die "$P: file '${file}' not found\n";
}
}
- if ($from_filename || vcs_file_exists($file)) {
+ if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
$file =~ s/^\Q${cur_path}\E//; #strip any absolute path
$file =~ s/^\Q${lk_path}\E//; #or the path to the lk tree
push(@files, $file);
struct snd_card *card = dev_get_drvdata(dev);
struct azx *chip;
struct hda_intel *hda;
+ struct hdac_bus *bus;
if (!card)
return 0;
chip = card->private_data;
hda = container_of(chip, struct hda_intel, chip);
+ bus = azx_bus(chip);
if (chip->disabled || hda->init_failed || !chip->running)
return 0;
- if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL
- && hda->need_i915_power) {
- snd_hdac_display_power(azx_bus(chip), true);
- snd_hdac_i915_set_bclk(azx_bus(chip));
+ if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
+ snd_hdac_display_power(bus, true);
+ if (hda->need_i915_power)
+ snd_hdac_i915_set_bclk(bus);
}
+
if (chip->msi)
if (pci_enable_msi(pci) < 0)
chip->msi = 0;
hda_intel_init_chip(chip, true);
+ /* power down again for link-controlled chips */
+ if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
+ !hda->need_i915_power)
+ snd_hdac_display_power(bus, false);
+
snd_power_change_state(card, SNDRV_CTL_POWER_D0);
trace_azx_resume(chip);
chip = card->private_data;
hda = container_of(chip, struct hda_intel, chip);
+ bus = azx_bus(chip);
if (chip->disabled || hda->init_failed)
return 0;
return 0;
if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
- bus = azx_bus(chip);
- if (hda->need_i915_power) {
- snd_hdac_display_power(bus, true);
+ snd_hdac_display_power(bus, true);
+ if (hda->need_i915_power)
snd_hdac_i915_set_bclk(bus);
- } else {
- /* toggle codec wakeup bit for STATESTS read */
- snd_hdac_set_codec_wakeup(bus, true);
- snd_hdac_set_codec_wakeup(bus, false);
- }
}
/* Read STATESTS before controller reset */
azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) &
~STATESTS_INT_MASK);
+ /* power down again for link-controlled chips */
+ if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
+ !hda->need_i915_power)
+ snd_hdac_display_power(bus, false);
+
trace_azx_runtime_resume(chip);
return 0;
}
{
/* devices which do not support reading the sample rate. */
switch (chip->usb_id) {
+ case USB_ID(0x041E, 0x4080): /* Creative Live Cam VF0610 */
case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema */
case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
case USB_ID(0x045E, 0x076E): /* MS Lifecam HD-5001 */
case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */
+ case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */
case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */
case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
-#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */
-
-
#ifdef CONFIG_X86_32
/*
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
*/
#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
#endif
-
+#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#endif /* _ASM_X86_CPUFEATURES_H */
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
+#define DISABLED_MASK17 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
#define REQUIRED_MASK14 0
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 0
+#define REQUIRED_MASK17 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
-#define EXIT_REASON_PCOMMIT 65
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
{ EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }, \
{ EXIT_REASON_XSAVES, "XSAVES" }, \
- { EXIT_REASON_XRSTORS, "XRSTORS" }, \
- { EXIT_REASON_PCOMMIT, "PCOMMIT" }
+ { EXIT_REASON_XRSTORS, "XRSTORS" }
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_STACK_TRACE,
+ BPF_MAP_TYPE_CGROUP_ARRAY,
};
enum bpf_prog_type {
BPF_PROG_TYPE_SCHED_CLS,
BPF_PROG_TYPE_SCHED_ACT,
BPF_PROG_TYPE_TRACEPOINT,
+ BPF_PROG_TYPE_XDP,
};
#define BPF_PSEUDO_MAP_FD 1
*/
BPF_FUNC_skb_get_tunnel_opt,
BPF_FUNC_skb_set_tunnel_opt,
+
+ /**
+ * bpf_skb_change_proto(skb, proto, flags)
+ * Change protocol of the skb. Currently supported is
+ * v4 -> v6, v6 -> v4 transitions. The helper will also
+ * resize the skb. eBPF program is expected to fill the
+ * new headers via skb_store_bytes and lX_csum_replace.
+ * @skb: pointer to skb
+ * @proto: new skb->protocol type
+ * @flags: reserved
+ * Return: 0 on success or negative error
+ */
+ BPF_FUNC_skb_change_proto,
+
+ /**
+ * bpf_skb_change_type(skb, type)
+ * Change packet type of skb.
+ * @skb: pointer to skb
+ * @type: new skb->pkt_type type
+ * Return: 0 on success or negative error
+ */
+ BPF_FUNC_skb_change_type,
+
+ /**
+ * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+ * @skb: pointer to skb
+ * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ * @index: index of the cgroup in the bpf_map
+ * Return:
+ * == 0 skb failed the cgroup2 descendant test
+ * == 1 skb succeeded the cgroup2 descendant test
+ * < 0 error
+ */
+ BPF_FUNC_skb_in_cgroup,
+
+ /**
+ * bpf_get_hash_recalc(skb)
+ * Retrieve and possibly recalculate skb->hash.
+ * @skb: pointer to skb
+ * Return: hash
+ */
+ BPF_FUNC_get_hash_recalc,
+
+ /**
+ * u64 bpf_get_current_task(void)
+ * Returns current task_struct
+ * Return: current
+ */
+ BPF_FUNC_get_current_task,
+
+ /**
+ * bpf_probe_write_user(void *dst, void *src, int len)
+ * safely attempt to write to a location
+ * @dst: destination address in userspace
+ * @src: source address on stack
+ * @len: number of bytes to copy
+ * Return: 0 on success or negative error
+ */
+ BPF_FUNC_probe_write_user,
+
__BPF_FUNC_MAX_ID,
};
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
-/* BPF_FUNC_perf_event_output flags. */
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
__u32 tunnel_label;
};
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ */
+enum xdp_action {
+ XDP_ABORTED = 0,
+ XDP_DROP,
+ XDP_PASS,
+ XDP_TX,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+ __u32 data;
+ __u32 data_end;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
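To make the new XDP program type concrete, here is a minimal sketch of a program that consumes struct xdp_md and returns one of the xdp_action codes above. Only the types and return codes come from this header; the section name, the SEC() helper macro and the build command are toolchain/loader conventions assumed for illustration (e.g. clang -O2 -target bpf -c xdp_min.c -o xdp_min.o):

#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

SEC("xdp")
int xdp_min_len(struct xdp_md *ctx)
{
	/* The __u32 data/data_end fields are accessed as packet pointers;
	 * programs conventionally cast them via long. */
	void *data     = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	/* Drop frames too short to hold a 14-byte Ethernet header. */
	if (data + 14 > data_end)
		return XDP_DROP;

	return XDP_PASS;
}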
'NAME' specifies the name of this argument (optional). You can use the name of a local variable, a local data structure member (e.g. var->field, var.field2), a local array with a fixed index (e.g. array[1], var->array[0], var->pointer[2]), or the kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set to the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2').
The '$vars' and '$params' special arguments are also available for NAME: '$vars' is expanded to the local variables (including function parameters) that can be accessed at the given probe point, and '$params' is expanded to only the function parameters.
-'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), signedness casting (u/s), "string" and bitfield are supported (see TYPES for details).
On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
+TYPES
+-----
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. The prefixes 's' and 'u' mean that a type is signed or unsigned, respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' alone to specify only the signedness and let perf probe auto-detect the size.
+The 'string' type is a special type which fetches a null-terminated string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify the 'string' type only for a local variable or structure member which is an array of, or a pointer to, 'char' or 'unsigned char'.
+Bitfield is another special type, which takes 3 parameters: bit-width, bit-offset, and container-size (usually 32). The syntax is:
+
+ b<bit-width>@<bit-offset>/<container-size>
+
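For example (the probe point and member names below are purely illustrative, not taken from this patch), the following would record a 2-bit field starting at bit offset 4 of a 32-bit 'flags' member, plus the same member again with only its signedness forced to unsigned:

 perf probe 'some_func ptr->flags:b2@4/32 raw_flags=ptr->flags:u'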
LINE SYNTAX
-----------
Line range is described by following syntax.
--fields::
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
- srcline, period, iregs, brstack, brstacksym, flags.
- Field list can be prepended with the type, trace, sw or hw,
+ srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
+ callindent. Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
#endif
#if defined(_CALL_ELF) && _CALL_ELF == 2
-bool arch__prefers_symtab(void)
-{
- return true;
-}
#ifdef HAVE_LIBELF_SUPPORT
void arch__sym_update(struct symbol *s, GElf_Sym *sym)
tev->point.offset += lep_offset;
}
}
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+ int ntevs)
+{
+ struct probe_trace_event *tev;
+ struct map *map;
+ struct symbol *sym = NULL;
+ struct rb_node *tmp;
+ int i = 0;
+
+ map = get_target_map(pev->target, pev->uprobes);
+ if (!map || map__load(map, NULL) < 0)
+ return;
+
+ for (i = 0; i < ntevs; i++) {
+ tev = &pev->tevs[i];
+ map__for_each_symbol(map, sym, tmp) {
+ if (map->unmap_ip(map, sym->start) == tev->point.address)
+ arch__fix_tev_from_maps(pev, tev, map, sym);
+ }
+ }
+}
+
#endif
"Valid types: hw,sw,trace,raw. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,period,iregs,brstack,brstacksym,flags,"
- "callindent", parse_output_fields),
+ "bpf-output,callindent", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
return 0;
}
-static void read_counters(bool close_counters)
+static void read_counters(void)
{
struct perf_evsel *counter;
if (perf_stat_process_counter(&stat_config, counter))
pr_warning("failed to process counter %s\n", counter->name);
-
- if (close_counters) {
- perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
- thread_map__nr(evsel_list->threads));
- }
}
}
{
struct timespec ts, rs;
- read_counters(false);
+ read_counters();
clock_gettime(CLOCK_MONOTONIC, &ts);
diff_timespec(&rs, &ts, &ref_time);
perf_evlist__enable(evsel_list);
}
+static void disable_counters(void)
+{
+ /*
+ * If we don't have a tracee (attaching to a task or cpu), counters may
+ * still be running. To get accurate group ratios, we must stop groups
+ * from counting before reading their constituent counters.
+ */
+ if (!target__none(&target))
+ perf_evlist__disable(evsel_list);
+}
+
static volatile int workload_exec_errno;
/*
}
}
+ disable_counters();
+
t1 = rdclock();
update_stats(&walltime_nsecs_stats, t1 - t0);
- read_counters(true);
+ /*
+ * Closing a group leader splits the group, and as we only disable
+ * group leaders, this results in the remaining events becoming enabled. To
+ * avoid arbitrary skew, we must read all counters before closing any
+ * group leaders.
+ */
+ read_counters();
+ perf_evlist__close(evsel_list);
return WEXITSTATUS(status);
}
module = "kernel";
for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+ /* short_name is "[module]" */
if (strncmp(pos->dso->short_name + 1, module,
- pos->dso->short_name_len - 2) == 0) {
+ pos->dso->short_name_len - 2) == 0 &&
+ module[pos->dso->short_name_len - 2] == '\0') {
return pos;
}
}
return NULL;
}
-static struct map *get_target_map(const char *target, bool user)
+struct map *get_target_map(const char *target, bool user)
{
/* Init maps of given executable or kernel */
if (user)
if (uprobes)
address = sym->start;
else
- address = map->unmap_ip(map, sym->start);
+ address = map->unmap_ip(map, sym->start) - map->reloc;
break;
}
if (!address) {
return ret;
}
-/* Post processing the probe events */
-static int post_process_probe_trace_events(struct probe_trace_event *tevs,
- int ntevs, const char *module,
- bool uprobe)
+static int
+post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
+ int ntevs)
{
struct ref_reloc_sym *reloc_sym;
char *tmp;
int i, skipped = 0;
- if (uprobe)
- return add_exec_to_probe_trace_events(tevs, ntevs, module);
-
- /* Note that currently ref_reloc_sym based probe is not for drivers */
- if (module)
- return add_module_to_probe_trace_events(tevs, ntevs, module);
-
reloc_sym = kernel_get_ref_reloc_sym();
if (!reloc_sym) {
pr_warning("Relocated base symbol is not found!\n");
return skipped;
}
+void __weak
+arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused,
+ int ntevs __maybe_unused)
+{
+}
+
+/* Post processing the probe events */
+static int post_process_probe_trace_events(struct perf_probe_event *pev,
+ struct probe_trace_event *tevs,
+ int ntevs, const char *module,
+ bool uprobe)
+{
+ int ret;
+
+ if (uprobe)
+ ret = add_exec_to_probe_trace_events(tevs, ntevs, module);
+ else if (module)
+ /* Currently ref_reloc_sym based probe is not for drivers */
+ ret = add_module_to_probe_trace_events(tevs, ntevs, module);
+ else
+ ret = post_process_kernel_probe_trace_events(tevs, ntevs);
+
+ if (ret >= 0)
+ arch__post_process_probe_trace_events(pev, ntevs);
+
+ return ret;
+}
+
/* Try to find perf_probe_event with debuginfo */
static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs)
if (ntevs > 0) { /* Succeeded to find trace events */
pr_debug("Found %d probe_trace_events.\n", ntevs);
- ret = post_process_probe_trace_events(*tevs, ntevs,
+ ret = post_process_probe_trace_events(pev, *tevs, ntevs,
pev->target, pev->uprobes);
if (ret < 0 || ret == ntevs) {
clear_probe_trace_events(*tevs, ntevs);
return err;
}
-bool __weak arch__prefers_symtab(void) { return false; }
-
/* Concatenate two arrays */
static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
{
if (ret > 0 || pev->sdt) /* SDT can be found only in the cache */
return ret == 0 ? -ENOENT : ret; /* Found in probe cache */
- if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
- ret = find_probe_trace_events_from_map(pev, tevs);
- if (ret > 0)
- return ret; /* Found in symbol table */
- }
-
/* Convert perf_probe_event with debuginfo */
ret = try_to_find_probe_trace_events(pev, tevs);
if (ret != 0)
int show_available_vars(struct perf_probe_event *pevs, int npevs,
struct strfilter *filter);
int show_available_funcs(const char *module, struct strfilter *filter, bool user);
-bool arch__prefers_symtab(void);
void arch__fix_tev_from_maps(struct perf_probe_event *pev,
struct probe_trace_event *tev, struct map *map,
struct symbol *sym);
int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
struct perf_probe_arg *pvar);
+struct map *get_target_map(const char *target, bool user);
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+ int ntevs);
+
#endif /*_PROBE_EVENT_H */
char sbuf[STRERR_BUFSIZE];
int bsize, boffs, total;
int ret;
+ char sign;
/* TODO: check all types */
- if (cast && strcmp(cast, "string") != 0) {
+ if (cast && strcmp(cast, "string") != 0 &&
+ strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
/* Non string type is OK */
+ /* and respect signedness cast */
tvar->type = strdup(cast);
return (tvar->type == NULL) ? -ENOMEM : 0;
}
return (tvar->type == NULL) ? -ENOMEM : 0;
}
+ if (cast && (strcmp(cast, "u") == 0))
+ sign = 'u';
+ else if (cast && (strcmp(cast, "s") == 0))
+ sign = 's';
+ else
+ sign = die_is_signed_type(&type) ? 's' : 'u';
+
ret = dwarf_bytesize(&type);
if (ret <= 0)
/* No size ... try to use default type */
dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
ret = MAX_BASIC_TYPE_BITS;
}
- ret = snprintf(buf, 16, "%c%d",
- die_is_signed_type(&type) ? 's' : 'u', ret);
+ ret = snprintf(buf, 16, "%c%d", sign, ret);
formatted:
if (ret < 0 || ret >= 16) {
} else {
pevent_event_info(&seq, evsel->tp_format, &rec);
}
- return seq.buffer;
+ /*
+ * Trim the buffer, it starts at 4KB and we're not going to
+ * add anything more to this buffer.
+ */
+ return realloc(seq.buffer, seq.len + 1);
}
static int64_t
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
#include <linux/libnvdimm.h>
#include <linux/vmalloc.h>
#include <linux/device.h>
if (nfit_test->setup != nfit_test0_setup)
return 0;
+ flush_work(&acpi_desc->work);
nfit_test->setup_hotplug = 1;
nfit_test->setup(nfit_test);
GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
-CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
+CFLAGS := -std=gnu99 -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
export CFLAGS