Merge remote-tracking branch 'kvm-ppc-paulus/kvm-ppc-next'
author Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2016 02:26:25 +0000 (12:26 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2016 02:26:25 +0000 (12:26 +1000)
32 files changed:
arch/arm/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_host.h
arch/mips/include/asm/kvm_host.h
arch/powerpc/include/asm/book3s/64/mmu-hash.h
arch/powerpc/include/asm/io.h
arch/powerpc/include/asm/kvm_asm.h
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/include/asm/kvm_book3s_64.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/mmu.h
arch/powerpc/include/asm/opal.h
arch/powerpc/include/asm/pnv-pci.h
arch/powerpc/kvm/Kconfig
arch/powerpc/kvm/Makefile
arch/powerpc/kvm/book3s.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/book3s_hv_rm_xics.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_xics.c
arch/powerpc/kvm/book3s_xics.h
arch/powerpc/kvm/e500_mmu.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/kvm/trace_hv.h
arch/powerpc/mm/hash_native_64.c
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/platforms/powernv/opal-wrappers.S
arch/powerpc/platforms/powernv/pci-ioda.c
arch/s390/include/asm/kvm_host.h
arch/x86/include/asm/kvm_host.h
virt/kvm/kvm_main.c

index de338d93d11b9002bb48a778064f0acf6d6dd79d..6ad21f04a922466aa59a957aa587c7d20409d85f 100644
@@ -183,15 +183,15 @@ struct kvm_vcpu_arch {
 };
 
 struct kvm_vm_stat {
-       u32 remote_tlb_flush;
+       ulong remote_tlb_flush;
 };
 
 struct kvm_vcpu_stat {
-       u32 halt_successful_poll;
-       u32 halt_attempted_poll;
-       u32 halt_poll_invalid;
-       u32 halt_wakeup;
-       u32 hvc_exit_stat;
+       u64 halt_successful_poll;
+       u64 halt_attempted_poll;
+       u64 halt_poll_invalid;
+       u64 halt_wakeup;
+       u64 hvc_exit_stat;
        u64 wfe_exit_stat;
        u64 wfi_exit_stat;
        u64 mmio_exit_user;
index 3eda975837d0ffea69f748153aa055ca28a7c049..bd94e67667599dc1ce499ab30de2cca91f4e97be 100644
@@ -290,15 +290,15 @@ struct kvm_vcpu_arch {
 #endif
 
 struct kvm_vm_stat {
-       u32 remote_tlb_flush;
+       ulong remote_tlb_flush;
 };
 
 struct kvm_vcpu_stat {
-       u32 halt_successful_poll;
-       u32 halt_attempted_poll;
-       u32 halt_poll_invalid;
-       u32 halt_wakeup;
-       u32 hvc_exit_stat;
+       u64 halt_successful_poll;
+       u64 halt_attempted_poll;
+       u64 halt_poll_invalid;
+       u64 halt_wakeup;
+       u64 hvc_exit_stat;
        u64 wfe_exit_stat;
        u64 wfi_exit_stat;
        u64 mmio_exit_user;
index 4d7e0e466b5a02ddcc3c1d4f2a2938234dcb762b..8365b0af7e6c98d55d3ebe2e12e1d2dc398d7bad 100644
@@ -124,32 +124,32 @@ static inline bool kvm_is_error_hva(unsigned long addr)
 extern atomic_t kvm_mips_instance;
 
 struct kvm_vm_stat {
-       u32 remote_tlb_flush;
+       ulong remote_tlb_flush;
 };
 
 struct kvm_vcpu_stat {
-       u32 wait_exits;
-       u32 cache_exits;
-       u32 signal_exits;
-       u32 int_exits;
-       u32 cop_unusable_exits;
-       u32 tlbmod_exits;
-       u32 tlbmiss_ld_exits;
-       u32 tlbmiss_st_exits;
-       u32 addrerr_st_exits;
-       u32 addrerr_ld_exits;
-       u32 syscall_exits;
-       u32 resvd_inst_exits;
-       u32 break_inst_exits;
-       u32 trap_inst_exits;
-       u32 msa_fpe_exits;
-       u32 fpe_exits;
-       u32 msa_disabled_exits;
-       u32 flush_dcache_exits;
-       u32 halt_successful_poll;
-       u32 halt_attempted_poll;
-       u32 halt_poll_invalid;
-       u32 halt_wakeup;
+       u64 wait_exits;
+       u64 cache_exits;
+       u64 signal_exits;
+       u64 int_exits;
+       u64 cop_unusable_exits;
+       u64 tlbmod_exits;
+       u64 tlbmiss_ld_exits;
+       u64 tlbmiss_st_exits;
+       u64 addrerr_st_exits;
+       u64 addrerr_ld_exits;
+       u64 syscall_exits;
+       u64 resvd_inst_exits;
+       u64 break_inst_exits;
+       u64 trap_inst_exits;
+       u64 msa_fpe_exits;
+       u64 fpe_exits;
+       u64 msa_disabled_exits;
+       u64 flush_dcache_exits;
+       u64 halt_successful_poll;
+       u64 halt_attempted_poll;
+       u64 halt_poll_invalid;
+       u64 halt_wakeup;
 };
 
 struct kvm_arch_memory_slot {
index 287a656ceb5794f728c495dd4e1b841fdc47c8d5..e407af2b7333500a4ebcc319c5337827abe34509 100644
@@ -244,6 +244,43 @@ static inline int segment_shift(int ssize)
        return SID_SHIFT_1T;
 }
 
+/*
+ * This array is indexed by the LP field of the HPTE second dword.
+ * Since this field may contain some RPN bits, some entries are
+ * replicated so that we get the same value irrespective of RPN.
+ * The top 4 bits are the page size index (MMU_PAGE_*) for the
+ * actual page size, the bottom 4 bits are the base page size.
+ */
+extern u8 hpte_page_sizes[1 << LP_BITS];
+
+static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
+                                            bool is_base_size)
+{
+       unsigned int i, lp;
+
+       if (!(h & HPTE_V_LARGE))
+               return 1ul << 12;
+
+       /* Look at the 8 bit LP value */
+       lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+       i = hpte_page_sizes[lp];
+       if (!i)
+               return 0;
+       if (!is_base_size)
+               i >>= 4;
+       return 1ul << mmu_psize_defs[i & 0xf].shift;
+}
+
+static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
+{
+       return __hpte_page_size(h, l, 0);
+}
+
+static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
+{
+       return __hpte_page_size(h, l, 1);
+}
+
 /*
  * The current system page and segment sizes
  */
index 2fd1690b79d210702c123d6e54d7828ce521be9a..f6fda8482f601b4dd557ad19df6cfeba11d41717 100644
@@ -241,6 +241,35 @@ static inline void out_be64(volatile u64 __iomem *addr, u64 val)
 #endif
 #endif /* __powerpc64__ */
 
+
+/*
+ * Simple Cache inhibited accessors
+ * Unlike the DEF_MMIO_* macros, these don't include any h/w memory
+ * barriers, callers need to manage memory barriers on their own.
+ * These can only be used in hypervisor real mode.
+ */
+
+static inline u32 _lwzcix(unsigned long addr)
+{
+       u32 ret;
+
+       __asm__ __volatile__("lwzcix %0,0, %1"
+                            : "=r" (ret) : "r" (addr) : "memory");
+       return ret;
+}
+
+static inline void _stbcix(u64 addr, u8 val)
+{
+       __asm__ __volatile__("stbcix %0,0,%1"
+               : : "r" (val), "r" (addr) : "memory");
+}
+
+static inline void _stwcix(u64 addr, u32 val)
+{
+       __asm__ __volatile__("stwcix %0,0,%1"
+               : : "r" (val), "r" (addr) : "memory");
+}
+
 /*
  * Low level IO stream instructions are defined out of line for now
  */
index 5bca220bbb6056a58369cc23ceaade6a567f35f7..05cabed3d1bdff2005adf5bbe28d485ddd67277c 100644
 #define BOOK3S_INTERRUPT_FAC_UNAVAIL   0xf60
 #define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80
 
+/* book3s_hv */
+
+/*
+ * Special trap used to indicate to host that this is a
+ * passthrough interrupt that could not be handled
+ * completely in the guest.
+ */
+#define BOOK3S_INTERRUPT_HV_RM_HARD    0x5555
+
 #define BOOK3S_IRQPRIO_SYSTEM_RESET            0
 #define BOOK3S_IRQPRIO_DATA_SEGMENT            1
 #define BOOK3S_IRQPRIO_INST_SEGMENT            2
 #define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
 #define RESUME_FLAG_ARCH1      (1<<2)
+#define RESUME_FLAG_ARCH2      (1<<3)
 
 #define RESUME_GUEST            0
 #define RESUME_GUEST_NV         RESUME_FLAG_NV
index 8f39796c9da8dffaede2751f607375b1d5a4f24b..cef2b892245cc18ebeba0961b36f14353e3381c9 100644
@@ -69,6 +69,42 @@ struct hpte_cache {
        int pagesize;
 };
 
+/*
+ * Struct for a virtual core.
+ * Note: entry_exit_map combines a bitmap of threads that have entered
+ * in the bottom 8 bits and a bitmap of threads that have exited in the
+ * next 8 bits.  This is so that we can atomically set the entry bit
+ * iff the exit map is 0 without taking a lock.
+ */
+struct kvmppc_vcore {
+       int n_runnable;
+       int num_threads;
+       int entry_exit_map;
+       int napping_threads;
+       int first_vcpuid;
+       u16 pcpu;
+       u16 last_cpu;
+       u8 vcore_state;
+       u8 in_guest;
+       struct kvmppc_vcore *master_vcore;
+       struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
+       struct list_head preempt_list;
+       spinlock_t lock;
+       struct swait_queue_head wq;
+       spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
+       u64 stolen_tb;
+       u64 preempt_tb;
+       struct kvm_vcpu *runner;
+       struct kvm *kvm;
+       u64 tb_offset;          /* guest timebase - host timebase */
+       ulong lpcr;
+       u32 arch_compat;
+       ulong pcr;
+       ulong dpdes;            /* doorbell state (POWER8) */
+       ulong conferring_threads;
+       unsigned int halt_poll_ns;
+};
+
 struct kvmppc_vcpu_book3s {
        struct kvmppc_sid_map sid_map[SID_MAP_NUM];
        struct {
@@ -191,6 +227,7 @@ extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
                                 struct kvm_vcpu *vcpu);
 extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
                                   struct kvmppc_book3s_shadow_vcpu *svcpu);
+extern int kvm_irq_bypass;
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 {
index 88d17b4ea9c83177902a5c72a7ac344ba01c55d3..4ffd5a1e788da2480b216c1dbf8bd4754e5f3dcd 100644
@@ -20,6 +20,8 @@
 #ifndef __ASM_KVM_BOOK3S_64_H__
 #define __ASM_KVM_BOOK3S_64_H__
 
+#include <asm/book3s/64/mmu-hash.h>
+
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
 {
@@ -97,56 +99,20 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
        hpte[0] = cpu_to_be64(hpte_v);
 }
 
-static inline int __hpte_actual_psize(unsigned int lp, int psize)
-{
-       int i, shift;
-       unsigned int mask;
-
-       /* start from 1 ignoring MMU_PAGE_4K */
-       for (i = 1; i < MMU_PAGE_COUNT; i++) {
-
-               /* invalid penc */
-               if (mmu_psize_defs[psize].penc[i] == -1)
-                       continue;
-               /*
-                * encoding bits per actual page size
-                *        PTE LP     actual page size
-                *    rrrr rrrz         >=8KB
-                *    rrrr rrzz         >=16KB
-                *    rrrr rzzz         >=32KB
-                *    rrrr zzzz         >=64KB
-                * .......
-                */
-               shift = mmu_psize_defs[i].shift - LP_SHIFT;
-               if (shift > LP_BITS)
-                       shift = LP_BITS;
-               mask = (1 << shift) - 1;
-               if ((lp & mask) == mmu_psize_defs[psize].penc[i])
-                       return i;
-       }
-       return -1;
-}
-
 static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
                                             unsigned long pte_index)
 {
-       int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
+       int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
        unsigned int penc;
        unsigned long rb = 0, va_low, sllp;
        unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
 
        if (v & HPTE_V_LARGE) {
-               for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) {
-
-                       /* valid entries have a shift value */
-                       if (!mmu_psize_defs[b_psize].shift)
-                               continue;
-
-                       a_psize = __hpte_actual_psize(lp, b_psize);
-                       if (a_psize != -1)
-                               break;
-               }
+               i = hpte_page_sizes[lp];
+               b_psize = i & 0xf;
+               a_psize = i >> 4;
        }
+
        /*
         * Ignore the top 14 bits of va
         * v have top two bits covering segment size, hence move
@@ -215,45 +181,6 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
        return rb;
 }
 
-static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
-                                            bool is_base_size)
-{
-
-       int size, a_psize;
-       /* Look at the 8 bit LP value */
-       unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
-
-       /* only handle 4k, 64k and 16M pages for now */
-       if (!(h & HPTE_V_LARGE))
-               return 1ul << 12;
-       else {
-               for (size = 0; size < MMU_PAGE_COUNT; size++) {
-                       /* valid entries have a shift value */
-                       if (!mmu_psize_defs[size].shift)
-                               continue;
-
-                       a_psize = __hpte_actual_psize(lp, size);
-                       if (a_psize != -1) {
-                               if (is_base_size)
-                                       return 1ul << mmu_psize_defs[size].shift;
-                               return 1ul << mmu_psize_defs[a_psize].shift;
-                       }
-               }
-
-       }
-       return 0;
-}
-
-static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
-{
-       return __hpte_page_size(h, l, 0);
-}
-
-static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
-{
-       return __hpte_page_size(h, l, 1);
-}
-
 static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
 {
        return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
index ec35af34a3fb411d6d8f536be997daac9e3b57ce..ed30d2ea21b7b3a5630b6965591427172776bf11 100644
@@ -43,6 +43,8 @@
 #include <asm/cputhreads.h>
 #define KVM_MAX_VCPU_ID                (threads_per_subcore * KVM_MAX_VCORES)
 
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
 #ifdef CONFIG_KVM_MMIO
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #endif
@@ -95,42 +97,49 @@ struct kvmppc_vcpu_book3s;
 struct kvmppc_book3s_shadow_vcpu;
 
 struct kvm_vm_stat {
-       u32 remote_tlb_flush;
+       ulong remote_tlb_flush;
 };
 
 struct kvm_vcpu_stat {
-       u32 sum_exits;
-       u32 mmio_exits;
-       u32 signal_exits;
-       u32 light_exits;
+       u64 sum_exits;
+       u64 mmio_exits;
+       u64 signal_exits;
+       u64 light_exits;
        /* Account for special types of light exits: */
-       u32 itlb_real_miss_exits;
-       u32 itlb_virt_miss_exits;
-       u32 dtlb_real_miss_exits;
-       u32 dtlb_virt_miss_exits;
-       u32 syscall_exits;
-       u32 isi_exits;
-       u32 dsi_exits;
-       u32 emulated_inst_exits;
-       u32 dec_exits;
-       u32 ext_intr_exits;
-       u32 halt_successful_poll;
-       u32 halt_attempted_poll;
-       u32 halt_poll_invalid;
-       u32 halt_wakeup;
-       u32 dbell_exits;
-       u32 gdbell_exits;
-       u32 ld;
-       u32 st;
+       u64 itlb_real_miss_exits;
+       u64 itlb_virt_miss_exits;
+       u64 dtlb_real_miss_exits;
+       u64 dtlb_virt_miss_exits;
+       u64 syscall_exits;
+       u64 isi_exits;
+       u64 dsi_exits;
+       u64 emulated_inst_exits;
+       u64 dec_exits;
+       u64 ext_intr_exits;
+       u64 halt_poll_success_ns;
+       u64 halt_poll_fail_ns;
+       u64 halt_wait_ns;
+       u64 halt_successful_poll;
+       u64 halt_attempted_poll;
+       u64 halt_successful_wait;
+       u64 halt_poll_invalid;
+       u64 halt_wakeup;
+       u64 dbell_exits;
+       u64 gdbell_exits;
+       u64 ld;
+       u64 st;
 #ifdef CONFIG_PPC_BOOK3S
-       u32 pf_storage;
-       u32 pf_instruc;
-       u32 sp_storage;
-       u32 sp_instruc;
-       u32 queue_intr;
-       u32 ld_slow;
-       u32 st_slow;
+       u64 pf_storage;
+       u64 pf_instruc;
+       u64 sp_storage;
+       u64 sp_instruc;
+       u64 queue_intr;
+       u64 ld_slow;
+       u64 st_slow;
 #endif
+       u64 pthru_all;
+       u64 pthru_host;
+       u64 pthru_bad_aff;
 };
 
 enum kvm_exit_types {
@@ -197,6 +206,8 @@ struct kvmppc_spapr_tce_table {
 struct kvmppc_xics;
 struct kvmppc_icp;
 
+struct kvmppc_passthru_irqmap;
+
 /*
  * The reverse mapping array has one entry for each HPTE,
  * which stores the guest's view of the second word of the HPTE
@@ -267,6 +278,7 @@ struct kvm_arch {
 #endif
 #ifdef CONFIG_KVM_XICS
        struct kvmppc_xics *xics;
+       struct kvmppc_passthru_irqmap *pimap;
 #endif
        struct kvmppc_ops *kvm_ops;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -275,41 +287,6 @@ struct kvm_arch {
 #endif
 };
 
-/*
- * Struct for a virtual core.
- * Note: entry_exit_map combines a bitmap of threads that have entered
- * in the bottom 8 bits and a bitmap of threads that have exited in the
- * next 8 bits.  This is so that we can atomically set the entry bit
- * iff the exit map is 0 without taking a lock.
- */
-struct kvmppc_vcore {
-       int n_runnable;
-       int num_threads;
-       int entry_exit_map;
-       int napping_threads;
-       int first_vcpuid;
-       u16 pcpu;
-       u16 last_cpu;
-       u8 vcore_state;
-       u8 in_guest;
-       struct kvmppc_vcore *master_vcore;
-       struct list_head runnable_threads;
-       struct list_head preempt_list;
-       spinlock_t lock;
-       struct swait_queue_head wq;
-       spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
-       u64 stolen_tb;
-       u64 preempt_tb;
-       struct kvm_vcpu *runner;
-       struct kvm *kvm;
-       u64 tb_offset;          /* guest timebase - host timebase */
-       ulong lpcr;
-       u32 arch_compat;
-       ulong pcr;
-       ulong dpdes;            /* doorbell state (POWER8) */
-       ulong conferring_threads;
-};
-
 #define VCORE_ENTRY_MAP(vc)    ((vc)->entry_exit_map & 0xff)
 #define VCORE_EXIT_MAP(vc)     ((vc)->entry_exit_map >> 8)
 #define VCORE_IS_EXITING(vc)   (VCORE_EXIT_MAP(vc) != 0)
@@ -329,6 +306,7 @@ struct kvmppc_vcore {
 #define VCORE_SLEEPING 3
 #define VCORE_RUNNING  4
 #define VCORE_EXITING  5
+#define VCORE_POLLING  6
 
 /*
  * Struct used to manage memory for a virtual processor area
@@ -397,6 +375,20 @@ struct kvmhv_tb_accumulator {
        u64     tb_max;         /* max time */
 };
 
+#ifdef CONFIG_PPC_BOOK3S_64
+struct kvmppc_irq_map {
+       u32     r_hwirq;
+       u32     v_hwirq;
+       struct irq_desc *desc;
+};
+
+#define        KVMPPC_PIRQ_MAPPED      1024
+struct kvmppc_passthru_irqmap {
+       int n_mapped;
+       struct kvmppc_irq_map mapped[KVMPPC_PIRQ_MAPPED];
+};
+#endif
+
 # ifdef CONFIG_PPC_FSL_BOOK3E
 #define KVMPPC_BOOKE_IAC_NUM   2
 #define KVMPPC_BOOKE_DAC_NUM   2
@@ -668,7 +660,6 @@ struct kvm_vcpu_arch {
        long pgfault_index;
        unsigned long pgfault_hpte[2];
 
-       struct list_head run_list;
        struct task_struct *run_task;
        struct kvm_run *kvm_run;
 
index 2544edabe7f392f1edb25fdf0c1c79a146b1c28b..f6e49640dbe1ebd38bae9a3e1b2af30a679f6fdb 100644
@@ -287,6 +287,10 @@ struct kvmppc_ops {
        long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
                              unsigned long arg);
        int (*hcall_implemented)(unsigned long hcall);
+       int (*irq_bypass_add_producer)(struct irq_bypass_consumer *,
+                                      struct irq_bypass_producer *);
+       void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
+                                       struct irq_bypass_producer *);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
@@ -453,8 +457,19 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
 }
+
+static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
+                               struct kvm *kvm)
+{
+       if (kvm && kvm_irq_bypass)
+               return kvm->arch.pimap;
+       return NULL;
+}
+
 extern void kvmppc_alloc_host_rm_ops(void);
 extern void kvmppc_free_host_rm_ops(void);
+extern void kvmppc_free_pimap(struct kvm *kvm);
+extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
 extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
 extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@@ -464,10 +479,23 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
                        struct kvm_vcpu *vcpu, u32 cpu);
 extern void kvmppc_xics_ipi_action(void);
+extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+                                  unsigned long host_irq);
+extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+                                  unsigned long host_irq);
+extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr,
+                                struct kvmppc_irq_map *irq_map,
+                                struct kvmppc_passthru_irqmap *pimap);
 extern int h_ipi_redirect;
 #else
+static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
+                               struct kvm *kvm)
+       { return NULL; }
 static inline void kvmppc_alloc_host_rm_ops(void) {};
 static inline void kvmppc_free_host_rm_ops(void) {};
+static inline void kvmppc_free_pimap(struct kvm *kvm) {};
+static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+       { return 0; }
 static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
        { return 0; }
 static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
index e2fb408f83983617591e3fe3511f9691b76f0b3d..b78e8d3377f6b9a7c708638f42db9f8a14469043 100644
@@ -271,6 +271,7 @@ static inline bool early_radix_enabled(void)
 #define MMU_PAGE_16G   13
 #define MMU_PAGE_64G   14
 
+/* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */
 #define MMU_PAGE_COUNT 15
 
 #ifdef CONFIG_PPC_BOOK3S_64
index ee05bd20363032c36d567413a8fe1d1050959846..e958b7096f19c5ea703816e935bd4ba434a370ad 100644
@@ -67,6 +67,7 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
 int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func,
                                   uint64_t offset, uint32_t data);
 int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
+int64_t opal_rm_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
 int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority);
 int64_t opal_register_exception_handler(uint64_t opal_exception,
                                        uint64_t handler_address,
index 0cbd8134ce81949f8bf079181a7180aa55ee9abb..1b46b52d3212f40ca4038750871cf45e98d33e00 100644
@@ -12,6 +12,7 @@
 
 #include <linux/pci.h>
 #include <linux/pci_hotplug.h>
+#include <linux/irq.h>
 #include <misc/cxl-base.h>
 #include <asm/opal-api.h>
 
@@ -33,6 +34,8 @@ int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num);
 void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num);
 int pnv_cxl_get_irq_count(struct pci_dev *dev);
 struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev);
+int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq);
+bool is_pnv_opal_msi(struct irq_chip *chip);
 
 #ifdef CONFIG_CXL_BASE
 int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
index c2024ac9d4e859963c1c15408824b98baa3e92c3..029be26b5a17edc01b5486bde83baea9a2b3765f 100644
@@ -22,6 +22,9 @@ config KVM
        select ANON_INODES
        select HAVE_KVM_EVENTFD
        select SRCU
+       select KVM_VFIO
+       select IRQ_BYPASS_MANAGER
+       select HAVE_KVM_IRQ_BYPASS
 
 config KVM_BOOK3S_HANDLER
        bool
index 855d4b95d75255a328e5320765427125ea8b49de..7dd89b79d038065c410f39a5070fc9dc1bf8c05f 100644
@@ -7,16 +7,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
 KVM := ../../../virt/kvm
 
-common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
-               $(KVM)/eventfd.o
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o
 common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
+common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
 
 CFLAGS_e500_mmu.o := -I.
 CFLAGS_e500_mmu_host.o := -I.
 CFLAGS_emulate.o  := -I.
 CFLAGS_emulate_loadstore.o  := -I.
 
-common-objs-y += powerpc.o emulate.o emulate_loadstore.o
+common-objs-y += powerpc.o emulate_loadstore.o
 obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
 obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o
 
@@ -24,6 +24,7 @@ AFLAGS_booke_interrupts.o := -I$(objtree)/$(obj)
 
 kvm-e500-objs := \
        $(common-objs-y) \
+       emulate.o \
        booke.o \
        booke_emulate.o \
        booke_interrupts.o \
@@ -35,6 +36,7 @@ kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs)
 
 kvm-e500mc-objs := \
        $(common-objs-y) \
+       emulate.o \
        booke.o \
        booke_emulate.o \
        bookehv_interrupts.o \
@@ -61,9 +63,6 @@ kvm-pr-y := \
        book3s_32_mmu.o
 
 ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-kvm-book3s_64-module-objs := \
-       $(KVM)/coalesced_mmio.o
-
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
        book3s_rmhandlers.o
 endif
@@ -89,11 +88,8 @@ endif
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
        book3s_xics.o
 
-kvm-book3s_64-module-objs += \
-       $(KVM)/kvm_main.o \
-       $(KVM)/eventfd.o \
-       powerpc.o \
-       emulate_loadstore.o \
+kvm-book3s_64-module-objs := \
+       $(common-objs-y) \
        book3s.o \
        book3s_64_vio.o \
        book3s_rtas.o \
@@ -103,6 +99,7 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
 
 kvm-book3s_32-objs := \
        $(common-objs-y) \
+       emulate.o \
        fpu.o \
        book3s_paired_singles.o \
        book3s.o \
index 47018fcbf7d6aa3dd02caeef441dccd52ed476a4..ba231a1d43d46835bda1ef4664a91f77e122bc91 100644
@@ -52,8 +52,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "dec",         VCPU_STAT(dec_exits) },
        { "ext_intr",    VCPU_STAT(ext_intr_exits) },
        { "queue_intr",  VCPU_STAT(queue_intr) },
+       { "halt_poll_success_ns",       VCPU_STAT(halt_poll_success_ns) },
+       { "halt_poll_fail_ns",          VCPU_STAT(halt_poll_fail_ns) },
+       { "halt_wait_ns",               VCPU_STAT(halt_wait_ns) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
+       { "halt_successful_wait",       VCPU_STAT(halt_successful_wait) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "pf_storage",  VCPU_STAT(pf_storage) },
@@ -64,6 +68,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "ld_slow",     VCPU_STAT(ld_slow) },
        { "st",          VCPU_STAT(st) },
        { "st_slow",     VCPU_STAT(st_slow) },
+       { "pthru_all",       VCPU_STAT(pthru_all) },
+       { "pthru_host",      VCPU_STAT(pthru_host) },
+       { "pthru_bad_aff",   VCPU_STAT(pthru_bad_aff) },
        { NULL }
 };
 
index 2fd5580c8f6ebaf3981015c7688cbfb4fea880f2..9b3bba643b43e04f6f9a911b520b22c819aec167 100644
 #include <asm/smp.h>
 #include <asm/dbell.h>
 #include <asm/hmi.h>
+#include <asm/pnv-pci.h>
 #include <linux/gfp.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/hugetlb.h>
+#include <linux/kvm_irqfd.h>
+#include <linux/irqbypass.h>
 #include <linux/module.h>
+#include <linux/compiler.h>
 
 #include "book3s.h"
 
@@ -70,6 +74,8 @@
 
 /* Used to indicate that a guest page fault needs to be handled */
 #define RESUME_PAGE_FAULT      (RESUME_GUEST | RESUME_FLAG_ARCH1)
+/* Used to indicate that a guest passthrough interrupt needs to be handled */
+#define RESUME_PASSTHROUGH     (RESUME_GUEST | RESUME_FLAG_ARCH2)
 
 /* Used as a "null" value for timebase values */
 #define TB_NIL (~(u64)0)
@@ -89,14 +95,55 @@ static struct kernel_param_ops module_param_ops = {
        .get = param_get_int,
 };
 
+module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass,
+                                                       S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
+
 module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
                                                        S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
 #endif
 
+/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
+static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
+module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
+
+/* Factor by which the vcore halt poll interval is grown, default is to double
+ */
+static unsigned int halt_poll_ns_grow = 2;
+module_param(halt_poll_ns_grow, int, S_IRUGO);
+MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
+
+/* Factor by which the vcore halt poll interval is shrunk, default is to reset
+ */
+static unsigned int halt_poll_ns_shrink;
+module_param(halt_poll_ns_shrink, int, S_IRUGO);
+MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
+
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
+static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
+               int *ip)
+{
+       int i = *ip;
+       struct kvm_vcpu *vcpu;
+
+       while (++i < MAX_SMT_THREADS) {
+               vcpu = READ_ONCE(vc->runnable_threads[i]);
+               if (vcpu) {
+                       *ip = i;
+                       return vcpu;
+               }
+       }
+       return NULL;
+}
+
+/* Used to traverse the list of runnable threads for a given vcore */
+#define for_each_runnable_thread(i, vcpu, vc) \
+       for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )
+
 static bool kvmppc_ipi_thread(int cpu)
 {
        /* On POWER8 for IPIs to threads in the same core, use msgsnd */
@@ -991,6 +1038,9 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
                kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
                r = RESUME_GUEST;
                break;
+       case BOOK3S_INTERRUPT_HV_RM_HARD:
+               r = RESUME_PASSTHROUGH;
+               break;
        default:
                kvmppc_dump_regs(vcpu);
                printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
@@ -1493,7 +1543,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
        if (vcore == NULL)
                return NULL;
 
-       INIT_LIST_HEAD(&vcore->runnable_threads);
        spin_lock_init(&vcore->lock);
        spin_lock_init(&vcore->stoltb_lock);
        init_swait_queue_head(&vcore->wq);
@@ -1802,7 +1851,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
        vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
        spin_unlock_irq(&vcpu->arch.tbacct_lock);
        --vc->n_runnable;
-       list_del(&vcpu->arch.run_list);
+       WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
 }
 
 static int kvmppc_grab_hwthread(int cpu)
@@ -2209,10 +2258,10 @@ static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
 
 static void prepare_threads(struct kvmppc_vcore *vc)
 {
-       struct kvm_vcpu *vcpu, *vnext;
+       int i;
+       struct kvm_vcpu *vcpu;
 
-       list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
-                                arch.run_list) {
+       for_each_runnable_thread(i, vcpu, vc) {
                if (signal_pending(vcpu->arch.run_task))
                        vcpu->arch.ret = -EINTR;
                else if (vcpu->arch.vpa.update_pending ||
@@ -2259,15 +2308,14 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
 
 static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 {
-       int still_running = 0;
+       int still_running = 0, i;
        u64 now;
        long ret;
-       struct kvm_vcpu *vcpu, *vnext;
+       struct kvm_vcpu *vcpu;
 
        spin_lock(&vc->lock);
        now = get_tb();
-       list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
-                                arch.run_list) {
+       for_each_runnable_thread(i, vcpu, vc) {
                /* cancel pending dec exception if dec is positive */
                if (now < vcpu->arch.dec_expires &&
                    kvmppc_core_pending_dec(vcpu))
@@ -2307,8 +2355,8 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
                }
                if (vc->n_runnable > 0 && vc->runner == NULL) {
                        /* make sure there's a candidate runner awake */
-                       vcpu = list_first_entry(&vc->runnable_threads,
-                                               struct kvm_vcpu, arch.run_list);
+                       i = -1;
+                       vcpu = next_runnable_thread(vc, &i);
                        wake_up(&vcpu->arch.cpu_run);
                }
        }
@@ -2361,7 +2409,7 @@ static inline void kvmppc_set_host_core(int cpu)
  */
 static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 {
-       struct kvm_vcpu *vcpu, *vnext;
+       struct kvm_vcpu *vcpu;
        int i;
        int srcu_idx;
        struct core_info core_info;
@@ -2397,8 +2445,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
         */
        if ((threads_per_core > 1) &&
            ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
-               list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
-                                        arch.run_list) {
+               for_each_runnable_thread(i, vcpu, vc) {
                        vcpu->arch.ret = -EBUSY;
                        kvmppc_remove_runnable(vc, vcpu);
                        wake_up(&vcpu->arch.cpu_run);
@@ -2477,8 +2524,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
                active |= 1 << thr;
                list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
                        pvc->pcpu = pcpu + thr;
-                       list_for_each_entry(vcpu, &pvc->runnable_threads,
-                                           arch.run_list) {
+                       for_each_runnable_thread(i, vcpu, pvc) {
                                kvmppc_start_thread(vcpu, pvc);
                                kvmppc_create_dtl_entry(vcpu, pvc);
                                trace_kvm_guest_enter(vcpu);
@@ -2604,34 +2650,92 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
        finish_wait(&vcpu->arch.cpu_run, &wait);
 }
 
+static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
+{
+       /* 10us base */
+       if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
+               vc->halt_poll_ns = 10000;
+       else
+               vc->halt_poll_ns *= halt_poll_ns_grow;
+
+       if (vc->halt_poll_ns > halt_poll_max_ns)
+               vc->halt_poll_ns = halt_poll_max_ns;
+}
+
+static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
+{
+       if (halt_poll_ns_shrink == 0)
+               vc->halt_poll_ns = 0;
+       else
+               vc->halt_poll_ns /= halt_poll_ns_shrink;
+}
+
+/* Check to see if any of the runnable vcpus on the vcore have pending
+ * exceptions or are no longer ceded
+ */
+static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
+{
+       struct kvm_vcpu *vcpu;
+       int i;
+
+       for_each_runnable_thread(i, vcpu, vc) {
+               if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
+                       return 1;
+       }
+
+       return 0;
+}
+
 /*
  * All the vcpus in this vcore are idle, so wait for a decrementer
  * or external interrupt to one of the vcpus.  vc->lock is held.
  */
 static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 {
-       struct kvm_vcpu *vcpu;
+       ktime_t cur, start_poll, start_wait;
        int do_sleep = 1;
+       u64 block_ns;
        DECLARE_SWAITQUEUE(wait);
 
-       prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+       /* Poll for pending exceptions and ceded state */
+       cur = start_poll = ktime_get();
+       if (vc->halt_poll_ns) {
+               ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
+               ++vc->runner->stat.halt_attempted_poll;
 
-       /*
-        * Check one last time for pending exceptions and ceded state after
-        * we put ourselves on the wait queue
-        */
-       list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-               if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
-                       do_sleep = 0;
-                       break;
+               vc->vcore_state = VCORE_POLLING;
+               spin_unlock(&vc->lock);
+
+               do {
+                       if (kvmppc_vcore_check_block(vc)) {
+                               do_sleep = 0;
+                               break;
+                       }
+                       cur = ktime_get();
+               } while (single_task_running() && ktime_before(cur, stop));
+
+               spin_lock(&vc->lock);
+               vc->vcore_state = VCORE_INACTIVE;
+
+               if (!do_sleep) {
+                       ++vc->runner->stat.halt_successful_poll;
+                       goto out;
                }
        }
 
-       if (!do_sleep) {
+       prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+
+       if (kvmppc_vcore_check_block(vc)) {
                finish_swait(&vc->wq, &wait);
-               return;
+               do_sleep = 0;
+               /* If we polled, count this as a successful poll */
+               if (vc->halt_poll_ns)
+                       ++vc->runner->stat.halt_successful_poll;
+               goto out;
        }
 
+       start_wait = ktime_get();
+
        vc->vcore_state = VCORE_SLEEPING;
        trace_kvmppc_vcore_blocked(vc, 0);
        spin_unlock(&vc->lock);
@@ -2640,13 +2744,52 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
        spin_lock(&vc->lock);
        vc->vcore_state = VCORE_INACTIVE;
        trace_kvmppc_vcore_blocked(vc, 1);
+       ++vc->runner->stat.halt_successful_wait;
+
+       cur = ktime_get();
+
+out:
+       block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
+
+       /* Attribute wait time */
+       if (do_sleep) {
+               vc->runner->stat.halt_wait_ns +=
+                       ktime_to_ns(cur) - ktime_to_ns(start_wait);
+               /* Attribute failed poll time */
+               if (vc->halt_poll_ns)
+                       vc->runner->stat.halt_poll_fail_ns +=
+                               ktime_to_ns(start_wait) -
+                               ktime_to_ns(start_poll);
+       } else {
+               /* Attribute successful poll time */
+               if (vc->halt_poll_ns)
+                       vc->runner->stat.halt_poll_success_ns +=
+                               ktime_to_ns(cur) -
+                               ktime_to_ns(start_poll);
+       }
+
+       /* Adjust poll time */
+       if (halt_poll_max_ns) {
+               if (block_ns <= vc->halt_poll_ns)
+                       ;
+               /* We slept and blocked for longer than the max halt time */
+               else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
+                       shrink_halt_poll_ns(vc);
+               /* We slept and our poll time is too small */
+               else if (vc->halt_poll_ns < halt_poll_max_ns &&
+                               block_ns < halt_poll_max_ns)
+                       grow_halt_poll_ns(vc);
+       } else
+               vc->halt_poll_ns = 0;
+
+       trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
 }
 
 static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
-       int n_ceded;
+       int n_ceded, i;
        struct kvmppc_vcore *vc;
-       struct kvm_vcpu *v, *vn;
+       struct kvm_vcpu *v;
 
        trace_kvmppc_run_vcpu_enter(vcpu);
 
@@ -2666,7 +2809,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
        vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
        vcpu->arch.busy_preempt = TB_NIL;
-       list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
+       WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
        ++vc->n_runnable;
 
        /*
@@ -2706,8 +2849,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                        kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
                        continue;
                }
-               list_for_each_entry_safe(v, vn, &vc->runnable_threads,
-                                        arch.run_list) {
+               for_each_runnable_thread(i, v, vc) {
                        kvmppc_core_prepare_to_enter(v);
                        if (signal_pending(v->arch.run_task)) {
                                kvmppc_remove_runnable(vc, v);
@@ -2720,7 +2862,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
                        break;
                n_ceded = 0;
-               list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
+               for_each_runnable_thread(i, v, vc) {
                        if (!v->arch.pending_exceptions)
                                n_ceded += v->arch.ceded;
                        else
@@ -2759,8 +2901,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
        if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
                /* Wake up some vcpu to run the core */
-               v = list_first_entry(&vc->runnable_threads,
-                                    struct kvm_vcpu, arch.run_list);
+               i = -1;
+               v = next_runnable_thread(vc, &i);
                wake_up(&v->arch.cpu_run);
        }
 
@@ -2818,7 +2960,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        r = kvmppc_book3s_hv_page_fault(run, vcpu,
                                vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
                        srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
-               }
+               } else if (r == RESUME_PASSTHROUGH)
+                       r = kvmppc_xics_rm_complete(vcpu, 0);
        } while (is_kvmppc_resume_guest(r));
 
  out:
@@ -3247,6 +3390,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
        kvmppc_free_vcores(kvm);
 
        kvmppc_free_hpt(kvm);
+
+       kvmppc_free_pimap(kvm);
 }
 
 /* We don't need to emulate any privileged instructions or dcbz */
@@ -3282,6 +3427,184 @@ static int kvmppc_core_check_processor_compat_hv(void)
        return 0;
 }
 
+#ifdef CONFIG_KVM_XICS
+
+void kvmppc_free_pimap(struct kvm *kvm)
+{
+       kfree(kvm->arch.pimap);
+}
+
+static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
+{
+       return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL);
+}
+
+static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+       struct irq_desc *desc;
+       struct kvmppc_irq_map *irq_map;
+       struct kvmppc_passthru_irqmap *pimap;
+       struct irq_chip *chip;
+       int i;
+
+       if (!kvm_irq_bypass)
+               return 1;
+
+       desc = irq_to_desc(host_irq);
+       if (!desc)
+               return -EIO;
+
+       mutex_lock(&kvm->lock);
+
+       pimap = kvm->arch.pimap;
+       if (pimap == NULL) {
+               /* First call, allocate structure to hold IRQ map */
+               pimap = kvmppc_alloc_pimap();
+               if (pimap == NULL) {
+                       mutex_unlock(&kvm->lock);
+                       return -ENOMEM;
+               }
+               kvm->arch.pimap = pimap;
+       }
+
+       /*
+        * For now, we only support interrupts for which the EOI operation
+        * is an OPAL call followed by a write to XIRR, since that's
+        * what our real-mode EOI code does.
+        */
+       chip = irq_data_get_irq_chip(&desc->irq_data);
+       if (!chip || !is_pnv_opal_msi(chip)) {
+               pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
+                       host_irq, guest_gsi);
+               mutex_unlock(&kvm->lock);
+               return -ENOENT;
+       }
+
+       /*
+        * See if we already have an entry for this guest IRQ number.
+        * If it's mapped to a hardware IRQ number, that's an error,
+        * otherwise re-use this entry.
+        */
+       for (i = 0; i < pimap->n_mapped; i++) {
+               if (guest_gsi == pimap->mapped[i].v_hwirq) {
+                       if (pimap->mapped[i].r_hwirq) {
+                               mutex_unlock(&kvm->lock);
+                               return -EINVAL;
+                       }
+                       break;
+               }
+       }
+
+       if (i == KVMPPC_PIRQ_MAPPED) {
+               mutex_unlock(&kvm->lock);
+               return -EAGAIN;         /* table is full */
+       }
+
+       irq_map = &pimap->mapped[i];
+
+       irq_map->v_hwirq = guest_gsi;
+       irq_map->desc = desc;
+
+       /*
+        * Order the above two stores before the next to serialize with
+        * the KVM real mode handler.
+        */
+       smp_wmb();
+       irq_map->r_hwirq = desc->irq_data.hwirq;
+
+       if (i == pimap->n_mapped)
+               pimap->n_mapped++;
+
+       kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
+
+       mutex_unlock(&kvm->lock);
+
+       return 0;
+}
+
+static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+       struct irq_desc *desc;
+       struct kvmppc_passthru_irqmap *pimap;
+       int i;
+
+       if (!kvm_irq_bypass)
+               return 0;
+
+       desc = irq_to_desc(host_irq);
+       if (!desc)
+               return -EIO;
+
+       mutex_lock(&kvm->lock);
+
+       if (kvm->arch.pimap == NULL) {
+               mutex_unlock(&kvm->lock);
+               return 0;
+       }
+       pimap = kvm->arch.pimap;
+
+       for (i = 0; i < pimap->n_mapped; i++) {
+               if (guest_gsi == pimap->mapped[i].v_hwirq)
+                       break;
+       }
+
+       if (i == pimap->n_mapped) {
+               mutex_unlock(&kvm->lock);
+               return -ENODEV;
+       }
+
+       kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
+
+       /* invalidate the entry */
+       pimap->mapped[i].r_hwirq = 0;
+
+       /*
+        * We don't free this structure even when the count goes to
+        * zero. The structure is freed when we destroy the VM.
+        */
+
+       mutex_unlock(&kvm->lock);
+       return 0;
+}
+
+static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
+                                            struct irq_bypass_producer *prod)
+{
+       int ret = 0;
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+       irqfd->producer = prod;
+
+       ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
+       if (ret)
+               pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
+                       prod->irq, irqfd->gsi, ret);
+
+       return ret;
+}
+
+static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
+                                             struct irq_bypass_producer *prod)
+{
+       int ret;
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+       irqfd->producer = NULL;
+
+       /*
+        * When producer of consumer is unregistered, we change back to
+        * default external interrupt handling mode - KVM real mode
+        * will switch back to host.
+        */
+       ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
+       if (ret)
+               pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n",
+                       prod->irq, irqfd->gsi, ret);
+}
+#endif
+
 static long kvm_arch_vm_ioctl_hv(struct file *filp,
                                 unsigned int ioctl, unsigned long arg)
 {
@@ -3400,6 +3723,10 @@ static struct kvmppc_ops kvm_ops_hv = {
        .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
        .arch_vm_ioctl  = kvm_arch_vm_ioctl_hv,
        .hcall_implemented = kvmppc_hcall_impl_hv,
+#ifdef CONFIG_KVM_XICS
+       .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
+       .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
+#endif
 };
 
 static int kvm_init_subcore_bitmap(void)
index 5f0380db3eabcf483a8ff98f68cad7d4b96f0c96..0c84d6bc835646c69ddfbe01efc4a690ad3b156e 100644
@@ -25,6 +25,7 @@
 #include <asm/xics.h>
 #include <asm/dbell.h>
 #include <asm/cputhreads.h>
+#include <asm/io.h>
 
 #define KVM_CMA_CHUNK_ORDER    18
 
@@ -286,3 +287,158 @@ void kvmhv_commence_exit(int trap)
 
 struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
 EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
+
+#ifdef CONFIG_KVM_XICS
+static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
+                                        u32 xisr)
+{
+       int i;
+
+       /*
+        * We access the mapped array here without a lock.  That
+        * is safe because we never reduce the number of entries
+        * in the array and we never change the v_hwirq field of
+        * an entry once it is set.
+        *
+        * We have also carefully ordered the stores in the writer
+        * and the loads here in the reader, so that if we find a matching
+        * hwirq here, the associated GSI and irq_desc fields are valid.
+        */
+       for (i = 0; i < pimap->n_mapped; i++)  {
+               if (xisr == pimap->mapped[i].r_hwirq) {
+                       /*
+                        * Order subsequent reads in the caller to serialize
+                        * with the writer.
+                        */
+                       smp_rmb();
+                       return &pimap->mapped[i];
+               }
+       }
+       return NULL;
+}
+
+/*
+ * If we have an interrupt that's not an IPI, check if we have a
+ * passthrough adapter and if so, check if this external interrupt
+ * is for the adapter.
+ * We will attempt to deliver the IRQ directly to the target VCPU's
+ * ICP, the virtual ICP (based on affinity - the xive value in ICS).
+ *
+ * If the delivery fails or if this is not for a passthrough adapter,
+ * return to the host to handle this interrupt. We earlier
+ * saved a copy of the XIRR in the PACA, it will be picked up by
+ * the host ICP driver.
+ */
+static int kvmppc_check_passthru(u32 xisr, __be32 xirr)
+{
+       struct kvmppc_passthru_irqmap *pimap;
+       struct kvmppc_irq_map *irq_map;
+       struct kvm_vcpu *vcpu;
+
+       vcpu = local_paca->kvm_hstate.kvm_vcpu;
+       if (!vcpu)
+               return 1;
+       pimap = kvmppc_get_passthru_irqmap(vcpu->kvm);
+       if (!pimap)
+               return 1;
+       irq_map = get_irqmap(pimap, xisr);
+       if (!irq_map)
+               return 1;
+
+       /* We're handling this interrupt, generic code doesn't need to */
+       local_paca->kvm_hstate.saved_xirr = 0;
+
+       return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap);
+}
+
+#else
+static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr)
+{
+       return 1;
+}
+#endif
+
+/*
+ * Determine what sort of external interrupt is pending (if any).
+ * Returns:
+ *     0 if no interrupt is pending
+ *     1 if an interrupt is pending that needs to be handled by the host
+ *     2 Passthrough that needs completion in the host
+ *     -1 if there was a guest wakeup IPI (which has now been cleared)
+ *     -2 if there is PCI passthrough external interrupt that was handled
+ */
+
+long kvmppc_read_intr(void)
+{
+       unsigned long xics_phys;
+       u32 h_xirr;
+       __be32 xirr;
+       u32 xisr;
+       u8 host_ipi;
+
+       /* see if a host IPI is pending */
+       host_ipi = local_paca->kvm_hstate.host_ipi;
+       if (host_ipi)
+               return 1;
+
+       /* Now read the interrupt from the ICP */
+       xics_phys = local_paca->kvm_hstate.xics_phys;
+       if (unlikely(!xics_phys))
+               return 1;
+
+       /*
+        * Save XIRR for later. Since we get control in reverse endian
+        * on LE systems, save it byte reversed and fetch it back in
+        * host endian. Note that xirr is the value read from the
+        * XIRR register, while h_xirr is the host endian version.
+        */
+       xirr = _lwzcix(xics_phys + XICS_XIRR);
+       h_xirr = be32_to_cpu(xirr);
+       local_paca->kvm_hstate.saved_xirr = h_xirr;
+       xisr = h_xirr & 0xffffff;
+       /*
+        * Ensure that the store/load complete to guarantee all side
+        * effects of loading from XIRR has completed
+        */
+       smp_mb();
+
+       /* if nothing pending in the ICP */
+       if (!xisr)
+               return 0;
+
+       /* We found something in the ICP...
+        *
+        * If it is an IPI, clear the MFRR and EOI it.
+        */
+       if (xisr == XICS_IPI) {
+               _stbcix(xics_phys + XICS_MFRR, 0xff);
+               _stwcix(xics_phys + XICS_XIRR, xirr);
+               /*
+                * Need to ensure side effects of above stores
+                * complete before proceeding.
+                */
+               smp_mb();
+
+               /*
+                * We need to re-check host IPI now in case it got set in the
+                * meantime. If it's clear, we bounce the interrupt to the
+                * guest
+                */
+               host_ipi = local_paca->kvm_hstate.host_ipi;
+               if (unlikely(host_ipi != 0)) {
+                       /* We raced with the host,
+                        * we need to resend that IPI, bummer
+                        */
+                       _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
+                       /* Let side effects complete */
+                       smp_mb();
+                       return 1;
+               }
+
+               /* OK, it's an IPI for us */
+               local_paca->kvm_hstate.saved_xirr = 0;
+               return -1;
+       }
+
+       return kvmppc_check_passthru(xisr, xirr);
+}
index 980d8a6f728427a0c58463bd3b3a594fe72fb6b5..82ff5de8b1e7a5564df01dd323c0662ab6457b3b 100644
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/kvm_host.h>
 #include <linux/err.h>
+#include <linux/kernel_stat.h>
 
 #include <asm/kvm_book3s.h>
 #include <asm/kvm_ppc.h>
 #include <asm/debug.h>
 #include <asm/synch.h>
 #include <asm/cputhreads.h>
+#include <asm/pgtable.h>
 #include <asm/ppc-opcode.h>
+#include <asm/pnv-pci.h>
+#include <asm/opal.h>
 
 #include "book3s_xics.h"
 
 
 int h_ipi_redirect = 1;
 EXPORT_SYMBOL(h_ipi_redirect);
+int kvm_irq_bypass = 1;
+EXPORT_SYMBOL(kvm_irq_bypass);
 
 static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
                            u32 new_irq);
+static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu);
 
 /* -- ICS routines -- */
 static void ics_rm_check_resend(struct kvmppc_xics *xics,
@@ -708,10 +715,123 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
                icp->rm_action |= XICS_RM_NOTIFY_EOI;
                icp->rm_eoied_irq = irq;
        }
+
+       if (state->host_irq) {
+               ++vcpu->stat.pthru_all;
+               if (state->intr_cpu != -1) {
+                       int pcpu = raw_smp_processor_id();
+
+                       pcpu = cpu_first_thread_sibling(pcpu);
+                       ++vcpu->stat.pthru_host;
+                       if (state->intr_cpu != pcpu) {
+                               ++vcpu->stat.pthru_bad_aff;
+                               xics_opal_rm_set_server(state->host_irq, pcpu);
+                       }
+                       state->intr_cpu = -1;
+               }
+       }
  bail:
        return check_too_hard(xics, icp);
 }
 
+unsigned long eoi_rc;
+
+static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr)
+{
+       unsigned long xics_phys;
+       int64_t rc;
+
+       rc = pnv_opal_pci_msi_eoi(c, hwirq);
+
+       if (rc)
+               eoi_rc = rc;
+
+       iosync();
+
+       /* EOI it */
+       xics_phys = local_paca->kvm_hstate.xics_phys;
+       _stwcix(xics_phys + XICS_XIRR, xirr);
+}
+
+static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu)
+{
+       unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2;
+
+       return opal_rm_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY);
+}
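A worked example of the mangling above (the shift presumably leaves room for low-order routing bits that stay zero here):

/* Illustrative values only: if get_hard_smp_processor_id(server_cpu)
 * returns 6, then mangle_cpu = 6 << 2 = 24, and OPAL is asked to route
 * hw_irq to that hardware thread at DEFAULT_PRIORITY.
 */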
+
+/*
+ * Increment a per-CPU 32-bit unsigned integer variable.
+ * Safe to call in real mode. Handles vmalloc'ed addresses.
+ *
+ * TODO: make this work for any integral type.
+ */
+
+static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
+{
+       unsigned long l;
+       unsigned int *raddr;
+       int cpu = smp_processor_id();
+
+       raddr = per_cpu_ptr(addr, cpu);
+       l = (unsigned long)raddr;
+
+       if (REGION_ID(l) == VMALLOC_REGION_ID) {
+               l = vmalloc_to_phys(raddr);
+               raddr = (unsigned int *)l;
+       }
+       ++*raddr;
+}
+
+/*
+ * We don't try to update the flags in the irq_desc 'istate' field here,
+ * as would happen in the normal IRQ handling path, for several reasons:
+ *  - state flags represent internal IRQ state and are not expected to be
+ *    updated outside the IRQ subsystem
+ *  - more importantly, these flags are useful for edge-triggered interrupts,
+ *    IRQ probing, etc., but we are only handling MSI/MSI-X interrupts here
+ *    and those states shouldn't apply to us.
+ *
+ * However, we do update irq_stats: we somewhat duplicate the code in
+ * kstat_incr_irqs_this_cpu() since that function is defined in
+ * irq/internals.h, which we don't want to include here.
+ * The only difference is that desc->kstat_irqs is an allocated per-CPU
+ * variable and could have been vmalloc'ed, so we can't directly
+ * call __this_cpu_inc() on it. The kstat structure is a static
+ * per-CPU variable and it should be accessible by real-mode KVM.
+ */
+static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
+{
+       this_cpu_inc_rm(desc->kstat_irqs);
+       __this_cpu_inc(kstat.irqs_sum);
+}
+
+long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
+                                u32 xirr,
+                                struct kvmppc_irq_map *irq_map,
+                                struct kvmppc_passthru_irqmap *pimap)
+{
+       struct kvmppc_xics *xics;
+       struct kvmppc_icp *icp;
+       u32 irq;
+
+       irq = irq_map->v_hwirq;
+       xics = vcpu->kvm->arch.xics;
+       icp = vcpu->arch.icp;
+
+       kvmppc_rm_handle_irq_desc(irq_map->desc);
+       icp_rm_deliver_irq(xics, icp, irq);
+
+       /* EOI the interrupt */
+       icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr);
+
+       if (check_too_hard(xics, icp) == H_TOO_HARD)
+               return 2;
+       else
+               return -2;
+}
+
 /*  --- Non-real mode XICS-related built-in routines ---  */
 
 /**
index 97565557384417269410a7c1ab048ae70f689e64..7cc924b5eea272f75d078ba50aa36217c7ba25d9 100644 (file)
@@ -221,6 +221,13 @@ kvmppc_primary_no_guest:
        li      r3, 0           /* Don't wake on privileged (OS) doorbell */
        b       kvm_do_nap
 
+/*
+ * kvm_novcpu_wakeup
+ *     Entered from kvm_start_guest if kvm_hstate.napping is set
+ *     to NAPPING_NOVCPU
+ *             r2 = kernel TOC
+ *             r13 = paca
+ */
 kvm_novcpu_wakeup:
        ld      r1, HSTATE_HOST_R1(r13)
        ld      r5, HSTATE_KVM_VCORE(r13)
@@ -230,6 +237,13 @@ kvm_novcpu_wakeup:
        /* check the wake reason */
        bl      kvmppc_check_wake_reason
 
+       /*
+        * Restore volatile registers since we could have called
+        * a C routine in kvmppc_check_wake_reason.
+        *      r5 = VCORE
+        */
+       ld      r5, HSTATE_KVM_VCORE(r13)
+
        /* see if any other thread is already exiting */
        lwz     r0, VCORE_ENTRY_EXIT(r5)
        cmpwi   r0, 0x100
@@ -322,6 +336,11 @@ kvm_start_guest:
 
        /* Check the wake reason in SRR1 to see why we got here */
        bl      kvmppc_check_wake_reason
+       /*
+        * kvmppc_check_wake_reason could invoke a C routine, but here we
+        * have no live volatile register state that needs to be restored.
+        */
+
        cmpdi   r3, 0
        bge     kvm_no_guest
 
@@ -881,6 +900,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        cmpwi   r3, 512         /* 1 microsecond */
        blt     hdec_soon
 
+deliver_guest_interrupt:
        ld      r6, VCPU_CTR(r4)
        ld      r7, VCPU_XER(r4)
 
@@ -895,7 +915,6 @@ kvmppc_cede_reentry:                /* r4 = vcpu, r13 = paca */
        mtspr   SPRN_SRR0, r6
        mtspr   SPRN_SRR1, r7
 
-deliver_guest_interrupt:
        /* r11 = vcpu->arch.msr & ~MSR_HV */
        rldicl  r11, r11, 63 - MSR_HV_LG, 1
        rotldi  r11, r11, 1 + MSR_HV_LG
@@ -1155,10 +1174,54 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
         * set, we know the host wants us out so let's do it now
         */
        bl      kvmppc_read_intr
+
+       /*
+        * Restore the active volatile registers after returning from
+        * a C function.
+        */
+       ld      r9, HSTATE_KVM_VCPU(r13)
+       li      r12, BOOK3S_INTERRUPT_EXTERNAL
+
+       /*
+        * kvmppc_read_intr return codes:
+        *
+        * Exit to host (r3 > 0)
+        *   1 An interrupt is pending that needs to be handled by the host
+        *     Exit guest and return to host by branching to guest_exit_cont
+        *
+        *   2 Passthrough that needs completion in the host
+        *     Exit guest and return to host by branching to guest_exit_cont
+        *     However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
+        *     to indicate to the host to complete handling the interrupt
+        *
+        * Before returning to the guest, we check if any CPU is heading out
+        * to the host; if so, we head out also. If no CPUs are heading out,
+        * we fall through and check the return values <= 0 below.
+        *
+        * Return to guest (r3 <= 0)
+        *  0 No external interrupt is pending
+        * -1 A guest wakeup IPI (which has now been cleared)
+        *    In either case, we return to guest to deliver any pending
+        *    guest interrupts.
+        *
+        * -2 A PCI passthrough external interrupt was handled
+        *    (interrupt was delivered directly to guest)
+        *    Return to guest to deliver any pending guest interrupts.
+        */
+
+       cmpdi   r3, 1
+       ble     1f
+
+       /* Return code = 2 */
+       li      r12, BOOK3S_INTERRUPT_HV_RM_HARD
+       stw     r12, VCPU_TRAP(r9)
+       b       guest_exit_cont
+
+1:     /* Return code <= 1 */
        cmpdi   r3, 0
        bgt     guest_exit_cont
 
-       /* Check if any CPU is heading out to the host, if so head out too */
+       /* Return code <= 0 */
 4:     ld      r5, HSTATE_KVM_VCORE(r13)
        lwz     r0, VCORE_ENTRY_EXIT(r5)
        cmpwi   r0, 0x100
@@ -2213,10 +2276,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
        ld      r29, VCPU_GPR(R29)(r4)
        ld      r30, VCPU_GPR(R30)(r4)
        ld      r31, VCPU_GPR(R31)(r4)
+
        /* Check the wake reason in SRR1 to see why we got here */
        bl      kvmppc_check_wake_reason
 
+       /*
+        * Restore volatile registers since we could have called a
+        * C routine in kvmppc_check_wake_reason
+        *      r4 = VCPU
+        * r3 tells us whether we need to return to host or not
+        * WARNING: r3 is checked further down; do not modify it
+        * until that check is done.
+        */
+       ld      r4, HSTATE_KVM_VCPU(r13)
+
        /* clear our bit in vcore->napping_threads */
 34:    ld      r5,HSTATE_KVM_VCORE(r13)
        lbz     r7,HSTATE_PTID(r13)
@@ -2230,7 +2303,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
        li      r0,0
        stb     r0,HSTATE_NAPPING(r13)
 
-       /* See if the wake reason means we need to exit */
+       /* See if the wake reason saved in r3 means we need to exit */
        stw     r12, VCPU_TRAP(r4)
        mr      r9, r4
        cmpdi   r3, 0
@@ -2297,10 +2370,14 @@ machine_check_realmode:
  *     0 if nothing needs to be done
  *     1 if something happened that needs to be handled by the host
  *     -1 if there was a guest wakeup (IPI or msgsnd)
+ *     -2 if we handled a PCI passthrough interrupt (returned by
+ *             kvmppc_read_intr only)
  *
  * Also sets r12 to the interrupt vector for any interrupt that needs
  * to be handled now by the host (0x500 for external interrupt), or zero.
- * Modifies r0, r6, r7, r8.
+ * Modifies all volatile registers (since it may call a C function).
+ * This routine calls kvmppc_read_intr, a C function, if an external
+ * interrupt is pending.
  */
 kvmppc_check_wake_reason:
        mfspr   r6, SPRN_SRR1
@@ -2310,8 +2387,7 @@ FTR_SECTION_ELSE
        rlwinm  r6, r6, 45-31, 0xe      /* P7 wake reason field is 3 bits */
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
        cmpwi   r6, 8                   /* was it an external interrupt? */
-       li      r12, BOOK3S_INTERRUPT_EXTERNAL
-       beq     kvmppc_read_intr        /* if so, see what it was */
+       beq     7f                      /* if so, see what it was */
        li      r3, 0
        li      r12, 0
        cmpwi   r6, 6                   /* was it the decrementer? */
@@ -2350,83 +2426,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        li      r3, 1
        blr
 
-/*
- * Determine what sort of external interrupt is pending (if any).
- * Returns:
- *     0 if no interrupt is pending
- *     1 if an interrupt is pending that needs to be handled by the host
- *     -1 if there was a guest wakeup IPI (which has now been cleared)
- * Modifies r0, r6, r7, r8, returns value in r3.
- */
-kvmppc_read_intr:
-       /* see if a host IPI is pending */
-       li      r3, 1
-       lbz     r0, HSTATE_HOST_IPI(r13)
-       cmpwi   r0, 0
-       bne     1f
+       /* external interrupt - create a stack frame so we can call C */
+7:     mflr    r0
+       std     r0, PPC_LR_STKOFF(r1)
+       stdu    r1, -PPC_MIN_STKFRM(r1)
+       bl      kvmppc_read_intr
+       nop
+       li      r12, BOOK3S_INTERRUPT_EXTERNAL
+       cmpdi   r3, 1
+       ble     1f
 
-       /* Now read the interrupt from the ICP */
-       ld      r6, HSTATE_XICS_PHYS(r13)
-       li      r7, XICS_XIRR
-       cmpdi   r6, 0
-       beq-    1f
-       lwzcix  r0, r6, r7
        /*
-        * Save XIRR for later. Since we get in in reverse endian on LE
-        * systems, save it byte reversed and fetch it back in host endian.
-        */
-       li      r3, HSTATE_SAVED_XIRR
-       STWX_BE r0, r3, r13
-#ifdef __LITTLE_ENDIAN__
-       lwz     r3, HSTATE_SAVED_XIRR(r13)
-#else
-       mr      r3, r0
-#endif
-       rlwinm. r3, r3, 0, 0xffffff
-       sync
-       beq     1f                      /* if nothing pending in the ICP */
-
-       /* We found something in the ICP...
-        *
-        * If it's not an IPI, stash it in the PACA and return to
-        * the host, we don't (yet) handle directing real external
-        * interrupts directly to the guest
+        * A return code of 2 means a PCI passthrough interrupt that
+        * we need to return to the host to complete handling. The
+        * guest exit code expects the trap reason in r12.
         */
-       cmpwi   r3, XICS_IPI            /* if there is, is it an IPI? */
-       bne     42f
-
-       /* It's an IPI, clear the MFRR and EOI it */
-       li      r3, 0xff
-       li      r8, XICS_MFRR
-       stbcix  r3, r6, r8              /* clear the IPI */
-       stwcix  r0, r6, r7              /* EOI it */
-       sync
-
-       /* We need to re-check host IPI now in case it got set in the
-        * meantime. If it's clear, we bounce the interrupt to the
-        * guest
-        */
-       lbz     r0, HSTATE_HOST_IPI(r13)
-       cmpwi   r0, 0
-       bne-    43f
-
-       /* OK, it's an IPI for us */
-       li      r12, 0
-       li      r3, -1
-1:     blr
-
-42:    /* It's not an IPI and it's for the host. We saved a copy of XIRR in
-        * the PACA earlier, it will be picked up by the host ICP driver
-        */
-       li      r3, 1
-       b       1b
-
-43:    /* We raced with the host, we need to resend that IPI, bummer */
-       li      r0, IPI_PRIORITY
-       stbcix  r0, r6, r8              /* set the IPI */
-       sync
-       li      r3, 1
-       b       1b
+       li      r12, BOOK3S_INTERRUPT_HV_RM_HARD
+1:
+       ld      r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1)
+       addi    r1, r1, PPC_MIN_STKFRM
+       mtlr    r0
+       blr
 
 /*
  * Save away FP, VMX and VSX registers.
index 05aa11399a7867c1a4148d0f9eb4bf80bca3ee9d..3bdc639157c177eaaea320eece1209e4e2df92c0 100644 (file)
@@ -99,6 +99,10 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
                return 0;
        }
 
+       /* Record which CPU this arrived on for passed-through interrupts */
+       if (state->host_irq)
+               state->intr_cpu = raw_smp_processor_id();
+
        /* Attempt delivery */
        icp_deliver_irq(xics, NULL, irq);
 
@@ -812,7 +816,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
        return H_SUCCESS;
 }
 
-static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
 {
        struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
        struct kvmppc_icp *icp = vcpu->arch.icp;
@@ -841,6 +845,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
 
        return H_SUCCESS;
 }
+EXPORT_SYMBOL_GPL(kvmppc_xics_rm_complete);
 
 int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
 {
@@ -892,6 +897,21 @@ EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
 
 /* -- Initialisation code etc. -- */
 
+static void xics_debugfs_irqmap(struct seq_file *m,
+                               struct kvmppc_passthru_irqmap *pimap)
+{
+       int i;
+
+       if (!pimap)
+               return;
+       seq_printf(m, "=========\nPIRQ mappings: %d maps\n=========\n",
+                               pimap->n_mapped);
+       for (i = 0; i < pimap->n_mapped; i++)  {
+               seq_printf(m, "r_hwirq=%x, v_hwirq=%x\n",
+                       pimap->mapped[i].r_hwirq, pimap->mapped[i].v_hwirq);
+       }
+}
+
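For reference, each mapped entry above prints one line of the form below in the XICS debugfs dump (the values are made up):

r_hwirq=d05, v_hwirq=10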
 static int xics_debug_show(struct seq_file *m, void *private)
 {
        struct kvmppc_xics *xics = m->private;
@@ -913,6 +933,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
        t_check_resend = 0;
        t_reject = 0;
 
+       xics_debugfs_irqmap(m, kvm->arch.pimap);
+
        seq_printf(m, "=========\nICP state\n=========\n");
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -1252,6 +1274,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 {
        struct kvmppc_xics *xics = kvm->arch.xics;
 
+       if (!xics)
+               return -ENODEV;
        return ics_deliver_irq(xics, irq, level);
 }
 
@@ -1418,3 +1442,34 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
        return pin;
 }
+
+void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
+                           unsigned long host_irq)
+{
+       struct kvmppc_xics *xics = kvm->arch.xics;
+       struct kvmppc_ics *ics;
+       u16 idx;
+
+       ics = kvmppc_xics_find_ics(xics, irq, &idx);
+       if (!ics)
+               return;
+
+       ics->irq_state[idx].host_irq = host_irq;
+       ics->irq_state[idx].intr_cpu = -1;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xics_set_mapped);
+
+void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq,
+                           unsigned long host_irq)
+{
+       struct kvmppc_xics *xics = kvm->arch.xics;
+       struct kvmppc_ics *ics;
+       u16 idx;
+
+       ics = kvmppc_xics_find_ics(xics, irq, &idx);
+       if (!ics)
+               return;
+
+       ics->irq_state[idx].host_irq = 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xics_clr_mapped);
index a46b954055c435524aa6d8d4020291e8a32fab61..2a50320b55ca62a023ccd49c809b1d9c393eac08 100644 (file)
@@ -42,6 +42,8 @@ struct ics_irq_state {
        u8  lsi;                /* level-sensitive interrupt */
        u8  asserted; /* Only for LSI */
        u8  exists;
+       int intr_cpu;
+       u32 host_irq;
 };
 
 /* Atomic ICP state, updated with a single compare & swap */
index 29911a07bcdb071d1f94db55ca8e7ce4e2b9e5d3..0a2eeb143e8781f37cb0ef1f426a47813c500931 100644 (file)
@@ -743,7 +743,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
        char *virt;
        struct page **pages;
        struct tlbe_priv *privs[2] = {};
-       u64 *g2h_bitmap = NULL;
+       u64 *g2h_bitmap;
        size_t array_len;
        u32 sets;
        int num_pages, ret, i;
@@ -779,41 +779,44 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 
        num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
                    cfg->array / PAGE_SIZE;
-       pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
+       pages = kmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
        if (!pages)
                return -ENOMEM;
 
        ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
        if (ret < 0)
-               goto err_pages;
+               goto free_pages;
 
        if (ret != num_pages) {
                num_pages = ret;
                ret = -EFAULT;
-               goto err_put_page;
+               goto put_pages;
        }
 
        virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
        if (!virt) {
                ret = -ENOMEM;
-               goto err_put_page;
+               goto put_pages;
        }
 
-       privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
-                          GFP_KERNEL);
-       privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
-                          GFP_KERNEL);
+       privs[0] = kcalloc(params.tlb_sizes[0], sizeof(*privs[0]), GFP_KERNEL);
+       if (!privs[0]) {
+               ret = -ENOMEM;
+               goto put_pages;
+       }
 
-       if (!privs[0] || !privs[1]) {
+       privs[1] = kcalloc(params.tlb_sizes[1], sizeof(*privs[1]), GFP_KERNEL);
+       if (!privs[1]) {
                ret = -ENOMEM;
-               goto err_privs;
+               goto free_privs_first;
        }
 
-       g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
-                            GFP_KERNEL);
+       g2h_bitmap = kcalloc(params.tlb_sizes[1],
+                            sizeof(*g2h_bitmap),
+                            GFP_KERNEL);
        if (!g2h_bitmap) {
                ret = -ENOMEM;
-               goto err_privs;
+               goto free_privs_second;
        }
 
        free_gtlb(vcpu_e500);
@@ -845,16 +848,14 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 
        kvmppc_recalc_tlb1map_range(vcpu_e500);
        return 0;
-
-err_privs:
-       kfree(privs[0]);
+ free_privs_second:
        kfree(privs[1]);
-
-err_put_page:
+ free_privs_first:
+       kfree(privs[0]);
+ put_pages:
        for (i = 0; i < num_pages; i++)
                put_page(pages[i]);
-
-err_pages:
+ free_pages:
        kfree(pages);
        return ret;
 }
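The allocation changes in this hunk follow the usual kernel pattern of letting the allocator perform the element-count multiplication; a minimal sketch (generic names, not from this file) of why kcalloc()/kmalloc_array() are preferred over open-coded sizeof() * n:

#include <linux/slab.h>

/* kmalloc_array()/kcalloc() check count * size for overflow and return
 * NULL instead of silently allocating a short buffer; kcalloc() also
 * zeroes the memory, matching the kzalloc() calls it replaces above.
 */
static u64 *example_alloc_bitmap(size_t count)
{
	return kcalloc(count, sizeof(u64), GFP_KERNEL);
}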
@@ -904,8 +905,6 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
        struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
-       int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
-       int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
 
        if (e500_mmu_host_init(vcpu_e500))
                goto err;
@@ -920,27 +919,30 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
        vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;
        vcpu_e500->gtlb_params[1].sets = 1;
 
-       vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
+       vcpu_e500->gtlb_arch = kmalloc_array(KVM_E500_TLB0_SIZE +
+                                            KVM_E500_TLB1_SIZE,
+                                            sizeof(*vcpu_e500->gtlb_arch),
+                                            GFP_KERNEL);
        if (!vcpu_e500->gtlb_arch)
                return -ENOMEM;
 
        vcpu_e500->gtlb_offset[0] = 0;
        vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
 
-       vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) *
-                                         vcpu_e500->gtlb_params[0].entries,
+       vcpu_e500->gtlb_priv[0] = kcalloc(vcpu_e500->gtlb_params[0].entries,
+                                         sizeof(struct tlbe_ref),
                                          GFP_KERNEL);
        if (!vcpu_e500->gtlb_priv[0])
                goto err;
 
-       vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) *
-                                         vcpu_e500->gtlb_params[1].entries,
+       vcpu_e500->gtlb_priv[1] = kcalloc(vcpu_e500->gtlb_params[1].entries,
+                                         sizeof(struct tlbe_ref),
                                          GFP_KERNEL);
        if (!vcpu_e500->gtlb_priv[1])
                goto err;
 
-       vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) *
-                                         vcpu_e500->gtlb_params[1].entries,
+       vcpu_e500->g2h_tlb1_map = kcalloc(vcpu_e500->gtlb_params[1].entries,
+                                         sizeof(*vcpu_e500->g2h_tlb1_map),
                                          GFP_KERNEL);
        if (!vcpu_e500->g2h_tlb1_map)
                goto err;
index 6ce40dd6fe51a1d8701020bfc8257ba3c4ea6c85..0b7d6642265066c69cef30ee64ff415c12204edd 100644 (file)
@@ -27,6 +27,8 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/module.h>
+#include <linux/irqbypass.h>
+#include <linux/kvm_irqfd.h>
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
@@ -739,6 +741,42 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 #endif
 }
 
+/*
+ * irq_bypass_add_producer and irq_bypass_del_producer are only
+ * useful if the architecture supports PCI passthrough.
+ * irq_bypass_stop and irq_bypass_start are not needed, so no
+ * kvm_ops callbacks are defined for them.
+ */
+bool kvm_arch_has_irq_bypass(void)
+{
+       return ((kvmppc_hv_ops && kvmppc_hv_ops->irq_bypass_add_producer) ||
+               (kvmppc_pr_ops && kvmppc_pr_ops->irq_bypass_add_producer));
+}
+
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+                                    struct irq_bypass_producer *prod)
+{
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(cons, struct kvm_kernel_irqfd, consumer);
+       struct kvm *kvm = irqfd->kvm;
+
+       if (kvm->arch.kvm_ops->irq_bypass_add_producer)
+               return kvm->arch.kvm_ops->irq_bypass_add_producer(cons, prod);
+
+       return 0;
+}
+
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+                                     struct irq_bypass_producer *prod)
+{
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(cons, struct kvm_kernel_irqfd, consumer);
+       struct kvm *kvm = irqfd->kvm;
+
+       if (kvm->arch.kvm_ops->irq_bypass_del_producer)
+               kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod);
+}
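Both hooks above just forward to the per-VM kvm_ops, so an implementation only needs to populate those two callbacks; a hedged sketch with invented callback names (the field names are the ones dereferenced above):

/* Hypothetical wiring, for illustration only. */
static struct kvmppc_ops example_ops = {
	/* ... existing callbacks ... */
	.irq_bypass_add_producer = example_irq_bypass_add_producer,
	.irq_bypass_del_producer = example_irq_bypass_del_producer,
};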
+
 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                                       struct kvm_run *run)
 {
@@ -1167,6 +1205,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
        return r;
 }
 
+bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_MPIC
+       if (kvm->arch.mpic)
+               return true;
+#endif
+#ifdef CONFIG_KVM_XICS
+       if (kvm->arch.xics)
+               return true;
+#endif
+       return false;
+}
+
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                     struct kvm_mp_state *mp_state)
 {
index 33d9daff57832edf2f531cc8175a983f526a3ca2..fb21990c0fb47fbaebb9eafdcec25deeb9d8fd7a 100644 (file)
@@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked,
                   __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
 );
 
+TRACE_EVENT(kvmppc_vcore_wakeup,
+       TP_PROTO(int do_sleep, __u64 ns),
+
+       TP_ARGS(do_sleep, ns),
+
+       TP_STRUCT__entry(
+               __field(__u64,  ns)
+               __field(int,    waited)
+               __field(pid_t,  tgid)
+       ),
+
+       TP_fast_assign(
+               __entry->ns     = ns;
+               __entry->waited = do_sleep;
+               __entry->tgid   = current->tgid;
+       ),
+
+       TP_printk("%s time %lld ns, tgid=%d",
+               __entry->waited ? "wait" : "poll",
+               __entry->ns, __entry->tgid)
+);
+
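The TRACE_EVENT above generates a trace_kvmppc_vcore_wakeup() helper; presumably the vcore poll/sleep path in book3s_hv.c (not shown in this section) emits it roughly as follows, where block_ns is the measured duration in nanoseconds:

	trace_kvmppc_vcore_wakeup(do_sleep, block_ns);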
 TRACE_EVENT(kvmppc_run_vcpu_enter,
        TP_PROTO(struct kvm_vcpu *vcpu),
 
index 0e4e9654bd2c1fdc07e265d3a7c35b1ab2917dff..83ddc0e171b0d64e069e288e15f08074fce8e3a6 100644 (file)
@@ -493,36 +493,6 @@ static void native_hugepage_invalidate(unsigned long vsid,
 }
 #endif
 
-static inline int __hpte_actual_psize(unsigned int lp, int psize)
-{
-       int i, shift;
-       unsigned int mask;
-
-       /* start from 1 ignoring MMU_PAGE_4K */
-       for (i = 1; i < MMU_PAGE_COUNT; i++) {
-
-               /* invalid penc */
-               if (mmu_psize_defs[psize].penc[i] == -1)
-                       continue;
-               /*
-                * encoding bits per actual page size
-                *        PTE LP     actual page size
-                *    rrrr rrrz         >=8KB
-                *    rrrr rrzz         >=16KB
-                *    rrrr rzzz         >=32KB
-                *    rrrr zzzz         >=64KB
-                * .......
-                */
-               shift = mmu_psize_defs[i].shift - LP_SHIFT;
-               if (shift > LP_BITS)
-                       shift = LP_BITS;
-               mask = (1 << shift) - 1;
-               if ((lp & mask) == mmu_psize_defs[psize].penc[i])
-                       return i;
-       }
-       return -1;
-}
-
 static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
                        int *psize, int *apsize, int *ssize, unsigned long *vpn)
 {
@@ -538,16 +508,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
                size   = MMU_PAGE_4K;
                a_size = MMU_PAGE_4K;
        } else {
-               for (size = 0; size < MMU_PAGE_COUNT; size++) {
-
-                       /* valid entries have a shift value */
-                       if (!mmu_psize_defs[size].shift)
-                               continue;
-
-                       a_size = __hpte_actual_psize(lp, size);
-                       if (a_size != -1)
-                               break;
-               }
+               size = hpte_page_sizes[lp] & 0xf;
+               a_size = hpte_page_sizes[lp] >> 4;
        }
        /* This works for all page sizes, and for 256M and 1T segments */
        if (cpu_has_feature(CPU_FTR_ARCH_300))
index 0821556e16f4b22e8db28071e8a429499aa5fa87..ef3ae891a3db49a36cbe32e36fa8f3b243c93db9 100644 (file)
@@ -93,6 +93,9 @@ static unsigned long _SDR1;
 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 EXPORT_SYMBOL_GPL(mmu_psize_defs);
 
+u8 hpte_page_sizes[1 << LP_BITS];
+EXPORT_SYMBOL_GPL(hpte_page_sizes);
+
 struct hash_pte *htab_address;
 unsigned long htab_size_bytes;
 unsigned long htab_hash_mask;
@@ -564,8 +567,60 @@ static void __init htab_scan_page_sizes(void)
 #endif /* CONFIG_HUGETLB_PAGE */
 }
 
+/*
+ * Fill in the hpte_page_sizes[] array.
+ * We go through the mmu_psize_defs[] array looking for all the
+ * supported base/actual page size combinations.  Each combination
+ * has a unique page-size encoding (penc) value in the low bits of
+ * the LP field of the HPTE.  For actual page sizes less than 1MB,
+ * some of the upper LP bits are used for RPN bits, meaning that
+ * we need to fill in several entries in hpte_page_sizes[].
+ *
+ * In diagrammatic form, with r = RPN bits and z = page size bits:
+ *        PTE LP     actual page size
+ *    rrrr rrrz                >=8KB
+ *    rrrr rrzz                >=16KB
+ *    rrrr rzzz                >=32KB
+ *    rrrr zzzz                >=64KB
+ *    ...
+ *
+ * The zzzz bits are implementation-specific but are chosen so that
+ * no encoding for a larger page size uses the same value in its
+ * low-order N bits as the encoding for the 2^(12+N) byte page size
+ * (if it exists).
+ */
+static void init_hpte_page_sizes(void)
+{
+       long int ap, bp;
+       long int shift, penc;
+
+       for (bp = 0; bp < MMU_PAGE_COUNT; ++bp) {
+               if (!mmu_psize_defs[bp].shift)
+                       continue;       /* not a supported page size */
+               for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
+                       penc = mmu_psize_defs[bp].penc[ap];
+                       if (penc == -1)
+                               continue;
+                       shift = mmu_psize_defs[ap].shift - LP_SHIFT;
+                       if (shift <= 0)
+                               continue;       /* should never happen */
+                       /*
+                        * For page sizes less than 1MB, this loop
+                        * replicates the entry for all possible values
+                        * of the rrrr bits.
+                        */
+                       while (penc < (1 << LP_BITS)) {
+                               hpte_page_sizes[penc] = (ap << 4) | bp;
+                               penc += 1 << shift;
+                       }
+               }
+       }
+}
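Once the table is filled in, decoding an HPTE's LP value reduces to a single lookup, which is what the hash_native_64.c hunk above now does; in isolation the decode step looks like this (a sketch using the packing written by init_hpte_page_sizes()):

static void example_decode_lp(unsigned int lp, int *base_psize, int *actual_psize)
{
	u8 entry = hpte_page_sizes[lp];	/* lp is below (1 << LP_BITS) */

	*base_psize = entry & 0xf;	/* low nibble: base page size index */
	*actual_psize = entry >> 4;	/* high nibble: actual page size index */
}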
+
 static void __init htab_init_page_sizes(void)
 {
+       init_hpte_page_sizes();
+
        if (!debug_pagealloc_enabled()) {
                /*
                 * Pick a size for the linear mapping. Currently, we only
index 3d29d40eb0e9e943150850c93619197cda85fefe..44d2d842cee79009f012842dcfbda7a1896b00f4 100644 (file)
@@ -208,6 +208,7 @@ OPAL_CALL(opal_pci_config_write_byte,               OPAL_PCI_CONFIG_WRITE_BYTE);
 OPAL_CALL(opal_pci_config_write_half_word,     OPAL_PCI_CONFIG_WRITE_HALF_WORD);
 OPAL_CALL(opal_pci_config_write_word,          OPAL_PCI_CONFIG_WRITE_WORD);
 OPAL_CALL(opal_set_xive,                       OPAL_SET_XIVE);
+OPAL_CALL_REAL(opal_rm_set_xive,               OPAL_SET_XIVE);
 OPAL_CALL(opal_get_xive,                       OPAL_GET_XIVE);
 OPAL_CALL(opal_register_exception_handler,     OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
 OPAL_CALL(opal_pci_eeh_freeze_status,          OPAL_PCI_EEH_FREEZE_STATUS);
index c16d790808f18cae1248d444253f12bee3c7fdd7..71dde6c9d46f25a0875df1428af43f47fc2e2362 100644 (file)
@@ -2711,15 +2711,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 }
 
 #ifdef CONFIG_PCI_MSI
-static void pnv_ioda2_msi_eoi(struct irq_data *d)
+int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
 {
-       unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
-       struct irq_chip *chip = irq_data_get_irq_chip(d);
        struct pnv_phb *phb = container_of(chip, struct pnv_phb,
                                           ioda.irq_chip);
+
+       return opal_pci_msi_eoi(phb->opal_id, hw_irq);
+}
+
+static void pnv_ioda2_msi_eoi(struct irq_data *d)
+{
        int64_t rc;
+       unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+       struct irq_chip *chip = irq_data_get_irq_chip(d);
 
-       rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
+       rc = pnv_opal_pci_msi_eoi(chip, hw_irq);
        WARN_ON_ONCE(rc);
 
        icp_native_eoi(d);
@@ -2749,6 +2755,16 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
        irq_set_chip(virq, &phb->ioda.irq_chip);
 }
 
+/*
+ * Returns true iff chip is something that we could call
+ * pnv_opal_pci_msi_eoi for.
+ */
+bool is_pnv_opal_msi(struct irq_chip *chip)
+{
+       return chip->irq_eoi == pnv_ioda2_msi_eoi;
+}
+EXPORT_SYMBOL_GPL(is_pnv_opal_msi);
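The predicate is meant to guard the EOI helper exported above; a hedged sketch of the intended pairing (the surrounding caller is hypothetical):

	if (is_pnv_opal_msi(chip))
		rc = pnv_opal_pci_msi_eoi(chip, hw_irq);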
+
 static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
                                  unsigned int hwirq, unsigned int virq,
                                  unsigned int is_64, struct msi_msg *msg)
index 876173ca815fd4ee0d1053a25968db04372c749f..a41faf34b034c86a7e0bb214597ac30f400ddae4 100644 (file)
@@ -245,72 +245,72 @@ struct sie_page {
 } __packed;
 
 struct kvm_vcpu_stat {
-       u32 exit_userspace;
-       u32 exit_null;
-       u32 exit_external_request;
-       u32 exit_external_interrupt;
-       u32 exit_stop_request;
-       u32 exit_validity;
-       u32 exit_instruction;
-       u32 exit_pei;
-       u32 halt_successful_poll;
-       u32 halt_attempted_poll;
-       u32 halt_poll_invalid;
-       u32 halt_wakeup;
-       u32 instruction_lctl;
-       u32 instruction_lctlg;
-       u32 instruction_stctl;
-       u32 instruction_stctg;
-       u32 exit_program_interruption;
-       u32 exit_instr_and_program;
-       u32 exit_operation_exception;
-       u32 deliver_external_call;
-       u32 deliver_emergency_signal;
-       u32 deliver_service_signal;
-       u32 deliver_virtio_interrupt;
-       u32 deliver_stop_signal;
-       u32 deliver_prefix_signal;
-       u32 deliver_restart_signal;
-       u32 deliver_program_int;
-       u32 deliver_io_int;
-       u32 exit_wait_state;
-       u32 instruction_pfmf;
-       u32 instruction_stidp;
-       u32 instruction_spx;
-       u32 instruction_stpx;
-       u32 instruction_stap;
-       u32 instruction_storage_key;
-       u32 instruction_ipte_interlock;
-       u32 instruction_stsch;
-       u32 instruction_chsc;
-       u32 instruction_stsi;
-       u32 instruction_stfl;
-       u32 instruction_tprot;
-       u32 instruction_sie;
-       u32 instruction_essa;
-       u32 instruction_sthyi;
-       u32 instruction_sigp_sense;
-       u32 instruction_sigp_sense_running;
-       u32 instruction_sigp_external_call;
-       u32 instruction_sigp_emergency;
-       u32 instruction_sigp_cond_emergency;
-       u32 instruction_sigp_start;
-       u32 instruction_sigp_stop;
-       u32 instruction_sigp_stop_store_status;
-       u32 instruction_sigp_store_status;
-       u32 instruction_sigp_store_adtl_status;
-       u32 instruction_sigp_arch;
-       u32 instruction_sigp_prefix;
-       u32 instruction_sigp_restart;
-       u32 instruction_sigp_init_cpu_reset;
-       u32 instruction_sigp_cpu_reset;
-       u32 instruction_sigp_unknown;
-       u32 diagnose_10;
-       u32 diagnose_44;
-       u32 diagnose_9c;
-       u32 diagnose_258;
-       u32 diagnose_308;
-       u32 diagnose_500;
+       u64 exit_userspace;
+       u64 exit_null;
+       u64 exit_external_request;
+       u64 exit_external_interrupt;
+       u64 exit_stop_request;
+       u64 exit_validity;
+       u64 exit_instruction;
+       u64 exit_pei;
+       u64 halt_successful_poll;
+       u64 halt_attempted_poll;
+       u64 halt_poll_invalid;
+       u64 halt_wakeup;
+       u64 instruction_lctl;
+       u64 instruction_lctlg;
+       u64 instruction_stctl;
+       u64 instruction_stctg;
+       u64 exit_program_interruption;
+       u64 exit_instr_and_program;
+       u64 exit_operation_exception;
+       u64 deliver_external_call;
+       u64 deliver_emergency_signal;
+       u64 deliver_service_signal;
+       u64 deliver_virtio_interrupt;
+       u64 deliver_stop_signal;
+       u64 deliver_prefix_signal;
+       u64 deliver_restart_signal;
+       u64 deliver_program_int;
+       u64 deliver_io_int;
+       u64 exit_wait_state;
+       u64 instruction_pfmf;
+       u64 instruction_stidp;
+       u64 instruction_spx;
+       u64 instruction_stpx;
+       u64 instruction_stap;
+       u64 instruction_storage_key;
+       u64 instruction_ipte_interlock;
+       u64 instruction_stsch;
+       u64 instruction_chsc;
+       u64 instruction_stsi;
+       u64 instruction_stfl;
+       u64 instruction_tprot;
+       u64 instruction_sie;
+       u64 instruction_essa;
+       u64 instruction_sthyi;
+       u64 instruction_sigp_sense;
+       u64 instruction_sigp_sense_running;
+       u64 instruction_sigp_external_call;
+       u64 instruction_sigp_emergency;
+       u64 instruction_sigp_cond_emergency;
+       u64 instruction_sigp_start;
+       u64 instruction_sigp_stop;
+       u64 instruction_sigp_stop_store_status;
+       u64 instruction_sigp_store_status;
+       u64 instruction_sigp_store_adtl_status;
+       u64 instruction_sigp_arch;
+       u64 instruction_sigp_prefix;
+       u64 instruction_sigp_restart;
+       u64 instruction_sigp_init_cpu_reset;
+       u64 instruction_sigp_cpu_reset;
+       u64 instruction_sigp_unknown;
+       u64 diagnose_10;
+       u64 diagnose_44;
+       u64 diagnose_9c;
+       u64 diagnose_258;
+       u64 diagnose_308;
+       u64 diagnose_500;
 };
 
 #define PGM_OPERATION                  0x01
@@ -577,7 +577,7 @@ struct kvm_vcpu_arch {
 };
 
 struct kvm_vm_stat {
-       u32 remote_tlb_flush;
+       ulong remote_tlb_flush;
 };
 
 struct kvm_arch_memory_slot {
index 4c738c206be3830b66d387b951e4feac861985ed..cd82bf74e7a551ad3954498b03af7e1cec8021b1 100644 (file)
@@ -792,45 +792,45 @@ struct kvm_arch {
 };
 
 struct kvm_vm_stat {
-       u32 mmu_shadow_zapped;
-       u32 mmu_pte_write;
-       u32 mmu_pte_updated;
-       u32 mmu_pde_zapped;
-       u32 mmu_flooded;
-       u32 mmu_recycled;
-       u32 mmu_cache_miss;
-       u32 mmu_unsync;
-       u32 remote_tlb_flush;
-       u32 lpages;
+       ulong mmu_shadow_zapped;
+       ulong mmu_pte_write;
+       ulong mmu_pte_updated;
+       ulong mmu_pde_zapped;
+       ulong mmu_flooded;
+       ulong mmu_recycled;
+       ulong mmu_cache_miss;
+       ulong mmu_unsync;
+       ulong remote_tlb_flush;
+       ulong lpages;
 };
 
 struct kvm_vcpu_stat {
-       u32 pf_fixed;
-       u32 pf_guest;
-       u32 tlb_flush;
-       u32 invlpg;
-
-       u32 exits;
-       u32 io_exits;
-       u32 mmio_exits;
-       u32 signal_exits;
-       u32 irq_window_exits;
-       u32 nmi_window_exits;
-       u32 halt_exits;
-       u32 halt_successful_poll;
-       u32 halt_attempted_poll;
-       u32 halt_poll_invalid;
-       u32 halt_wakeup;
-       u32 request_irq_exits;
-       u32 irq_exits;
-       u32 host_state_reload;
-       u32 efer_reload;
-       u32 fpu_reload;
-       u32 insn_emulation;
-       u32 insn_emulation_fail;
-       u32 hypercalls;
-       u32 irq_injections;
-       u32 nmi_injections;
+       u64 pf_fixed;
+       u64 pf_guest;
+       u64 tlb_flush;
+       u64 invlpg;
+
+       u64 exits;
+       u64 io_exits;
+       u64 mmio_exits;
+       u64 signal_exits;
+       u64 irq_window_exits;
+       u64 nmi_window_exits;
+       u64 halt_exits;
+       u64 halt_successful_poll;
+       u64 halt_attempted_poll;
+       u64 halt_poll_invalid;
+       u64 halt_wakeup;
+       u64 request_irq_exits;
+       u64 irq_exits;
+       u64 host_state_reload;
+       u64 efer_reload;
+       u64 fpu_reload;
+       u64 insn_emulation;
+       u64 insn_emulation_fail;
+       u64 hypercalls;
+       u64 irq_injections;
+       u64 nmi_injections;
 };
 
 struct x86_instruction_info;
index b3fa12ce116687b1b7ae8e468d5c71637e26ca3e..a00f8e4045cf96973986ed680bebd8f164ad04a6 100644 (file)
@@ -3619,7 +3619,7 @@ static int vm_stat_get_per_vm(void *data, u64 *val)
 {
        struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
 
-       *val = *(u32 *)((void *)stat_data->kvm + stat_data->offset);
+       *val = *(ulong *)((void *)stat_data->kvm + stat_data->offset);
 
        return 0;
 }
@@ -3649,7 +3649,7 @@ static int vcpu_stat_get_per_vm(void *data, u64 *val)
        *val = 0;
 
        kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
-               *val += *(u32 *)((void *)vcpu + stat_data->offset);
+               *val += *(u64 *)((void *)vcpu + stat_data->offset);
 
        return 0;
 }
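These readers recover each counter from its byte offset within struct kvm or struct kvm_vcpu, which is why widening the stat fields to ulong/u64 in the headers above must be matched here; schematically (a sketch, not the full debugfs plumbing):

/* 'offset' comes from the per-architecture stats table, e.g.
 * offsetof(struct kvm_vcpu, stat.halt_wakeup), so the width of the
 * cast must match the width of the field being read.
 */
static u64 example_read_vcpu_stat(struct kvm_vcpu *vcpu, size_t offset)
{
	return *(u64 *)((void *)vcpu + offset);
}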