Merge remote-tracking branch 'kvm/linux-next'
author Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2016 02:21:37 +0000 (12:21 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2016 02:21:37 +0000 (12:21 +1000)
19 files changed:
arch/s390/include/asm/kvm_host.h
arch/s390/kernel/asm-offsets.c
arch/s390/kvm/gaccess.c
arch/s390/kvm/guestdbg.c
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
virt/kvm/arm/arch_timer.c
virt/kvm/eventfd.c
virt/kvm/kvm_main.c

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8e5daf7a76cedf2e88456af8caa69075250e9c86..876173ca815fd4ee0d1053a25968db04372c749f 100644
@@ -28,7 +28,7 @@
 
 #define KVM_S390_BSCA_CPU_SLOTS 64
 #define KVM_S390_ESCA_CPU_SLOTS 248
-#define KVM_MAX_VCPUS KVM_S390_ESCA_CPU_SLOTS
+#define KVM_MAX_VCPUS 255
 #define KVM_USER_MEM_SLOTS 32
 
 /*
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 1f95cc1faeb7624031b32c282953cc05cc001c3c..f3df9e0a5dec6383387097a0edb7cf591077b687 100644
@@ -125,6 +125,7 @@ int main(void)
        OFFSET(__LC_STFL_FAC_LIST, lowcore, stfl_fac_list);
        OFFSET(__LC_STFLE_FAC_LIST, lowcore, stfle_fac_list);
        OFFSET(__LC_MCCK_CODE, lowcore, mcck_interruption_code);
+       OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code);
        OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address);
        OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr);
        OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw);
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 54200208bf24fe73f0fdb8c7b1aac639e6160e86..4aa8a7e2a1da479c8e33162814d1e9f1a4908952 100644
@@ -495,6 +495,18 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
        tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
 
        switch (code) {
+       case PGM_PROTECTION:
+               switch (prot) {
+               case PROT_TYPE_ALC:
+                       tec->b60 = 1;
+                       /* FALL THROUGH */
+               case PROT_TYPE_DAT:
+                       tec->b61 = 1;
+                       break;
+               default: /* LA and KEYC set b61 to 0, other params undefined */
+                       return code;
+               }
+               /* FALL THROUGH */
        case PGM_ASCE_TYPE:
        case PGM_PAGE_TRANSLATION:
        case PGM_REGION_FIRST_TRANS:
@@ -504,8 +516,7 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
                /*
                 * op_access_id only applies to MOVE_PAGE -> set bit 61
                 * exc_access_id has to be set to 0 for some instructions. Both
-                * cases have to be handled by the caller. We can always store
-                * exc_access_id, as it is undefined for non-ar cases.
+                * cases have to be handled by the caller.
                 */
                tec->addr = gva >> PAGE_SHIFT;
                tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
@@ -516,25 +527,13 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
        case PGM_ASTE_VALIDITY:
        case PGM_ASTE_SEQUENCE:
        case PGM_EXTENDED_AUTHORITY:
+               /*
+                * We can always store exc_access_id, as it is
+                * undefined for non-ar cases. It is undefined for
+                * most DAT protection exceptions.
+                */
                pgm->exc_access_id = ar;
                break;
-       case PGM_PROTECTION:
-               switch (prot) {
-               case PROT_TYPE_ALC:
-                       tec->b60 = 1;
-                       /* FALL THROUGH */
-               case PROT_TYPE_DAT:
-                       tec->b61 = 1;
-                       tec->addr = gva >> PAGE_SHIFT;
-                       tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
-                       tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
-                       /* exc_access_id is undefined for most cases */
-                       pgm->exc_access_id = ar;
-                       break;
-               default: /* LA and KEYC set b61 to 0, other params undefined */
-                       break;
-               }
-               break;
        }
        return code;
 }
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 31a05330d11c77fc0c34cd30ea54bc2075a2c3d9..d7c6a7f53cedb3b1694589d6644346570b12ea1b 100644
@@ -206,7 +206,7 @@ static int __import_wp_info(struct kvm_vcpu *vcpu,
 int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
                            struct kvm_guest_debug *dbg)
 {
-       int ret = 0, nr_wp = 0, nr_bp = 0, i, size;
+       int ret = 0, nr_wp = 0, nr_bp = 0, i;
        struct kvm_hw_breakpoint *bp_data = NULL;
        struct kvm_hw_wp_info_arch *wp_info = NULL;
        struct kvm_hw_bp_info_arch *bp_info = NULL;
@@ -216,17 +216,10 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
        else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
                return -EINVAL;
 
-       size = dbg->arch.nr_hw_bp * sizeof(struct kvm_hw_breakpoint);
-       bp_data = kmalloc(size, GFP_KERNEL);
-       if (!bp_data) {
-               ret = -ENOMEM;
-               goto error;
-       }
-
-       if (copy_from_user(bp_data, dbg->arch.hw_bp, size)) {
-               ret = -EFAULT;
-               goto error;
-       }
+       bp_data = memdup_user(dbg->arch.hw_bp,
+                             sizeof(*bp_data) * dbg->arch.nr_hw_bp);
+       if (IS_ERR(bp_data))
+               return PTR_ERR(bp_data);
 
        for (i = 0; i < dbg->arch.nr_hw_bp; i++) {
                switch (bp_data[i].type) {
@@ -241,17 +234,19 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
                }
        }
 
-       size = nr_wp * sizeof(struct kvm_hw_wp_info_arch);
-       if (size > 0) {
-               wp_info = kmalloc(size, GFP_KERNEL);
+       if (nr_wp > 0) {
+               wp_info = kmalloc_array(nr_wp,
+                                       sizeof(*wp_info),
+                                       GFP_KERNEL);
                if (!wp_info) {
                        ret = -ENOMEM;
                        goto error;
                }
        }
-       size = nr_bp * sizeof(struct kvm_hw_bp_info_arch);
-       if (size > 0) {
-               bp_info = kmalloc(size, GFP_KERNEL);
+       if (nr_bp > 0) {
+               bp_info = kmalloc_array(nr_bp,
+                                       sizeof(*bp_info),
+                                       GFP_KERNEL);
                if (!bp_info) {
                        ret = -ENOMEM;
                        goto error;
@@ -382,14 +377,20 @@ void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu)
        vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
 }
 
+#define PER_CODE_MASK          (PER_EVENT_MASK >> 24)
+#define PER_CODE_BRANCH                (PER_EVENT_BRANCH >> 24)
+#define PER_CODE_IFETCH                (PER_EVENT_IFETCH >> 24)
+#define PER_CODE_STORE         (PER_EVENT_STORE >> 24)
+#define PER_CODE_STORE_REAL    (PER_EVENT_STORE_REAL >> 24)
+
 #define per_bp_event(code) \
-                       (code & (PER_EVENT_IFETCH | PER_EVENT_BRANCH))
+                       (code & (PER_CODE_IFETCH | PER_CODE_BRANCH))
 #define per_write_wp_event(code) \
-                       (code & (PER_EVENT_STORE | PER_EVENT_STORE_REAL))
+                       (code & (PER_CODE_STORE | PER_CODE_STORE_REAL))
 
 static int debug_exit_required(struct kvm_vcpu *vcpu)
 {
-       u32 perc = (vcpu->arch.sie_block->perc << 24);
+       u8 perc = vcpu->arch.sie_block->perc;
        struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
        struct kvm_hw_wp_info_arch *wp_info = NULL;
        struct kvm_hw_bp_info_arch *bp_info = NULL;
@@ -444,7 +445,7 @@ int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu)
        const u8 ilen = kvm_s390_get_ilen(vcpu);
        struct kvm_s390_pgm_info pgm_info = {
                .code = PGM_PER,
-               .per_code = PER_EVENT_IFETCH >> 24,
+               .per_code = PER_CODE_IFETCH,
                .per_address = __rewind_psw(vcpu->arch.sie_block->gpsw, ilen),
        };
 
@@ -458,33 +459,33 @@ int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu)
 
 static void filter_guest_per_event(struct kvm_vcpu *vcpu)
 {
-       u32 perc = vcpu->arch.sie_block->perc << 24;
+       const u8 perc = vcpu->arch.sie_block->perc;
        u64 peraddr = vcpu->arch.sie_block->peraddr;
        u64 addr = vcpu->arch.sie_block->gpsw.addr;
        u64 cr9 = vcpu->arch.sie_block->gcr[9];
        u64 cr10 = vcpu->arch.sie_block->gcr[10];
        u64 cr11 = vcpu->arch.sie_block->gcr[11];
        /* filter all events, demanded by the guest */
-       u32 guest_perc = perc & cr9 & PER_EVENT_MASK;
+       u8 guest_perc = perc & (cr9 >> 24) & PER_CODE_MASK;
 
        if (!guest_per_enabled(vcpu))
                guest_perc = 0;
 
        /* filter "successful-branching" events */
-       if (guest_perc & PER_EVENT_BRANCH &&
+       if (guest_perc & PER_CODE_BRANCH &&
            cr9 & PER_CONTROL_BRANCH_ADDRESS &&
            !in_addr_range(addr, cr10, cr11))
-               guest_perc &= ~PER_EVENT_BRANCH;
+               guest_perc &= ~PER_CODE_BRANCH;
 
        /* filter "instruction-fetching" events */
-       if (guest_perc & PER_EVENT_IFETCH &&
+       if (guest_perc & PER_CODE_IFETCH &&
            !in_addr_range(peraddr, cr10, cr11))
-               guest_perc &= ~PER_EVENT_IFETCH;
+               guest_perc &= ~PER_CODE_IFETCH;
 
        /* All other PER events will be given to the guest */
        /* TODO: Check altered address/address space */
 
-       vcpu->arch.sie_block->perc = guest_perc >> 24;
+       vcpu->arch.sie_block->perc = guest_perc;
 
        if (!guest_perc)
                vcpu->arch.sie_block->iprcc &= ~PGM_PER;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index dfd0ca2638fa39a498a7ed363244158ba9917ca0..1cab8a177d0e7b7c80e1556e659c4751b0657187 100644
@@ -29,6 +29,7 @@ static const intercept_handler_t instruction_handlers[256] = {
        [0x01] = kvm_s390_handle_01,
        [0x82] = kvm_s390_handle_lpsw,
        [0x83] = kvm_s390_handle_diag,
+       [0xaa] = kvm_s390_handle_aa,
        [0xae] = kvm_s390_handle_sigp,
        [0xb2] = kvm_s390_handle_b2,
        [0xb6] = kvm_s390_handle_stctl,
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 24524c0f3ef88b2987c7bc1d54d924e811428286..be4db07f70d385af8f8b1230943eb8cbbd747d0a 100644
@@ -24,6 +24,8 @@
 #include <asm/sclp.h>
 #include <asm/isc.h>
 #include <asm/gmap.h>
+#include <asm/switch_to.h>
+#include <asm/nmi.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace-s390.h"
@@ -40,6 +42,7 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
        if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
                return 0;
 
+       BUG_ON(!kvm_s390_use_sca_entries());
        read_lock(&vcpu->kvm->arch.sca_lock);
        if (vcpu->kvm->arch.use_esca) {
                struct esca_block *sca = vcpu->kvm->arch.sca;
@@ -68,6 +71,7 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
 {
        int expect, rc;
 
+       BUG_ON(!kvm_s390_use_sca_entries());
        read_lock(&vcpu->kvm->arch.sca_lock);
        if (vcpu->kvm->arch.use_esca) {
                struct esca_block *sca = vcpu->kvm->arch.sca;
@@ -109,6 +113,8 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        int rc, expect;
 
+       if (!kvm_s390_use_sca_entries())
+               return;
        atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
        read_lock(&vcpu->kvm->arch.sca_lock);
        if (vcpu->kvm->arch.use_esca) {
@@ -400,12 +406,78 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
        return rc ? -EFAULT : 0;
 }
 
+static int __write_machine_check(struct kvm_vcpu *vcpu,
+                                struct kvm_s390_mchk_info *mchk)
+{
+       unsigned long ext_sa_addr;
+       freg_t fprs[NUM_FPRS];
+       union mci mci;
+       int rc;
+
+       mci.val = mchk->mcic;
+       /* take care of lazy register loading via vcpu load/put */
+       save_fpu_regs();
+       save_access_regs(vcpu->run->s.regs.acrs);
+
+       /* Extended save area */
+       rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr,
+                           sizeof(unsigned long));
+       /* Only bits 0-53 are used for address formation */
+       ext_sa_addr &= ~0x3ffUL;
+       if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) {
+               if (write_guest_abs(vcpu, ext_sa_addr, vcpu->run->s.regs.vrs,
+                                   512))
+                       mci.vr = 0;
+       } else {
+               mci.vr = 0;
+       }
+
+       /* General interruption information */
+       rc |= put_guest_lc(vcpu, 1, (u8 __user *) __LC_AR_MODE_ID);
+       rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= put_guest_lc(vcpu, mci.val, (u64 __user *) __LC_MCCK_CODE);
+
+       /* Register-save areas */
+       if (MACHINE_HAS_VX) {
+               convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
+               rc |= write_guest_lc(vcpu, __LC_FPREGS_SAVE_AREA, fprs, 128);
+       } else {
+               rc |= write_guest_lc(vcpu, __LC_FPREGS_SAVE_AREA,
+                                    vcpu->run->s.regs.fprs, 128);
+       }
+       rc |= write_guest_lc(vcpu, __LC_GPREGS_SAVE_AREA,
+                            vcpu->run->s.regs.gprs, 128);
+       rc |= put_guest_lc(vcpu, current->thread.fpu.fpc,
+                          (u32 __user *) __LC_FP_CREG_SAVE_AREA);
+       rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->todpr,
+                          (u32 __user *) __LC_TOD_PROGREG_SAVE_AREA);
+       rc |= put_guest_lc(vcpu, kvm_s390_get_cpu_timer(vcpu),
+                          (u64 __user *) __LC_CPU_TIMER_SAVE_AREA);
+       rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->ckc >> 8,
+                          (u64 __user *) __LC_CLOCK_COMP_SAVE_AREA);
+       rc |= write_guest_lc(vcpu, __LC_AREGS_SAVE_AREA,
+                            &vcpu->run->s.regs.acrs, 64);
+       rc |= write_guest_lc(vcpu, __LC_CREGS_SAVE_AREA,
+                            &vcpu->arch.sie_block->gcr, 128);
+
+       /* Extended interruption information */
+       rc |= put_guest_lc(vcpu, mchk->ext_damage_code,
+                          (u32 __user *) __LC_EXT_DAMAGE_CODE);
+       rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
+                          (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, &mchk->fixed_logout,
+                            sizeof(mchk->fixed_logout));
+       return rc ? -EFAULT : 0;
+}
+
 static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        struct kvm_s390_mchk_info mchk = {};
-       unsigned long adtl_status_addr;
        int deliver = 0;
        int rc = 0;
 
@@ -446,29 +518,9 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
                trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
                                                 KVM_S390_MCHK,
                                                 mchk.cr14, mchk.mcic);
-
-               rc  = kvm_s390_vcpu_store_status(vcpu,
-                                                KVM_S390_STORE_STATUS_PREFIXED);
-               rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR,
-                                   &adtl_status_addr,
-                                   sizeof(unsigned long));
-               rc |= kvm_s390_vcpu_store_adtl_status(vcpu,
-                                                     adtl_status_addr);
-               rc |= put_guest_lc(vcpu, mchk.mcic,
-                                  (u64 __user *) __LC_MCCK_CODE);
-               rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
-                                  (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
-               rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
-                                    &mchk.fixed_logout,
-                                    sizeof(mchk.fixed_logout));
-               rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw,
-                                   sizeof(psw_t));
+               rc = __write_machine_check(vcpu, &mchk);
        }
-       return rc ? -EFAULT : 0;
+       return rc;
 }
 
 static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d6e7e527f0bf3963dd8593b50969be0731ad345b..6b5c7c519e70ffcaca33018ce363f00ac249168d 100644
@@ -384,7 +384,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_S390_BSCA_CPU_SLOTS;
-               if (sclp.has_esca && sclp.has_64bscao)
+               if (!kvm_s390_use_sca_entries())
+                       r = KVM_MAX_VCPUS;
+               else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
@@ -1561,6 +1563,8 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
 {
+       if (!kvm_s390_use_sca_entries())
+               return;
        read_lock(&vcpu->kvm->arch.sca_lock);
        if (vcpu->kvm->arch.use_esca) {
                struct esca_block *sca = vcpu->kvm->arch.sca;
@@ -1578,6 +1582,13 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu)
 
 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
 {
+       if (!kvm_s390_use_sca_entries()) {
+               struct bsca_block *sca = vcpu->kvm->arch.sca;
+
+               /* we still need the basic sca for the ipte control */
+               vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
+               vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+       }
        read_lock(&vcpu->kvm->arch.sca_lock);
        if (vcpu->kvm->arch.use_esca) {
                struct esca_block *sca = vcpu->kvm->arch.sca;
@@ -1658,6 +1669,11 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
 {
        int rc;
 
+       if (!kvm_s390_use_sca_entries()) {
+               if (id < KVM_MAX_VCPUS)
+                       return true;
+               return false;
+       }
        if (id < KVM_S390_BSCA_CPU_SLOTS)
                return true;
        if (!sclp.has_esca || !sclp.has_64bscao)
@@ -1946,8 +1962,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
                vcpu->arch.sie_block->eca |= 1;
        if (sclp.has_sigpif)
                vcpu->arch.sie_block->eca |= 0x10000000U;
-       if (test_kvm_facility(vcpu->kvm, 64))
-               vcpu->arch.sie_block->ecb3 |= 0x01;
        if (test_kvm_facility(vcpu->kvm, 129)) {
                vcpu->arch.sie_block->eca |= 0x00020000;
                vcpu->arch.sie_block->ecd |= 0x20000000;
@@ -2702,6 +2716,19 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
                        kvm_clear_async_pf_completion_queue(vcpu);
        }
+       /*
+        * If userspace sets the riccb (e.g. after migration) to a valid state,
+        * we should enable RI here instead of doing the lazy enablement.
+        */
+       if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
+           test_kvm_facility(vcpu->kvm, 64)) {
+               struct runtime_instr_cb *riccb =
+                       (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
+
+               if (riccb->valid)
+                       vcpu->arch.sie_block->ecb3 |= 0x01;
+       }
+
        kvm_run->kvm_dirty_regs = 0;
 }
 
@@ -2845,38 +2872,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
        return kvm_s390_store_status_unloaded(vcpu, addr);
 }
 
-/*
- * store additional status at address
- */
-int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
-                                       unsigned long gpa)
-{
-       /* Only bits 0-53 are used for address formation */
-       if (!(gpa & ~0x3ff))
-               return 0;
-
-       return write_guest_abs(vcpu, gpa & ~0x3ff,
-                              (void *)&vcpu->run->s.regs.vrs, 512);
-}
-
-int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
-{
-       if (!test_kvm_facility(vcpu->kvm, 129))
-               return 0;
-
-       /*
-        * The guest VXRS are in the host VXRs due to the lazy
-        * copying in vcpu load/put. We can simply call save_fpu_regs()
-        * to save the current register state because we are in the
-        * middle of a load/put cycle.
-        *
-        * Let's update our copies before we save it into the save area.
-        */
-       save_fpu_regs();
-
-       return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
-}
-
 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
 {
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index b8432862a81715761fbe274071d0988021b61a54..3a4e97f1a9e6e1618faf1b6e65c16e109df7a09d 100644
@@ -20,6 +20,7 @@
 #include <linux/kvm_host.h>
 #include <asm/facility.h>
 #include <asm/processor.h>
+#include <asm/sclp.h>
 
 typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 
@@ -245,6 +246,7 @@ static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
 
 /* implemented in priv.c */
 int is_valid_psw(psw_t *psw);
+int kvm_s390_handle_aa(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
@@ -273,10 +275,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu);
 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
-int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
-                                       unsigned long addr);
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
-int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
@@ -389,4 +388,13 @@ static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm)
 
        return &sca->ipte_control;
 }
+static inline int kvm_s390_use_sca_entries(void)
+{
+       /*
+        * Without SIGP interpretation, only SRS interpretation (if available)
+        * might use the entries. By not setting the entries and keeping them
+        * invalid, hardware will not access them but intercept.
+        */
+       return sclp.has_sigpif;
+}
 #endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 46160388e9964ba201a5c61768cc83eaa19eb4bb..e18435355c16f454fdee19e155af08493abe1d44 100644
 #include "kvm-s390.h"
 #include "trace.h"
 
+static int handle_ri(struct kvm_vcpu *vcpu)
+{
+       if (test_kvm_facility(vcpu->kvm, 64)) {
+               vcpu->arch.sie_block->ecb3 |= 0x01;
+               kvm_s390_retry_instr(vcpu);
+               return 0;
+       } else
+               return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
+int kvm_s390_handle_aa(struct kvm_vcpu *vcpu)
+{
+       if ((vcpu->arch.sie_block->ipa & 0xf) <= 4)
+               return handle_ri(vcpu);
+       else
+               return -EOPNOTSUPP;
+}
+
 /* Handle SCK (SET CLOCK) interception */
 static int handle_set_clock(struct kvm_vcpu *vcpu)
 {
@@ -1093,6 +1111,9 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 static const intercept_handler_t eb_handlers[256] = {
        [0x2f] = handle_lctlg,
        [0x25] = handle_stctg,
+       [0x60] = handle_ri,
+       [0x61] = handle_ri,
+       [0x62] = handle_ri,
 };
 
 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 33ae3a4d0159893cacdaf54bbcf3bcbf3be127fd..4c738c206be3830b66d387b951e4feac861985ed 100644
@@ -781,9 +781,11 @@ struct kvm_arch {
        bool disabled_lapic_found;
 
        /* Struct members for AVIC */
+       u32 avic_vm_id;
        u32 ldr_mode;
        struct page *avic_logical_id_table_page;
        struct page *avic_physical_id_table_page;
+       struct hlist_node hnode;
 
        bool x2apic_format;
        bool x2apic_broadcast_quirk_disabled;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 3235e0fe7792765fba4c2f2152cbbd19aa0e832f..afa7bbb596cd745be6494c53bd9764af503616e2 100644
@@ -366,7 +366,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
                F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
                F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
-               F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);
+               F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+               F(AVX512BW) | F(AVX512VL);
 
        /* cpuid 0xD.1.eax */
        const u32 kvm_cpuid_D_1_eax_x86_features =
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b62c8522971180a4ce837c37a8fa079660bd2c27..23b99f3053825d40c6d697cf8c58dfb4ae941208 100644
@@ -1761,9 +1761,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
                if (value & MSR_IA32_APICBASE_ENABLE) {
                        kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
                        static_key_slow_dec_deferred(&apic_hw_disabled);
-               } else
+               } else {
                        static_key_slow_inc(&apic_hw_disabled.key);
-               recalculate_apic_map(vcpu->kvm);
+                       recalculate_apic_map(vcpu->kvm);
+               }
        }
 
        if ((old_value ^ value) & X2APIC_ENABLE) {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3d4cc8cc56a38b05752ef38133b74af9d0e7ae27..d9c7e986b4e4e7e0e7bd9bc28223485e42d7c3bb 100644
@@ -1207,7 +1207,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
  *
  * Return true if tlb need be flushed.
  */
-static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
+static bool spte_write_protect(u64 *sptep, bool pt_protect)
 {
        u64 spte = *sptep;
 
@@ -1233,12 +1233,12 @@ static bool __rmap_write_protect(struct kvm *kvm,
        bool flush = false;
 
        for_each_rmap_spte(rmap_head, &iter, sptep)
-               flush |= spte_write_protect(kvm, sptep, pt_protect);
+               flush |= spte_write_protect(sptep, pt_protect);
 
        return flush;
 }
 
-static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep)
+static bool spte_clear_dirty(u64 *sptep)
 {
        u64 spte = *sptep;
 
@@ -1256,12 +1256,12 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
        bool flush = false;
 
        for_each_rmap_spte(rmap_head, &iter, sptep)
-               flush |= spte_clear_dirty(kvm, sptep);
+               flush |= spte_clear_dirty(sptep);
 
        return flush;
 }
 
-static bool spte_set_dirty(struct kvm *kvm, u64 *sptep)
+static bool spte_set_dirty(u64 *sptep)
 {
        u64 spte = *sptep;
 
@@ -1279,7 +1279,7 @@ static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
        bool flush = false;
 
        for_each_rmap_spte(rmap_head, &iter, sptep)
-               flush |= spte_set_dirty(kvm, sptep);
+               flush |= spte_set_dirty(sptep);
 
        return flush;
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1e6b84b96ea697d7a047f9c6583b42807209e952..1b66c5afca840ef89ec32081aa2971aee6ccfffc 100644
@@ -34,6 +34,8 @@
 #include <linux/sched.h>
 #include <linux/trace_events.h>
 #include <linux/slab.h>
+#include <linux/amd-iommu.h>
+#include <linux/hashtable.h>
 
 #include <asm/apic.h>
 #include <asm/perf_event.h>
@@ -41,6 +43,7 @@
 #include <asm/desc.h>
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
+#include <asm/irq_remapping.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -96,6 +99,19 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
 #define AVIC_UNACCEL_ACCESS_OFFSET_MASK                0xFF0
 #define AVIC_UNACCEL_ACCESS_VECTOR_MASK                0xFFFFFFFF
 
+/* AVIC GATAG is encoded using VM and VCPU IDs */
+#define AVIC_VCPU_ID_BITS              8
+#define AVIC_VCPU_ID_MASK              ((1 << AVIC_VCPU_ID_BITS) - 1)
+
+#define AVIC_VM_ID_BITS                        24
+#define AVIC_VM_ID_NR                  (1 << AVIC_VM_ID_BITS)
+#define AVIC_VM_ID_MASK                        ((1 << AVIC_VM_ID_BITS) - 1)
+
+#define AVIC_GATAG(x, y)               (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
+                                               (y & AVIC_VCPU_ID_MASK))
+#define AVIC_GATAG_TO_VMID(x)          ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
+#define AVIC_GATAG_TO_VCPUID(x)                (x & AVIC_VCPU_ID_MASK)
+
 static bool erratum_383_found __read_mostly;
 
 static const u32 host_save_user_msrs[] = {
@@ -185,6 +201,23 @@ struct vcpu_svm {
        struct page *avic_backing_page;
        u64 *avic_physical_id_cache;
        bool avic_is_running;
+
+       /*
+        * Per-vcpu list of struct amd_svm_iommu_ir:
+        * This is used mainly to store interrupt remapping information used
+        * when updating the vcpu affinity. This avoids the need to scan for
+        * IRTE and try to match ga_tag in the IOMMU driver.
+        */
+       struct list_head ir_list;
+       spinlock_t ir_list_lock;
+};
+
+/*
+ * This is a wrapper of struct amd_iommu_ir_data.
+ */
+struct amd_svm_iommu_ir {
+       struct list_head node;  /* Used by SVM for per-vcpu ir_list */
+       void *data;             /* Storing pointer to struct amd_ir_data */
 };
 
 #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK   (0xFF)
@@ -242,6 +275,10 @@ static int avic;
 module_param(avic, int, S_IRUGO);
 #endif
 
+/* AVIC VM ID bit masks and lock */
+static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
+static DEFINE_SPINLOCK(avic_vm_id_lock);
+
 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -928,6 +965,55 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
        set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
+/* Note:
+ * This hash table is used to map VM_ID to a struct kvm_arch,
+ * when handling AMD IOMMU GALOG notification to schedule in
+ * a particular vCPU.
+ */
+#define SVM_VM_DATA_HASH_BITS  8
+DECLARE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
+static spinlock_t svm_vm_data_hash_lock;
+
+/* Note:
+ * This function is called from IOMMU driver to notify
+ * SVM to schedule in a particular vCPU of a particular VM.
+ */
+static int avic_ga_log_notifier(u32 ga_tag)
+{
+       unsigned long flags;
+       struct kvm_arch *ka = NULL;
+       struct kvm_vcpu *vcpu = NULL;
+       u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
+       u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
+
+       pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
+
+       spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
+       hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
+               struct kvm *kvm = container_of(ka, struct kvm, arch);
+               struct kvm_arch *vm_data = &kvm->arch;
+
+               if (vm_data->avic_vm_id != vm_id)
+                       continue;
+               vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+               break;
+       }
+       spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
+
+       if (!vcpu)
+               return 0;
+
+       /* Note:
+        * At this point, the IOMMU should have already set the pending
+        * bit in the vAPIC backing page. So, we just need to schedule
+        * in the vcpu.
+        */
+       if (vcpu->mode == OUTSIDE_GUEST_MODE)
+               kvm_vcpu_wake_up(vcpu);
+
+       return 0;
+}
+
 static __init int svm_hardware_setup(void)
 {
        int cpu;
@@ -986,10 +1072,15 @@ static __init int svm_hardware_setup(void)
        if (avic) {
                if (!npt_enabled ||
                    !boot_cpu_has(X86_FEATURE_AVIC) ||
-                   !IS_ENABLED(CONFIG_X86_LOCAL_APIC))
+                   !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
                        avic = false;
-               else
+               } else {
                        pr_info("AVIC enabled\n");
+
+                       hash_init(svm_vm_data_hash);
+                       spin_lock_init(&svm_vm_data_hash_lock);
+                       amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
+               }
        }
 
        return 0;
@@ -1280,18 +1371,54 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static inline int avic_get_next_vm_id(void)
+{
+       int id;
+
+       spin_lock(&avic_vm_id_lock);
+
+       /* AVIC VM ID is one-based. */
+       id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1);
+       if (id <= AVIC_VM_ID_MASK)
+               __set_bit(id, avic_vm_id_bitmap);
+       else
+               id = -EAGAIN;
+
+       spin_unlock(&avic_vm_id_lock);
+       return id;
+}
+
+static inline int avic_free_vm_id(int id)
+{
+       if (id <= 0 || id > AVIC_VM_ID_MASK)
+               return -EINVAL;
+
+       spin_lock(&avic_vm_id_lock);
+       __clear_bit(id, avic_vm_id_bitmap);
+       spin_unlock(&avic_vm_id_lock);
+       return 0;
+}
+
 static void avic_vm_destroy(struct kvm *kvm)
 {
+       unsigned long flags;
        struct kvm_arch *vm_data = &kvm->arch;
 
+       avic_free_vm_id(vm_data->avic_vm_id);
+
        if (vm_data->avic_logical_id_table_page)
                __free_page(vm_data->avic_logical_id_table_page);
        if (vm_data->avic_physical_id_table_page)
                __free_page(vm_data->avic_physical_id_table_page);
+
+       spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
+       hash_del(&vm_data->hnode);
+       spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 }
 
 static int avic_vm_init(struct kvm *kvm)
 {
+       unsigned long flags;
        int err = -ENOMEM;
        struct kvm_arch *vm_data = &kvm->arch;
        struct page *p_page;
@@ -1300,6 +1427,10 @@ static int avic_vm_init(struct kvm *kvm)
        if (!avic)
                return 0;
 
+       vm_data->avic_vm_id = avic_get_next_vm_id();
+       if (vm_data->avic_vm_id < 0)
+               return vm_data->avic_vm_id;
+
        /* Allocating physical APIC ID table (4KB) */
        p_page = alloc_page(GFP_KERNEL);
        if (!p_page)
@@ -1316,6 +1447,10 @@ static int avic_vm_init(struct kvm *kvm)
        vm_data->avic_logical_id_table_page = l_page;
        clear_page(page_address(l_page));
 
+       spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
+       hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
+       spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
+
        return 0;
 
 free_avic:
@@ -1323,31 +1458,34 @@ free_avic:
        return err;
 }
 
-/**
- * This function is called during VCPU halt/unhalt.
- */
-static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
+static inline int
+avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 {
-       u64 entry;
-       int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu);
+       int ret = 0;
+       unsigned long flags;
+       struct amd_svm_iommu_ir *ir;
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       if (!kvm_vcpu_apicv_active(vcpu))
-               return;
-
-       svm->avic_is_running = is_run;
+       if (!kvm_arch_has_assigned_device(vcpu->kvm))
+               return 0;
 
-       /* ID = 0xff (broadcast), ID > 0xff (reserved) */
-       if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
-               return;
+       /*
+        * Here, we go through the per-vcpu ir_list to update all existing
+        * interrupt remapping table entry targeting this vcpu.
+        */
+       spin_lock_irqsave(&svm->ir_list_lock, flags);
 
-       entry = READ_ONCE(*(svm->avic_physical_id_cache));
-       WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK));
+       if (list_empty(&svm->ir_list))
+               goto out;
 
-       entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
-       if (is_run)
-               entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
-       WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+       list_for_each_entry(ir, &svm->ir_list, node) {
+               ret = amd_iommu_update_ga(cpu, r, ir->data);
+               if (ret)
+                       break;
+       }
+out:
+       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+       return ret;
 }
 
 static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1374,6 +1512,8 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+       avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
+                                       svm->avic_is_running);
 }
 
 static void avic_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1385,10 +1525,27 @@ static void avic_vcpu_put(struct kvm_vcpu *vcpu)
                return;
 
        entry = READ_ONCE(*(svm->avic_physical_id_cache));
+       if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
+               avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
+
        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 }
 
+/**
+ * This function is called during VCPU halt/unhalt.
+ */
+static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       svm->avic_is_running = is_run;
+       if (is_run)
+               avic_vcpu_load(vcpu, vcpu->cpu);
+       else
+               avic_vcpu_put(vcpu);
+}
+
 static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
@@ -1450,6 +1607,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
                err = avic_init_backing_page(&svm->vcpu);
                if (err)
                        goto free_page4;
+
+               INIT_LIST_HEAD(&svm->ir_list);
+               spin_lock_init(&svm->ir_list_lock);
        }
 
        /* We initialize this flag to true to make sure that the is_running
@@ -4246,6 +4406,209 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
                kvm_vcpu_wake_up(vcpu);
 }
 
+static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+{
+       unsigned long flags;
+       struct amd_svm_iommu_ir *cur;
+
+       spin_lock_irqsave(&svm->ir_list_lock, flags);
+       list_for_each_entry(cur, &svm->ir_list, node) {
+               if (cur->data != pi->ir_data)
+                       continue;
+               list_del(&cur->node);
+               kfree(cur);
+               break;
+       }
+       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+}
+
+static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+{
+       int ret = 0;
+       unsigned long flags;
+       struct amd_svm_iommu_ir *ir;
+
+       /**
+        * In some cases, the existing irte is updated and re-set,
+        * so we need to check here if it's already been added
+        * to the ir_list.
+        */
+       if (pi->ir_data && (pi->prev_ga_tag != 0)) {
+               struct kvm *kvm = svm->vcpu.kvm;
+               u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
+               struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+               struct vcpu_svm *prev_svm;
+
+               if (!prev_vcpu) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               prev_svm = to_svm(prev_vcpu);
+               svm_ir_list_del(prev_svm, pi);
+       }
+
+       /**
+        * Allocate a new amd_svm_iommu_ir entry, which will get
+        * added to the per-vcpu ir_list.
+        */
+       ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
+       if (!ir) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       ir->data = pi->ir_data;
+
+       spin_lock_irqsave(&svm->ir_list_lock, flags);
+       list_add(&ir->node, &svm->ir_list);
+       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+out:
+       return ret;
+}
+
+/**
+ * Note:
+ * The HW cannot support posting multicast/broadcast
+ * interrupts to a vCPU. So, we still use legacy interrupt
+ * remapping for these kinds of interrupts.
+ *
+ * For lowest-priority interrupts, we only support
+ * those with single CPU as the destination, e.g. user
+ * configures the interrupts via /proc/irq or uses
+ * irqbalance to make the interrupts single-CPU.
+ */
+static int
+get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
+                struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
+{
+       struct kvm_lapic_irq irq;
+       struct kvm_vcpu *vcpu = NULL;
+
+       kvm_set_msi_irq(kvm, e, &irq);
+
+       if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+               pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
+                        __func__, irq.vector);
+               return -1;
+       }
+
+       pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
+                irq.vector);
+       *svm = to_svm(vcpu);
+       vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
+       vcpu_info->vector = irq.vector;
+
+       return 0;
+}
+
+/*
+ * svm_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+                             uint32_t guest_irq, bool set)
+{
+       struct kvm_kernel_irq_routing_entry *e;
+       struct kvm_irq_routing_table *irq_rt;
+       int idx, ret = -EINVAL;
+
+       if (!kvm_arch_has_assigned_device(kvm) ||
+           !irq_remapping_cap(IRQ_POSTING_CAP))
+               return 0;
+
+       pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
+                __func__, host_irq, guest_irq, set);
+
+       idx = srcu_read_lock(&kvm->irq_srcu);
+       irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+       WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+       hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+               struct vcpu_data vcpu_info;
+               struct vcpu_svm *svm = NULL;
+
+               if (e->type != KVM_IRQ_ROUTING_MSI)
+                       continue;
+
+               /**
+                * Here, we set up legacy mode in the following cases:
+                * 1. When we cannot target the interrupt to a specific vcpu.
+                * 2. Unsetting the posted interrupt.
+                * 3. APIC virtualization is disabled for the vcpu.
+                */
+               if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
+                   kvm_vcpu_apicv_active(&svm->vcpu)) {
+                       struct amd_iommu_pi_data pi;
+
+                       /* Try to enable guest_mode in IRTE */
+                       pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
+                       pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
+                                                    svm->vcpu.vcpu_id);
+                       pi.is_guest_mode = true;
+                       pi.vcpu_data = &vcpu_info;
+                       ret = irq_set_vcpu_affinity(host_irq, &pi);
+
+                       /**
+                        * Here, we have successfully set up vcpu affinity in
+                        * IOMMU guest mode. Now, we need to store the posted
+                        * interrupt information in a per-vcpu ir_list so that
+                        * we can reference it directly when we update vcpu
+                        * scheduling information in the IOMMU irte.
+                        */
+                       if (!ret && pi.is_guest_mode)
+                               svm_ir_list_add(svm, &pi);
+               } else {
+                       /* Use legacy mode in IRTE */
+                       struct amd_iommu_pi_data pi;
+
+                       /**
+                        * Here, pi is used to:
+                        * - Tell IOMMU to use legacy mode for this interrupt.
+                        * - Retrieve ga_tag of prior interrupt remapping data.
+                        */
+                       pi.is_guest_mode = false;
+                       ret = irq_set_vcpu_affinity(host_irq, &pi);
+
+                       /**
+                        * Check if the posted interrupt was previously
+                        * setup with the guest_mode by checking if the ga_tag
+                        * set up with guest_mode by checking if the ga_tag
+                        * ir_list.
+                        */
+                       if (!ret && pi.prev_ga_tag) {
+                               int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
+                               struct kvm_vcpu *vcpu;
+
+                               vcpu = kvm_get_vcpu_by_id(kvm, id);
+                               if (vcpu)
+                                       svm_ir_list_del(to_svm(vcpu), &pi);
+                       }
+               }
+
+               if (!ret && svm) {
+                       trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
+                                                host_irq, e->gsi,
+                                                vcpu_info.vector,
+                                                vcpu_info.pi_desc_addr, set);
+               }
+
+               if (ret < 0) {
+                       pr_err("%s: failed to update PI IRTE\n", __func__);
+                       goto out;
+               }
+       }
+
+       ret = 0;
+out:
+       srcu_read_unlock(&kvm->irq_srcu, idx);
+       return ret;
+}
+
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
@@ -5078,6 +5441,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 
        .pmu_ops = &amd_pmu_ops,
        .deliver_posted_interrupt = svm_deliver_avic_intr,
+       .update_pi_irte = svm_update_pi_irte,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 121fdf6e9ed09d15ef3c5abb8d00cf77a2ab5e78..4ee4187fcea3583eb420c3afaa3b77fb61ebfa77 100644
@@ -939,6 +939,7 @@ static DEFINE_SPINLOCK(vmx_vpid_lock);
 static struct vmcs_config {
        int size;
        int order;
+       u32 basic_cap;
        u32 revision_id;
        u32 pin_based_exec_ctrl;
        u32 cpu_based_exec_ctrl;
@@ -1215,6 +1216,11 @@ static inline bool cpu_has_vmx_ple(void)
                SECONDARY_EXEC_PAUSE_LOOP_EXITING;
 }
 
+static inline bool cpu_has_vmx_basic_inout(void)
+{
+       return  (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
+}
+
 static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
 {
        return flexpriority_enabled && lapic_in_kernel(vcpu);
@@ -2877,6 +2883,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
                *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
                           ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
                           (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
+               if (cpu_has_vmx_basic_inout())
+                       *pdata |= VMX_BASIC_INOUT;
                break;
        case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
        case MSR_IA32_VMX_PINBASED_CTLS:
@@ -3457,7 +3465,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                return -EIO;
 
        vmcs_conf->size = vmx_msr_high & 0x1fff;
-       vmcs_conf->order = get_order(vmcs_config.size);
+       vmcs_conf->order = get_order(vmcs_conf->size);
+       vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
        vmcs_conf->revision_id = vmx_msr_low;
 
        vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
@@ -6109,7 +6118,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
        gla_validity = (exit_qualification >> 7) & 0x3;
-       if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
+       if (gla_validity == 0x2) {
                printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
                printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
                        (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
@@ -6726,7 +6735,7 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
 {
        /* TODO: not to reset guest simply here. */
        kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-       pr_warn("kvm: nested vmx abort, indicator %d\n", indicator);
+       pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
 }
 
 static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
@@ -7013,7 +7022,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
        vmx->nested.vmcs02_num = 0;
 
        hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
-                    HRTIMER_MODE_REL);
+                    HRTIMER_MODE_REL_PINNED);
        vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
 
        vmx->nested.vmxon = true;
@@ -9598,7 +9607,7 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
        maxphyaddr = cpuid_maxphyaddr(vcpu);
        if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
            (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
-               pr_warn_ratelimited(
+               pr_debug_ratelimited(
                        "nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)",
                        addr_field, maxphyaddr, count, addr);
                return -EINVAL;
@@ -9671,13 +9680,13 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
        for (i = 0; i < count; i++) {
                if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
                                        &e, sizeof(e))) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot read MSR entry (%u, 0x%08llx)\n",
                                __func__, i, gpa + i * sizeof(e));
                        goto fail;
                }
                if (nested_vmx_load_msr_check(vcpu, &e)) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s check failed (%u, 0x%x, 0x%x)\n",
                                __func__, i, e.index, e.reserved);
                        goto fail;
@@ -9685,7 +9694,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
                msr.index = e.index;
                msr.data = e.value;
                if (kvm_set_msr(vcpu, &msr)) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
                                __func__, i, e.index, e.value);
                        goto fail;
@@ -9706,13 +9715,13 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
                if (kvm_vcpu_read_guest(vcpu,
                                        gpa + i * sizeof(e),
                                        &e, 2 * sizeof(u32))) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot read MSR entry (%u, 0x%08llx)\n",
                                __func__, i, gpa + i * sizeof(e));
                        return -EINVAL;
                }
                if (nested_vmx_store_msr_check(vcpu, &e)) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s check failed (%u, 0x%x, 0x%x)\n",
                                __func__, i, e.index, e.reserved);
                        return -EINVAL;
@@ -9720,7 +9729,7 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
                msr_info.host_initiated = false;
                msr_info.index = e.index;
                if (kvm_get_msr(vcpu, &msr_info)) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot read MSR (%u, 0x%x)\n",
                                __func__, i, e.index);
                        return -EINVAL;
@@ -9729,7 +9738,7 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
                                         gpa + i * sizeof(e) +
                                             offsetof(struct vmx_msr_entry, value),
                                         &msr_info.data, sizeof(msr_info.data))) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
                                __func__, i, e.index, msr_info.data);
                        return -EINVAL;
@@ -10500,6 +10509,9 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
        }
 
+       if (nested_cpu_has_ept(vmcs12))
+               vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+
        if (nested_cpu_has_vid(vmcs12))
                vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19f9f9e05c2a812fd07d07ce0e69223ec2accc25..57ffe78931042c15f35d2e3a977cacd1f43333c1 100644
@@ -6700,7 +6700,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        kvm_put_guest_xcr0(vcpu);
 
-       /* Interrupt is enabled by handle_external_intr() */
        kvm_x86_ops->handle_external_intr(vcpu);
 
        ++vcpu->stat.exits;
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 77e6ccf149011b2f3b43ba01aa1454fd08889dc8..4309b60ebf171606467e8b4615c8830bf76d6a52 100644
@@ -31,7 +31,6 @@
 #include "trace.h"
 
 static struct timecounter *timecounter;
-static struct workqueue_struct *wqueue;
 static unsigned int host_vtimer_irq;
 static u32 host_vtimer_irq_flags;
 
@@ -141,7 +140,7 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
                return HRTIMER_RESTART;
        }
 
-       queue_work(wqueue, &timer->expired);
+       schedule_work(&timer->expired);
        return HRTIMER_NORESTART;
 }
 
@@ -449,12 +448,6 @@ int kvm_timer_hyp_init(void)
                goto out;
        }
 
-       wqueue = create_singlethread_workqueue("kvm_arch_timer");
-       if (!wqueue) {
-               err = -ENOMEM;
-               goto out_free;
-       }
-
        kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
 
        cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
@@ -518,7 +511,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
         * VCPUs have the enabled variable set, before entering the guest, if
         * the arch timers are enabled.
         */
-       if (timecounter && wqueue)
+       if (timecounter)
                timer->enabled = 1;
 
        return 0;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index e469b60124718e60740e24857d15ae7b8a503552..f397e9b20370a2fb547b04fe555802846b2e9aef 100644
@@ -42,7 +42,6 @@
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
 
-static struct workqueue_struct *irqfd_cleanup_wq;
 
 static void
 irqfd_inject(struct work_struct *work)
@@ -168,7 +167,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
 
        list_del_init(&irqfd->list);
 
-       queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
+       schedule_work(&irqfd->shutdown);
 }
 
 int __attribute__((weak)) kvm_arch_set_irq_inatomic(
@@ -555,7 +554,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
         * so that we guarantee there will not be any more interrupts on this
         * gsi once this deassign function returns.
         */
-       flush_workqueue(irqfd_cleanup_wq);
+       flush_work(&irqfd->shutdown);
 
        return 0;
 }
@@ -592,7 +591,7 @@ kvm_irqfd_release(struct kvm *kvm)
         * Block until we know all outstanding shutdown jobs have completed
         * since we do not take a kvm* reference.
         */
-       flush_workqueue(irqfd_cleanup_wq);
+       flush_work(&irqfd->shutdown);
 
 }
 
@@ -622,23 +621,8 @@ void kvm_irq_routing_update(struct kvm *kvm)
        spin_unlock_irq(&kvm->irqfds.lock);
 }
 
-/*
- * create a host-wide workqueue for issuing deferred shutdown requests
- * aggregated from all vm* instances. We need our own isolated single-thread
- * queue to prevent deadlock against flushing the normal work-queue.
- */
-int kvm_irqfd_init(void)
-{
-       irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
-       if (!irqfd_cleanup_wq)
-               return -ENOMEM;
-
-       return 0;
-}
-
 void kvm_irqfd_exit(void)
 {
-       destroy_workqueue(irqfd_cleanup_wq);
 }
 #endif
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 195078225aa5d0c3b3214fd40e0b5f441c5d7518..b3fa12ce116687b1b7ae8e468d5c71637e26ca3e 100644
@@ -3807,12 +3807,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
         * kvm_arch_init makes sure there's at most one caller
         * for architectures that support multiple implementations,
         * like intel and amd on x86.
-        * kvm_arch_init must be called before kvm_irqfd_init to avoid creating
-        * conflicts in case kvm is already setup for another implementation.
         */
-       r = kvm_irqfd_init();
-       if (r)
-               goto out_irqfd;
 
        if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
                r = -ENOMEM;
@@ -3894,7 +3889,6 @@ out_free_0a:
        free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
        kvm_irqfd_exit();
-out_irqfd:
        kvm_arch_exit();
 out_fail:
        return r;