KVM: nVMX: Refactor handle_vmwrite
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6667042714cc65e78208c517ccc0802a40dd4ccf..1ee63cae82cdcccf11ec4f2b3488b30c907fa424 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,8 +84,11 @@ module_param(vmm_exclusive, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
-static bool __read_mostly enable_apicv_reg_vid;
+static bool __read_mostly enable_apicv = 1;
+module_param(enable_apicv, bool, S_IRUGO);
 
+static bool __read_mostly enable_shadow_vmcs = 1;
+module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -298,7 +301,8 @@ struct __packed vmcs12 {
        u32 guest_activity_state;
        u32 guest_sysenter_cs;
        u32 host_ia32_sysenter_cs;
-       u32 padding32[8]; /* room for future expansion */
+       u32 vmx_preemption_timer_value;
+       u32 padding32[7]; /* room for future expansion */
        u16 virtual_processor_id;
        u16 guest_es_selector;
        u16 guest_cs_selector;
@@ -365,6 +369,31 @@ struct nested_vmx {
        struct page *apic_access_page;
 };
 
+#define POSTED_INTR_ON  0
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+       u32 pir[8];     /* Posted interrupt requested */
+       u32 control;    /* bit 0 of control is outstanding notification bit */
+       u32 rsvd[7];
+} __aligned(64);
+
+static bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+       return test_and_set_bit(POSTED_INTR_ON,
+                       (unsigned long *)&pi_desc->control);
+}
+
+static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+       return test_and_clear_bit(POSTED_INTR_ON,
+                       (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+       return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
 struct vcpu_vmx {
        struct kvm_vcpu       vcpu;
        unsigned long         host_rsp;
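The pi_desc added above is the 64-byte posted-interrupt descriptor: a 256-bit PIR (one bit per vector) plus an outstanding-notification (ON) bit in the control word, both manipulated atomically by the sender and the target CPU. A minimal user-space model of that handshake, using GCC atomic builtins in place of the kernel's test_and_set_bit (the names post_vector/sync_pir_to_irr are illustrative, not kernel API):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative model of the 64-byte posted-interrupt descriptor. */
    struct pi_desc_model {
        uint32_t pir[8];   /* one bit per vector, 256 vectors */
        uint32_t control;  /* bit 0: outstanding notification (ON) */
        uint32_t rsvd[7];
    } __attribute__((aligned(64)));

    /* Sender side: post a vector, then raise ON.  Returns true when the
     * caller would have to send the notification IPI. */
    static bool post_vector(struct pi_desc_model *pi, int vector)
    {
        uint32_t bit = 1u << (vector & 31);

        /* test-and-set of the PIR bit; nonzero means it was already pending */
        if (__atomic_fetch_or(&pi->pir[vector >> 5], bit, __ATOMIC_SEQ_CST) & bit)
            return false;
        /* test-and-set of ON; only the first setter needs to notify */
        return !(__atomic_fetch_or(&pi->control, 1u, __ATOMIC_SEQ_CST) & 1u);
    }

    /* Receiver side: clear ON, then drain PIR into a local IRR copy. */
    static void sync_pir_to_irr(struct pi_desc_model *pi, uint32_t irr[8])
    {
        if (!(__atomic_fetch_and(&pi->control, ~1u, __ATOMIC_SEQ_CST) & 1u))
            return;                          /* ON was not set: nothing pending */
        for (int i = 0; i < 8; i++)
            irr[i] |= __atomic_exchange_n(&pi->pir[i], 0, __ATOMIC_SEQ_CST);
    }

    int main(void)
    {
        struct pi_desc_model pi = {0};
        uint32_t irr[8] = {0};

        if (post_vector(&pi, 0x31))
            puts("first poster would send the notification IPI");
        post_vector(&pi, 0x31);              /* duplicate: no second IPI */
        sync_pir_to_irr(&pi, irr);
        printf("vector 0x31 pending in IRR: %d\n", !!(irr[1] & (1u << 17)));
        return 0;
    }

The point of the ON bit shows up in post_vector(): only the first post after a sync needs a notification, later vectors just accumulate in the PIR until the receiver drains it.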
@@ -377,6 +406,7 @@ struct vcpu_vmx {
        struct shared_msr_entry *guest_msrs;
        int                   nmsrs;
        int                   save_nmsrs;
+       unsigned long         host_idt_base;
 #ifdef CONFIG_X86_64
        u64                   msr_host_kernel_gs_base;
        u64                   msr_guest_kernel_gs_base;
@@ -428,6 +458,9 @@ struct vcpu_vmx {
 
        bool rdtscp_enabled;
 
+       /* Posted interrupt descriptor */
+       struct pi_desc pi_desc;
+
        /* Support for a guest hypervisor (nested VMX) */
        struct nested_vmx nested;
 };
@@ -451,6 +484,64 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 #define FIELD64(number, name)  [number] = VMCS12_OFFSET(name), \
                                [number##_HIGH] = VMCS12_OFFSET(name)+4
 
+
+static const unsigned long shadow_read_only_fields[] = {
+       /*
+        * We do NOT shadow fields that are modified when L0
+        * traps and emulates any vmx instruction (e.g. VMPTRLD,
+        * VMXON...) executed by L1.
+        * For example, VM_INSTRUCTION_ERROR is read
+        * by L1 if a vmx instruction fails (part of the error path).
+        * Note the code assumes this logic. If for some reason
+        * we start shadowing these fields then we need to
+        * force a shadow sync when L0 emulates vmx instructions
+        * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
+        * by nested_vmx_failValid)
+        */
+       VM_EXIT_REASON,
+       VM_EXIT_INTR_INFO,
+       VM_EXIT_INSTRUCTION_LEN,
+       IDT_VECTORING_INFO_FIELD,
+       IDT_VECTORING_ERROR_CODE,
+       VM_EXIT_INTR_ERROR_CODE,
+       EXIT_QUALIFICATION,
+       GUEST_LINEAR_ADDRESS,
+       GUEST_PHYSICAL_ADDRESS
+};
+static const int max_shadow_read_only_fields =
+       ARRAY_SIZE(shadow_read_only_fields);
+
+static const unsigned long shadow_read_write_fields[] = {
+       GUEST_RIP,
+       GUEST_RSP,
+       GUEST_CR0,
+       GUEST_CR3,
+       GUEST_CR4,
+       GUEST_INTERRUPTIBILITY_INFO,
+       GUEST_RFLAGS,
+       GUEST_CS_SELECTOR,
+       GUEST_CS_AR_BYTES,
+       GUEST_CS_LIMIT,
+       GUEST_CS_BASE,
+       GUEST_ES_BASE,
+       CR0_GUEST_HOST_MASK,
+       CR0_READ_SHADOW,
+       CR4_READ_SHADOW,
+       TSC_OFFSET,
+       EXCEPTION_BITMAP,
+       CPU_BASED_VM_EXEC_CONTROL,
+       VM_ENTRY_EXCEPTION_ERROR_CODE,
+       VM_ENTRY_INTR_INFO_FIELD,
+       VM_ENTRY_INSTRUCTION_LEN,
+       VM_ENTRY_EXCEPTION_ERROR_CODE,
+       HOST_FS_BASE,
+       HOST_GS_BASE,
+       HOST_FS_SELECTOR,
+       HOST_GS_SELECTOR
+};
+static const int max_shadow_read_write_fields =
+       ARRAY_SIZE(shadow_read_write_fields);
+
 static const unsigned short vmcs_field_to_offset_table[] = {
        FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
        FIELD(GUEST_ES_SELECTOR, guest_es_selector),
@@ -537,6 +628,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
        FIELD(GUEST_ACTIVITY_STATE, guest_activity_state),
        FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs),
        FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs),
+       FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value),
        FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask),
        FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
        FIELD(CR0_READ_SHADOW, cr0_read_shadow),
@@ -624,6 +716,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
                            struct kvm_segment *var, int seg);
 static bool guest_state_valid(struct kvm_vcpu *vcpu);
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -640,6 +733,8 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
+static unsigned long *vmx_vmread_bitmap;
+static unsigned long *vmx_vmwrite_bitmap;
 
 static bool cpu_has_load_ia32_efer;
 static bool cpu_has_load_perf_global_ctrl;
@@ -782,6 +877,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 }
 
+static inline bool cpu_has_vmx_posted_intr(void)
+{
+       return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+}
+
+static inline bool cpu_has_vmx_apicv(void)
+{
+       return cpu_has_vmx_apic_register_virt() &&
+               cpu_has_vmx_virtual_intr_delivery() &&
+               cpu_has_vmx_posted_intr();
+}
+
 static inline bool cpu_has_vmx_flexpriority(void)
 {
        return cpu_has_vmx_tpr_shadow() &&
@@ -895,6 +1002,18 @@ static inline bool cpu_has_vmx_wbinvd_exit(void)
                SECONDARY_EXEC_WBINVD_EXITING;
 }
 
+static inline bool cpu_has_vmx_shadow_vmcs(void)
+{
+       u64 vmx_msr;
+       rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
+       /* check if the cpu supports writing r/o exit information fields */
+       if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
+               return false;
+
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+               SECONDARY_EXEC_SHADOW_VMCS;
+}
+
 static inline bool report_flexpriority(void)
 {
        return flexpriority_enabled;
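cpu_has_vmx_shadow_vmcs() requires, besides the secondary execution control, that VMWRITE may also target the read-only exit-information fields, which the CPU advertises in bit 29 of IA32_VMX_MISC. The same bit can be inspected from user space through the msr driver; the sketch below assumes /dev/cpu/0/msr exists (msr module loaded, run as root) and that 0x485 is the IA32_VMX_MISC MSR number:

    /* Build: gcc -o vmxmisc vmxmisc.c ; run as root with the msr module loaded. */
    #include <fcntl.h>
    #include <inttypes.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    #define IA32_VMX_MISC 0x485                    /* architectural MSR number */
    #define VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)

    int main(void)
    {
        uint64_t misc;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        if (fd < 0 || pread(fd, &misc, sizeof(misc), IA32_VMX_MISC) != sizeof(misc)) {
            perror("rdmsr IA32_VMX_MISC");
            return EXIT_FAILURE;
        }
        printf("IA32_VMX_MISC = %#" PRIx64 "\n", misc);
        printf("VMWRITE to r/o exit fields supported: %s\n",
               (misc & VMWRITE_SHADOW_RO_FIELDS) ? "yes" : "no");
        close(fd);
        return 0;
    }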
@@ -2022,6 +2141,7 @@ static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high;
 static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
 static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
 static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
+static u32 nested_vmx_misc_low, nested_vmx_misc_high;
 static __init void nested_vmx_setup_ctls_msrs(void)
 {
        /*
@@ -2040,30 +2160,40 @@ static __init void nested_vmx_setup_ctls_msrs(void)
         */
 
        /* pin-based controls */
+       rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
+             nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high);
        /*
         * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is
         * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR.
         */
-       nested_vmx_pinbased_ctls_low = 0x16 ;
-       nested_vmx_pinbased_ctls_high = 0x16 |
-               PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
-               PIN_BASED_VIRTUAL_NMIS;
+       nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
+       nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
+               PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
+               PIN_BASED_VMX_PREEMPTION_TIMER;
+       nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 
-       /* exit controls */
-       nested_vmx_exit_ctls_low = 0;
+       /*
+        * Exit controls
+        * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and
+        * 17 must be 1.
+        */
+       nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
        /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */
 #ifdef CONFIG_X86_64
        nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #else
        nested_vmx_exit_ctls_high = 0;
 #endif
+       nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
 
        /* entry controls */
        rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
                nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
-       nested_vmx_entry_ctls_low = 0;
+       /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
+       nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
        nested_vmx_entry_ctls_high &=
                VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
+       nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
 
        /* cpu-based controls */
        rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2080,6 +2210,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
                CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
                CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
                CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
+               CPU_BASED_PAUSE_EXITING |
                CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
        /*
         * We can allow some features even when not supported by the
@@ -2094,7 +2225,14 @@ static __init void nested_vmx_setup_ctls_msrs(void)
                nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high);
        nested_vmx_secondary_ctls_low = 0;
        nested_vmx_secondary_ctls_high &=
-               SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+               SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+               SECONDARY_EXEC_WBINVD_EXITING;
+
+       /* miscellaneous data */
+       rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
+       nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
+               VMX_MISC_SAVE_EFER_LMA;
+       nested_vmx_misc_high = 0;
 }
 
 static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
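All of the low/high pairs captured above follow the VMX capability-MSR convention that vmx_control_verify() (whose declaration is visible in the context here) enforces: bits set in the low word are allowed-0 settings and must be 1, bits clear in the high word are allowed-1 settings and must stay 0. A stand-alone restatement of that check with made-up capability words, not values read from hardware:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * A control value is acceptable iff every bit required by "low" is set
     * and no bit outside of "high" is set.
     */
    static int control_ok(uint32_t control, uint32_t low, uint32_t high)
    {
        return (control & low) == low && (control & ~high) == 0;
    }

    int main(void)
    {
        /* illustrative capability words, not real MSR contents */
        uint32_t low  = 0x00000016;   /* bits 1, 2 and 4 must be 1 */
        uint32_t high = 0x0000007f;   /* only bits 0..6 may be 1   */

        printf("%d\n", control_ok(0x16, low, high));   /* 1: minimal legal value */
        printf("%d\n", control_ok(0x10, low, high));   /* 0: required bit clear  */
        printf("%d\n", control_ok(0x96, low, high));   /* 0: disallowed bit set  */
        return 0;
    }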
@@ -2165,7 +2303,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
                                        nested_vmx_entry_ctls_high);
                break;
        case MSR_IA32_VMX_MISC:
-               *pdata = 0;
+               *pdata = vmx_control_msr(nested_vmx_misc_low,
+                                        nested_vmx_misc_high);
                break;
        /*
         * These MSRs specify bits which the guest must keep fixed (on or off)
@@ -2529,12 +2668,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
        u32 _vmexit_control = 0;
        u32 _vmentry_control = 0;
 
-       min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-       opt = PIN_BASED_VIRTUAL_NMIS;
-       if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
-                               &_pin_based_exec_control) < 0)
-               return -EIO;
-
        min = CPU_BASED_HLT_EXITING |
 #ifdef CONFIG_X86_64
              CPU_BASED_CR8_LOAD_EXITING |
@@ -2573,7 +2706,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                        SECONDARY_EXEC_RDTSCP |
                        SECONDARY_EXEC_ENABLE_INVPCID |
                        SECONDARY_EXEC_APIC_REGISTER_VIRT |
-                       SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+                       SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+                       SECONDARY_EXEC_SHADOW_VMCS;
                if (adjust_vmx_controls(min2, opt2,
                                        MSR_IA32_VMX_PROCBASED_CTLS2,
                                        &_cpu_based_2nd_exec_control) < 0)
@@ -2605,11 +2739,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 #ifdef CONFIG_X86_64
        min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
-       opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
+       opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
+               VM_EXIT_ACK_INTR_ON_EXIT;
        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
                                &_vmexit_control) < 0)
                return -EIO;
 
+       min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+       opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
+       if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
+                               &_pin_based_exec_control) < 0)
+               return -EIO;
+
+       if (!(_cpu_based_2nd_exec_control &
+               SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
+               !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
+               _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
+
        min = 0;
        opt = VM_ENTRY_LOAD_IA32_PAT;
        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
@@ -2762,6 +2908,8 @@ static __init int hardware_setup(void)
 
        if (!cpu_has_vmx_vpid())
                enable_vpid = 0;
+       if (!cpu_has_vmx_shadow_vmcs())
+               enable_shadow_vmcs = 0;
 
        if (!cpu_has_vmx_ept() ||
            !cpu_has_vmx_ept_4levels()) {
@@ -2788,14 +2936,16 @@ static __init int hardware_setup(void)
        if (!cpu_has_vmx_ple())
                ple_gap = 0;
 
-       if (!cpu_has_vmx_apic_register_virt() ||
-                               !cpu_has_vmx_virtual_intr_delivery())
-               enable_apicv_reg_vid = 0;
+       if (!cpu_has_vmx_apicv())
+               enable_apicv = 0;
 
-       if (enable_apicv_reg_vid)
+       if (enable_apicv)
                kvm_x86_ops->update_cr8_intercept = NULL;
-       else
+       else {
                kvm_x86_ops->hwapic_irr_update = NULL;
+               kvm_x86_ops->deliver_posted_interrupt = NULL;
+               kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+       }
 
        if (nested)
                nested_vmx_setup_ctls_msrs();
@@ -2876,22 +3026,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
        vmx->cpl = 0;
 }
 
-static gva_t rmode_tss_base(struct kvm *kvm)
-{
-       if (!kvm->arch.tss_addr) {
-               struct kvm_memslots *slots;
-               struct kvm_memory_slot *slot;
-               gfn_t base_gfn;
-
-               slots = kvm_memslots(kvm);
-               slot = id_to_memslot(slots, 0);
-               base_gfn = slot->base_gfn + slot->npages - 3;
-
-               return base_gfn << PAGE_SHIFT;
-       }
-       return kvm->arch.tss_addr;
-}
-
 static void fix_rmode_seg(int seg, struct kvm_segment *save)
 {
        const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -2942,19 +3076,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
        /*
         * Very old userspace does not call KVM_SET_TSS_ADDR before entering
-        * vcpu. Call it here with phys address pointing 16M below 4G.
+        * vcpu. Warn the user that an update is overdue.
         */
-       if (!vcpu->kvm->arch.tss_addr) {
+       if (!vcpu->kvm->arch.tss_addr)
                printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
                             "called before entering vcpu\n");
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-               vmx_set_tss_addr(vcpu->kvm, 0xfeffd000);
-               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-       }
 
        vmx_segment_cache_clear(vmx);
 
-       vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
+       vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
        vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
        vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
@@ -3214,7 +3344,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                 */
                if (!nested_vmx_allowed(vcpu))
                        return 1;
-       } else if (to_vmx(vcpu)->nested.vmxon)
+       }
+       if (to_vmx(vcpu)->nested.vmxon &&
+           ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON))
                return 1;
 
        vcpu->arch.cr4 = cr4;
@@ -3550,7 +3682,7 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
                return true;
 
        /* real mode guest state checks */
-       if (!is_protmode(vcpu)) {
+       if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
                if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
                        return false;
                if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
@@ -3599,7 +3731,7 @@ static int init_rmode_tss(struct kvm *kvm)
        int r, idx, ret = 0;
 
        idx = srcu_read_lock(&kvm->srcu);
-       fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
+       fn = kvm->arch.tss_addr >> PAGE_SHIFT;
        r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
        if (r < 0)
                goto out;
@@ -3692,7 +3824,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
        kvm_userspace_mem.flags = 0;
        kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
        kvm_userspace_mem.memory_size = PAGE_SIZE;
-       r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
+       r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
        if (r)
                goto out;
 
@@ -3722,7 +3854,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
        kvm_userspace_mem.guest_phys_addr =
                kvm->arch.ept_identity_map_addr;
        kvm_userspace_mem.memory_size = PAGE_SIZE;
-       r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
+       r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
        if (r)
                goto out;
 
@@ -3869,13 +4001,59 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
                        msr, MSR_TYPE_W);
 }
 
+static int vmx_vm_has_apicv(struct kvm *kvm)
+{
+       return enable_apicv && irqchip_in_kernel(kvm);
+}
+
+/*
+ * Send interrupt to vcpu via posted interrupt way.
+ * 1. If target vcpu is running(non-root mode), send posted interrupt
+ * notification to vcpu and hardware will sync PIR to vIRR atomically.
+ * 2. If target vcpu isn't running(root mode), kick it to pick up the
+ * interrupt from PIR in next vmentry.
+ */
+static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int r;
+
+       if (pi_test_and_set_pir(vector, &vmx->pi_desc))
+               return;
+
+       r = pi_test_and_set_on(&vmx->pi_desc);
+       kvm_make_request(KVM_REQ_EVENT, vcpu);
+#ifdef CONFIG_SMP
+       if (!r && (vcpu->mode == IN_GUEST_MODE))
+               apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
+                               POSTED_INTR_VECTOR);
+       else
+#endif
+               kvm_vcpu_kick(vcpu);
+}
+
+static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       if (!pi_test_and_clear_on(&vmx->pi_desc))
+               return;
+
+       kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
+}
+
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
+{
+       return;
+}
+
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
  * Note that host-state that does change is set elsewhere. E.g., host-state
  * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
  */
-static void vmx_set_constant_host_state(void)
+static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 {
        u32 low32, high32;
        unsigned long tmpl;
@@ -3903,6 +4081,7 @@ static void vmx_set_constant_host_state(void)
 
        native_store_idt(&dt);
        vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
+       vmx->host_idt_base = dt.address;
 
        vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
 
@@ -3928,6 +4107,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
        vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 }
 
+static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
+{
+       u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
+
+       if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+               pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+       return pin_based_exec_ctrl;
+}
+
 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 {
        u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
@@ -3945,11 +4133,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
        return exec_control;
 }
 
-static int vmx_vm_has_apicv(struct kvm *kvm)
-{
-       return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
-}
-
 static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 {
        u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -3971,6 +4154,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
                exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
        exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+       /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
+          (handle_vmptrld).
+          We can NOT enable shadow_vmcs here because we don't have yet
+          a current VMCS12
+       */
+       exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
        return exec_control;
 }
 
@@ -3999,14 +4188,17 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
        vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
        vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
 
+       if (enable_shadow_vmcs) {
+               vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
+               vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+       }
        if (cpu_has_vmx_msr_bitmap())
                vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
 
        vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
        /* Control */
-       vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
-               vmcs_config.pin_based_exec_ctrl);
+       vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
 
        vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
@@ -4015,13 +4207,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
                                vmx_secondary_exec_control(vmx));
        }
 
-       if (enable_apicv_reg_vid) {
+       if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
                vmcs_write64(EOI_EXIT_BITMAP0, 0);
                vmcs_write64(EOI_EXIT_BITMAP1, 0);
                vmcs_write64(EOI_EXIT_BITMAP2, 0);
                vmcs_write64(EOI_EXIT_BITMAP3, 0);
 
                vmcs_write16(GUEST_INTR_STATUS, 0);
+
+               vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
+               vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
        }
 
        if (ple_gap) {
@@ -4035,7 +4230,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
        vmcs_write16(HOST_FS_SELECTOR, 0);            /* 22.2.4 */
        vmcs_write16(HOST_GS_SELECTOR, 0);            /* 22.2.4 */
-       vmx_set_constant_host_state();
+       vmx_set_constant_host_state(vmx);
 #ifdef CONFIG_X86_64
        rdmsrl(MSR_FS_BASE, a);
        vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
@@ -4089,11 +4284,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
        return 0;
 }
 
-static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u64 msr;
-       int ret;
 
        vmx->rmode.vm86_active = 0;
 
@@ -4109,12 +4303,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        vmx_segment_cache_clear(vmx);
 
        seg_setup(VCPU_SREG_CS);
-       if (kvm_vcpu_is_bsp(&vmx->vcpu))
-               vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
-       else {
-               vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
-               vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
-       }
+       vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
+       vmcs_write32(GUEST_CS_BASE, 0xffff0000);
 
        seg_setup(VCPU_SREG_DS);
        seg_setup(VCPU_SREG_ES);
@@ -4137,10 +4327,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        vmcs_writel(GUEST_SYSENTER_EIP, 0);
 
        vmcs_writel(GUEST_RFLAGS, 0x02);
-       if (kvm_vcpu_is_bsp(&vmx->vcpu))
-               kvm_rip_write(vcpu, 0xfff0);
-       else
-               kvm_rip_write(vcpu, 0);
+       kvm_rip_write(vcpu, 0xfff0);
 
        vmcs_writel(GUEST_GDTR_BASE, 0);
        vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
@@ -4171,23 +4358,20 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                vmcs_write64(APIC_ACCESS_ADDR,
                             page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
 
+       if (vmx_vm_has_apicv(vcpu->kvm))
+               memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
+
        if (vmx->vpid != 0)
                vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
        vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        vmx_set_cr4(&vmx->vcpu, 0);
        vmx_set_efer(&vmx->vcpu, 0);
        vmx_fpu_activate(&vmx->vcpu);
        update_exception_bitmap(&vmx->vcpu);
 
        vpid_sync_context(vmx);
-
-       ret = 0;
-
-       return ret;
 }
 
 /*
@@ -4335,16 +4519,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-       if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
+       if (is_guest_mode(vcpu)) {
                struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-               if (to_vmx(vcpu)->nested.nested_run_pending ||
-                   (vmcs12->idt_vectoring_info_field &
-                    VECTORING_INFO_VALID_MASK))
+
+               if (to_vmx(vcpu)->nested.nested_run_pending)
                        return 0;
-               nested_vmx_vmexit(vcpu);
-               vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
-               vmcs12->vm_exit_intr_info = 0;
-               /* fall through to normal code, but now in L1, not L2 */
+               if (nested_exit_on_intr(vcpu)) {
+                       nested_vmx_vmexit(vcpu);
+                       vmcs12->vm_exit_reason =
+                               EXIT_REASON_EXTERNAL_INTERRUPT;
+                       vmcs12->vm_exit_intr_info = 0;
+                       /*
+                        * fall through to normal code, but now in L1, not L2
+                        */
+               }
        }
 
        return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -4362,7 +4550,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
                .flags = 0,
        };
 
-       ret = kvm_set_memory_region(kvm, &tss_mem, false);
+       ret = kvm_set_memory_region(kvm, &tss_mem);
        if (ret)
                return ret;
        kvm->arch.tss_addr = addr;
@@ -4603,34 +4791,50 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 {
-       if (to_vmx(vcpu)->nested.vmxon &&
-           ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
-               return 1;
-
        if (is_guest_mode(vcpu)) {
+               struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+               unsigned long orig_val = val;
+
                /*
                 * We get here when L2 changed cr0 in a way that did not change
                 * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
-                * but did change L0 shadowed bits. This can currently happen
-                * with the TS bit: L0 may want to leave TS on (for lazy fpu
-                * loading) while pretending to allow the guest to change it.
+                * but did change L0 shadowed bits. So we first calculate the
+                * effective cr0 value that L1 would like to write into the
+                * hardware. It consists of the L2-owned bits from the new
+                * value combined with the L1-owned bits from L1's guest_cr0.
                 */
-               if (kvm_set_cr0(vcpu, (val & vcpu->arch.cr0_guest_owned_bits) |
-                        (vcpu->arch.cr0 & ~vcpu->arch.cr0_guest_owned_bits)))
+               val = (val & ~vmcs12->cr0_guest_host_mask) |
+                       (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
+
+               /* TODO: will have to take unrestricted guest mode into
+                * account */
+               if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
+                       return 1;
+
+               if (kvm_set_cr0(vcpu, val))
                        return 1;
-               vmcs_writel(CR0_READ_SHADOW, val);
+               vmcs_writel(CR0_READ_SHADOW, orig_val);
                return 0;
-       } else
+       } else {
+               if (to_vmx(vcpu)->nested.vmxon &&
+                   ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
+                       return 1;
                return kvm_set_cr0(vcpu, val);
+       }
 }
 
 static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
 {
        if (is_guest_mode(vcpu)) {
-               if (kvm_set_cr4(vcpu, (val & vcpu->arch.cr4_guest_owned_bits) |
-                        (vcpu->arch.cr4 & ~vcpu->arch.cr4_guest_owned_bits)))
+               struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+               unsigned long orig_val = val;
+
+               /* analogously to handle_set_cr0 */
+               val = (val & ~vmcs12->cr4_guest_host_mask) |
+                       (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
+               if (kvm_set_cr4(vcpu, val))
                        return 1;
-               vmcs_writel(CR4_READ_SHADOW, val);
+               vmcs_writel(CR4_READ_SHADOW, orig_val);
                return 0;
        } else
                return kvm_set_cr4(vcpu, val);
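Both CR handlers now rebuild the value L1 intended to load: bits covered by cr0/cr4_guest_host_mask (host-owned from L2's point of view) come from vmcs12's guest_cr0/guest_cr4, the remaining guest-owned bits come from the value L2 just wrote. The merge itself is a plain masked combine; a worked example with a made-up mask that only covers CR0.TS:

    #include <stdint.h>
    #include <stdio.h>

    /* Bits set in mask are owned by the host (L1): take them from "shadow".
     * Bits clear in mask are owned by the guest (L2): take them from "val". */
    static uint64_t merge_cr(uint64_t val, uint64_t shadow, uint64_t mask)
    {
        return (val & ~mask) | (shadow & mask);
    }

    int main(void)
    {
        uint64_t cr0_guest_host_mask = 0x00000008;   /* pretend L1 owns only TS */
        uint64_t guest_cr0           = 0x80000031;   /* L1's view: TS clear     */
        uint64_t l2_write            = 0x80000039;   /* L2 tries to set TS      */

        /* TS comes from guest_cr0, so the effective value keeps TS clear. */
        printf("effective cr0 = %#llx\n",
               (unsigned long long)merge_cr(l2_write, guest_cr0, cr0_guest_host_mask));
        return 0;
    }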
@@ -5183,7 +5387,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
                        return 1;
 
-               err = emulate_instruction(vcpu, 0);
+               err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
 
                if (err == EMULATE_DO_MMIO) {
                        ret = 0;
@@ -5259,8 +5463,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
        }
 
        /* Create a new VMCS */
-       item = (struct vmcs02_list *)
-               kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+       item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
        if (!item)
                return NULL;
        item->vmcs02.vmcs = alloc_vmcs();
@@ -5639,6 +5842,33 @@ static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
        }
 }
 
+
+static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
+                                   unsigned long field, u64 field_value){
+       short offset = vmcs_field_to_offset(field);
+       char *p = ((char *) get_vmcs12(vcpu)) + offset;
+       if (offset < 0)
+               return false;
+
+       switch (vmcs_field_type(field)) {
+       case VMCS_FIELD_TYPE_U16:
+               *(u16 *)p = field_value;
+               return true;
+       case VMCS_FIELD_TYPE_U32:
+               *(u32 *)p = field_value;
+               return true;
+       case VMCS_FIELD_TYPE_U64:
+               *(u64 *)p = field_value;
+               return true;
+       case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+               *(natural_width *)p = field_value;
+               return true;
+       default:
+               return false; /* can never happen. */
+       }
+
+}
+
 /*
  * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was
  * used before) all generate the same failure when it is missing.
@@ -5703,8 +5933,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
        gva_t gva;
        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
        u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-       char *p;
-       short offset;
        /* The value to write might be 32 or 64 bits, depending on L1's long
         * mode, and eventually we need to write that into a field of several
         * possible lengths. The code below first zero-extends the value to 64
@@ -5741,28 +5969,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
                return 1;
        }
 
-       offset = vmcs_field_to_offset(field);
-       if (offset < 0) {
-               nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-               skip_emulated_instruction(vcpu);
-               return 1;
-       }
-       p = ((char *) get_vmcs12(vcpu)) + offset;
-
-       switch (vmcs_field_type(field)) {
-       case VMCS_FIELD_TYPE_U16:
-               *(u16 *)p = field_value;
-               break;
-       case VMCS_FIELD_TYPE_U32:
-               *(u32 *)p = field_value;
-               break;
-       case VMCS_FIELD_TYPE_U64:
-               *(u64 *)p = field_value;
-               break;
-       case VMCS_FIELD_TYPE_NATURAL_WIDTH:
-               *(natural_width *)p = field_value;
-               break;
-       default:
+       if (!vmcs12_write_any(vcpu, field, field_value)) {
                nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
                skip_emulated_instruction(vcpu);
                return 1;
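This is the refactoring the commit title refers to: the open-coded offset lookup and width switch in handle_vmwrite() moved into vmcs12_write_any(), which picks the store width from the VMCS field encoding (bits 14:13 select 16/64/32/natural width, and odd encodings are the 32-bit high halves of 64-bit fields). A user-space model of that dispatch, with a two-field toy structure standing in for vmcs12 and a switch standing in for the real offset table:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    enum field_type { TYPE_U16, TYPE_U64, TYPE_U32, TYPE_NATURAL };  /* encoding order */

    /* Width is encoded in bits 14:13; odd encodings are 32-bit high halves. */
    static enum field_type field_type(unsigned long field)
    {
        if (field & 1)
            return TYPE_U32;
        return (field >> 13) & 0x3;
    }

    /* Toy stand-in for vmcs12: two fields with their architectural encodings. */
    struct toy_vmcs12 { uint16_t guest_es_selector; uint64_t io_bitmap_a; };
    #define ENC_GUEST_ES_SELECTOR 0x0800   /* 16-bit field encoding */
    #define ENC_IO_BITMAP_A       0x2000   /* 64-bit field encoding */

    static int write_any(struct toy_vmcs12 *v, unsigned long field, uint64_t value)
    {
        size_t off;

        switch (field) {                 /* the kernel uses an offset table here */
        case ENC_GUEST_ES_SELECTOR: off = offsetof(struct toy_vmcs12, guest_es_selector); break;
        case ENC_IO_BITMAP_A:       off = offsetof(struct toy_vmcs12, io_bitmap_a); break;
        default: return 0;               /* unsupported component */
        }

        char *p = (char *)v + off;
        switch (field_type(field)) {
        case TYPE_U16: *(uint16_t *)p = value; return 1;
        case TYPE_U32: *(uint32_t *)p = value; return 1;
        case TYPE_U64: *(uint64_t *)p = value; return 1;
        case TYPE_NATURAL: *(unsigned long *)p = value; return 1;
        }
        return 0;
    }

    int main(void)
    {
        struct toy_vmcs12 v;

        memset(&v, 0, sizeof(v));
        write_any(&v, ENC_GUEST_ES_SELECTOR, 0x12345678);   /* truncated to 16 bits */
        write_any(&v, ENC_IO_BITMAP_A, 0xfee00000ULL);
        printf("es=%#x io_a=%#llx\n", v.guest_es_selector,
               (unsigned long long)v.io_bitmap_a);
        return 0;
    }

Factoring the store into one helper keeps handle_vmwrite() down to validation and error reporting.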
@@ -5908,6 +6115,52 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 static const int kvm_vmx_max_exit_handlers =
        ARRAY_SIZE(kvm_vmx_exit_handlers);
 
+static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
+                                      struct vmcs12 *vmcs12)
+{
+       unsigned long exit_qualification;
+       gpa_t bitmap, last_bitmap;
+       unsigned int port;
+       int size;
+       u8 b;
+
+       if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING))
+               return 1;
+
+       if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+               return 0;
+
+       exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+       port = exit_qualification >> 16;
+       size = (exit_qualification & 7) + 1;
+
+       last_bitmap = (gpa_t)-1;
+       b = -1;
+
+       while (size > 0) {
+               if (port < 0x8000)
+                       bitmap = vmcs12->io_bitmap_a;
+               else if (port < 0x10000)
+                       bitmap = vmcs12->io_bitmap_b;
+               else
+                       return 1;
+               bitmap += (port & 0x7fff) / 8;
+
+               if (last_bitmap != bitmap)
+                       if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
+                               return 1;
+               if (b & (1 << (port & 7)))
+                       return 1;
+
+               port++;
+               size--;
+               last_bitmap = bitmap;
+       }
+
+       return 0;
+}
+
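nested_vmx_exit_handled_io() above consults L1's two 4 KiB I/O bitmaps one byte at a time: ports below 0x8000 map into io_bitmap_a, ports up to 0xffff into io_bitmap_b, and a multi-byte access is forwarded to L1 if any covered port has its bit set. The same lookup against in-memory bitmaps (no kvm_read_guest(), purely illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define IO_BITMAP_SIZE 4096   /* 0x8000 ports per bitmap, one bit per port */

    static int io_access_intercepted(const uint8_t *bitmap_a, const uint8_t *bitmap_b,
                                     unsigned int port, int size)
    {
        while (size > 0) {
            const uint8_t *bitmap;

            if (port < 0x8000)
                bitmap = bitmap_a;
            else if (port < 0x10000)
                bitmap = bitmap_b;
            else
                return 1;                        /* wraps past the port space */
            if (bitmap[(port & 0x7fff) / 8] & (1 << (port & 7)))
                return 1;
            port++;
            size--;
        }
        return 0;
    }

    int main(void)
    {
        uint8_t a[IO_BITMAP_SIZE] = {0}, b[IO_BITMAP_SIZE] = {0};

        a[0x70 / 8] |= 1 << (0x70 & 7);          /* intercept port 0x70 (CMOS index) */

        printf("%d\n", io_access_intercepted(a, b, 0x70, 1));   /* 1 */
        printf("%d\n", io_access_intercepted(a, b, 0x6f, 2));   /* 1: covers 0x6f,0x70 */
        printf("%d\n", io_access_intercepted(a, b, 0x80, 1));   /* 0 */
        return 0;
    }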
 /*
  * Return 1 if we should exit from L2 to L1 to handle an MSR access access,
  * rather than handle it ourselves in L0. I.e., check whether L1 expressed
@@ -5939,7 +6192,8 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
        /* Then read the msr_index'th bit from this bitmap: */
        if (msr_index < 1024*8) {
                unsigned char b;
-               kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1);
+               if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
+                       return 1;
                return 1 & (b >> (msr_index & 7));
        } else
                return 1; /* let L1 handle the wrong parameter */
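The read above fetches one byte of L1's MSR bitmap and tests a single bit in it. The 4 KiB bitmap is split into four 1 KiB quarters: reads of low MSRs, reads of high MSRs (0xc0000000-based), then the two corresponding write quarters. A helper that computes which bit to test for a given MSR and direction, assuming that standard layout rather than reusing the kernel's code:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Locate the intercept bit for an MSR in a 4 KiB VMX MSR bitmap:
     *   0x000-0x3ff  reads  of MSRs 0x00000000-0x00001fff
     *   0x400-0x7ff  reads  of MSRs 0xc0000000-0xc0001fff
     *   0x800-0xbff  writes of MSRs 0x00000000-0x00001fff
     *   0xc00-0xfff  writes of MSRs 0xc0000000-0xc0001fff
     * Returns -1 for MSRs the bitmap does not cover (always intercepted).
     */
    static int msr_bitmap_bit(uint32_t msr, int is_write)
    {
        int base = is_write ? 0x800 * 8 : 0;

        if (msr <= 0x1fff)
            return base + msr;
        if (msr >= 0xc0000000 && msr <= 0xc0001fff)
            return base + 0x400 * 8 + (msr - 0xc0000000);
        return -1;
    }

    static int msr_intercepted(const uint8_t *bitmap, uint32_t msr, int is_write)
    {
        int bit = msr_bitmap_bit(msr, is_write);

        if (bit < 0)
            return 1;
        return !!(bitmap[bit / 8] & (1u << (bit & 7)));
    }

    int main(void)
    {
        uint8_t bitmap[4096] = {0};                   /* nothing intercepted */

        bitmap[0x800 + 0x1b / 8] |= 1u << (0x1b & 7); /* intercept writes to IA32_APIC_BASE */

        printf("%d\n", msr_intercepted(bitmap, 0x1b, 1));        /* 1 */
        printf("%d\n", msr_intercepted(bitmap, 0x1b, 0));        /* 0 */
        printf("%d\n", msr_intercepted(bitmap, 0x12345678, 0));  /* 1: outside ranges */
        return 0;
    }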
@@ -6033,10 +6287,10 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
  */
 static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 {
-       u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
        u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+       u32 exit_reason = vmx->exit_reason;
 
        if (vmx->nested.nested_run_pending)
                return 0;
@@ -6060,14 +6314,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_TRIPLE_FAULT:
                return 1;
        case EXIT_REASON_PENDING_INTERRUPT:
+               return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
        case EXIT_REASON_NMI_WINDOW:
-               /*
-                * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit
-                * (aka Interrupt Window Exiting) only when L1 turned it on,
-                * so if we got a PENDING_INTERRUPT exit, this must be for L1.
-                * Same for NMI Window Exiting.
-                */
-               return 1;
+               return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
        case EXIT_REASON_TASK_SWITCH:
                return 1;
        case EXIT_REASON_CPUID:
@@ -6097,8 +6346,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_DR_ACCESS:
                return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
        case EXIT_REASON_IO_INSTRUCTION:
-               /* TODO: support IO bitmaps */
-               return 1;
+               return nested_vmx_exit_handled_io(vcpu, vmcs12);
        case EXIT_REASON_MSR_READ:
        case EXIT_REASON_MSR_WRITE:
                return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
@@ -6122,6 +6370,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_EPT_VIOLATION:
        case EXIT_REASON_EPT_MISCONFIG:
                return 0;
+       case EXIT_REASON_PREEMPTION_TIMER:
+               return vmcs12->pin_based_vm_exec_control &
+                       PIN_BASED_VMX_PREEMPTION_TIMER;
        case EXIT_REASON_WBINVD:
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
        case EXIT_REASON_XSETBV:
@@ -6316,6 +6567,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
+       if (!vmx_vm_has_apicv(vcpu->kvm))
+               return;
+
        vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
        vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
        vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
@@ -6346,6 +6600,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
        }
 }
 
+static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+{
+       u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+       /*
+        * If external interrupt exists, IF bit is set in rflags/eflags on the
+        * interrupt stack frame, and interrupt will be enabled on a return
+        * from interrupt handler.
+        */
+       if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
+                       == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
+               unsigned int vector;
+               unsigned long entry;
+               gate_desc *desc;
+               struct vcpu_vmx *vmx = to_vmx(vcpu);
+#ifdef CONFIG_X86_64
+               unsigned long tmp;
+#endif
+
+               vector =  exit_intr_info & INTR_INFO_VECTOR_MASK;
+               desc = (gate_desc *)vmx->host_idt_base + vector;
+               entry = gate_offset(*desc);
+               asm volatile(
+#ifdef CONFIG_X86_64
+                       "mov %%" _ASM_SP ", %[sp]\n\t"
+                       "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
+                       "push $%c[ss]\n\t"
+                       "push %[sp]\n\t"
+#endif
+                       "pushf\n\t"
+                       "orl $0x200, (%%" _ASM_SP ")\n\t"
+                       __ASM_SIZE(push) " $%c[cs]\n\t"
+                       "call *%[entry]\n\t"
+                       :
+#ifdef CONFIG_X86_64
+                       [sp]"=&r"(tmp)
+#endif
+                       :
+                       [entry]"r"(entry),
+                       [ss]"i"(__KERNEL_DS),
+                       [cs]"i"(__KERNEL_CS)
+                       );
+       } else
+               local_irq_enable();
+}
+
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
        u32 exit_intr_info;
@@ -6388,7 +6688,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
                        ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
 }
 
-static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
+static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
                                      u32 idt_vectoring_info,
                                      int instr_len_field,
                                      int error_code_field)
@@ -6399,46 +6699,43 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
 
        idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
-       vmx->vcpu.arch.nmi_injected = false;
-       kvm_clear_exception_queue(&vmx->vcpu);
-       kvm_clear_interrupt_queue(&vmx->vcpu);
+       vcpu->arch.nmi_injected = false;
+       kvm_clear_exception_queue(vcpu);
+       kvm_clear_interrupt_queue(vcpu);
 
        if (!idtv_info_valid)
                return;
 
-       kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
+       kvm_make_request(KVM_REQ_EVENT, vcpu);
 
        vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
        type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
 
        switch (type) {
        case INTR_TYPE_NMI_INTR:
-               vmx->vcpu.arch.nmi_injected = true;
+               vcpu->arch.nmi_injected = true;
                /*
                 * SDM 3: 27.7.1.2 (September 2008)
                 * Clear bit "block by NMI" before VM entry if a NMI
                 * delivery faulted.
                 */
-               vmx_set_nmi_mask(&vmx->vcpu, false);
+               vmx_set_nmi_mask(vcpu, false);
                break;
        case INTR_TYPE_SOFT_EXCEPTION:
-               vmx->vcpu.arch.event_exit_inst_len =
-                       vmcs_read32(instr_len_field);
+               vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
                /* fall through */
        case INTR_TYPE_HARD_EXCEPTION:
                if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
                        u32 err = vmcs_read32(error_code_field);
-                       kvm_queue_exception_e(&vmx->vcpu, vector, err);
+                       kvm_queue_exception_e(vcpu, vector, err);
                } else
-                       kvm_queue_exception(&vmx->vcpu, vector);
+                       kvm_queue_exception(vcpu, vector);
                break;
        case INTR_TYPE_SOFT_INTR:
-               vmx->vcpu.arch.event_exit_inst_len =
-                       vmcs_read32(instr_len_field);
+               vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
                /* fall through */
        case INTR_TYPE_EXT_INTR:
-               kvm_queue_interrupt(&vmx->vcpu, vector,
-                       type == INTR_TYPE_SOFT_INTR);
+               kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
                break;
        default:
                break;
@@ -6447,18 +6744,14 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
 
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
-       if (is_guest_mode(&vmx->vcpu))
-               return;
-       __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info,
+       __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
                                  VM_EXIT_INSTRUCTION_LEN,
                                  IDT_VECTORING_ERROR_CODE);
 }
 
 static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
 {
-       if (is_guest_mode(vcpu))
-               return;
-       __vmx_complete_interrupts(to_vmx(vcpu),
+       __vmx_complete_interrupts(vcpu,
                                  vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
                                  VM_ENTRY_INSTRUCTION_LEN,
                                  VM_ENTRY_EXCEPTION_ERROR_CODE);
@@ -6489,21 +6782,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long debugctlmsr;
 
-       if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
-               struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-               if (vmcs12->idt_vectoring_info_field &
-                               VECTORING_INFO_VALID_MASK) {
-                       vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-                               vmcs12->idt_vectoring_info_field);
-                       vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
-                               vmcs12->vm_exit_instruction_len);
-                       if (vmcs12->idt_vectoring_info_field &
-                                       VECTORING_INFO_DELIVER_CODE_MASK)
-                               vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
-                                       vmcs12->idt_vectoring_error_code);
-               }
-       }
-
        /* Record the guest's net vcpu time for enforced NMI injections. */
        if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
                vmx->entry_time = ktime_get();
@@ -6662,17 +6940,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
        vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
-       if (is_guest_mode(vcpu)) {
-               struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-               vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info;
-               if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
-                       vmcs12->idt_vectoring_error_code =
-                               vmcs_read32(IDT_VECTORING_ERROR_CODE);
-                       vmcs12->vm_exit_instruction_len =
-                               vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-               }
-       }
-
        vmx->loaded_vmcs->launched = 1;
 
        vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
@@ -6734,10 +7001,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
        put_cpu();
        if (err)
                goto free_vmcs;
-       if (vm_need_virtualize_apic_accesses(kvm))
+       if (vm_need_virtualize_apic_accesses(kvm)) {
                err = alloc_apic_access_page(kvm);
                if (err)
                        goto free_vmcs;
+       }
 
        if (enable_ept) {
                if (!kvm->arch.ept_identity_map_addr)
@@ -6931,9 +7199,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                vmcs12->vm_entry_instruction_len);
        vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
                vmcs12->guest_interruptibility_info);
-       vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state);
        vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
-       vmcs_writel(GUEST_DR7, vmcs12->guest_dr7);
+       kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
        vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags);
        vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
                vmcs12->guest_pending_dbg_exceptions);
@@ -6946,6 +7213,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                (vmcs_config.pin_based_exec_ctrl |
                 vmcs12->pin_based_vm_exec_control));
 
+       if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
+               vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
+                            vmcs12->vmx_preemption_timer_value);
+
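When L1 enables PIN_BASED_VMX_PREEMPTION_TIMER, prepare_vmcs02() simply forwards vmcs12->vmx_preemption_timer_value to hardware. The counter ticks once every 2^rate TSC cycles, where rate is bits 4:0 of IA32_VMX_MISC, so turning a wall-clock budget into a timer value looks roughly like the sketch below (the 2.4 GHz TSC and rate of 5 are assumptions for the example, not values taken from this code):

    #include <stdint.h>
    #include <stdio.h>

    /*
     * The VMX-preemption timer ticks once every 2^rate TSC cycles.  Convert a
     * nanosecond budget into a 32-bit timer value (saturating), given the TSC
     * frequency.
     */
    static uint32_t preemption_timer_value(uint64_t budget_ns, uint64_t tsc_hz, unsigned rate)
    {
        uint64_t tsc_cycles = budget_ns * tsc_hz / 1000000000ull;
        uint64_t ticks = tsc_cycles >> rate;

        return ticks > UINT32_MAX ? UINT32_MAX : (uint32_t)ticks;
    }

    int main(void)
    {
        uint64_t tsc_hz = 2400000000ull;   /* assumed 2.4 GHz TSC          */
        unsigned rate = 5;                 /* assumed IA32_VMX_MISC[4:0]   */

        /* A 1 ms budget at 2.4 GHz is 2.4M cycles, i.e. 75000 timer ticks. */
        printf("%u\n", preemption_timer_value(1000000, tsc_hz, rate));
        return 0;
    }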
        /*
         * Whether page-faults are trapped is determined by a combination of
         * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
@@ -7016,7 +7287,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
         * Other fields are different per CPU, and will be set later when
         * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
         */
-       vmx_set_constant_host_state();
+       vmx_set_constant_host_state(vmx);
 
        /*
         * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
@@ -7146,6 +7417,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                return 1;
        }
 
+       if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) {
+               nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+               return 1;
+       }
+
        if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
                        !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) {
                /*TODO: Also verify bits beyond physical address width are 0*/
@@ -7223,6 +7499,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        vcpu->cpu = cpu;
        put_cpu();
 
+       vmx_segment_cache_clear(vmx);
+
        vmcs12->launch_state = 1;
 
        prepare_vmcs02(vcpu, vmcs12);
@@ -7273,6 +7551,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                        vcpu->arch.cr4_guest_owned_bits));
 }
 
+static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
+                                      struct vmcs12 *vmcs12)
+{
+       u32 idt_vectoring;
+       unsigned int nr;
+
+       if (vcpu->arch.exception.pending) {
+               nr = vcpu->arch.exception.nr;
+               idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+               if (kvm_exception_is_soft(nr)) {
+                       vmcs12->vm_exit_instruction_len =
+                               vcpu->arch.event_exit_inst_len;
+                       idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
+               } else
+                       idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
+
+               if (vcpu->arch.exception.has_error_code) {
+                       idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
+                       vmcs12->idt_vectoring_error_code =
+                               vcpu->arch.exception.error_code;
+               }
+
+               vmcs12->idt_vectoring_info_field = idt_vectoring;
+       } else if (vcpu->arch.nmi_pending) {
+               vmcs12->idt_vectoring_info_field =
+                       INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
+       } else if (vcpu->arch.interrupt.pending) {
+               nr = vcpu->arch.interrupt.nr;
+               idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+               if (vcpu->arch.interrupt.soft) {
+                       idt_vectoring |= INTR_TYPE_SOFT_INTR;
+                       vmcs12->vm_entry_instruction_len =
+                               vcpu->arch.event_exit_inst_len;
+               } else
+                       idt_vectoring |= INTR_TYPE_EXT_INTR;
+
+               vmcs12->idt_vectoring_info_field = idt_vectoring;
+       }
+}
+
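vmcs12_save_pending_event() reconstructs the IDT-vectoring information word from whichever event was still queued for L2: bits 7:0 carry the vector, bits 10:8 the event type, bit 11 says an error code was pushed, and bit 31 marks the field valid. A small encoder for that layout, with the constants spelled out locally instead of using the kernel's INTR_* macros:

    #include <stdint.h>
    #include <stdio.h>

    /* Field layout of the IDT-vectoring / VM-entry interruption info word. */
    #define VEC_INFO_VECTOR(v)        ((v) & 0xff)
    #define VEC_INFO_TYPE_SHIFT       8            /* bits 10:8 */
    #define VEC_INFO_DELIVER_CODE     (1u << 11)
    #define VEC_INFO_VALID            (1u << 31)

    enum event_type { EXT_INTR = 0, NMI = 2, HARD_EXCEPTION = 3,
                      SOFT_INTR = 4, SOFT_EXCEPTION = 6 };

    static uint32_t encode_event(unsigned vector, enum event_type type, int has_error_code)
    {
        uint32_t info = VEC_INFO_VECTOR(vector) | (type << VEC_INFO_TYPE_SHIFT) |
                        VEC_INFO_VALID;

        if (has_error_code)
            info |= VEC_INFO_DELIVER_CODE;
        return info;
    }

    int main(void)
    {
        /* A pending #PF (vector 14, hardware exception, error code pushed). */
        uint32_t info = encode_event(14, HARD_EXCEPTION, 1);

        printf("idt_vectoring_info_field = %#x\n", info);
        printf("vector=%u type=%u error_code=%d valid=%d\n",
               info & 0xff, (info >> 8) & 7,
               !!(info & VEC_INFO_DELIVER_CODE), !!(info & VEC_INFO_VALID));
        return 0;
    }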
 /*
  * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
  * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -7284,7 +7604,7 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
  * exit-information fields only. Other fields are modified by L1 with VMWRITE,
  * which already writes to vmcs12 directly.
  */
-void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
        /* update guest state fields: */
        vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
@@ -7332,16 +7652,19 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
        vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
 
-       vmcs12->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE);
        vmcs12->guest_interruptibility_info =
                vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
        vmcs12->guest_pending_dbg_exceptions =
                vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
 
+       vmcs12->vm_entry_controls =
+               (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
+               (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
+
        /* TODO: These cannot have changed unless we have MSR bitmaps and
         * the relevant bit asks not to trap the change */
        vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
-       if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT)
+       if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
                vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
        vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
        vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
@@ -7349,21 +7672,38 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
        /* update exit information fields: */
 
-       vmcs12->vm_exit_reason  = vmcs_read32(VM_EXIT_REASON);
+       vmcs12->vm_exit_reason  = to_vmx(vcpu)->exit_reason;
        vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
        vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-       vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
-       vmcs12->idt_vectoring_info_field =
-               vmcs_read32(IDT_VECTORING_INFO_FIELD);
-       vmcs12->idt_vectoring_error_code =
-               vmcs_read32(IDT_VECTORING_ERROR_CODE);
+       if ((vmcs12->vm_exit_intr_info &
+            (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
+           (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
+               vmcs12->vm_exit_intr_error_code =
+                       vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+       vmcs12->idt_vectoring_info_field = 0;
        vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
        vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 
-       /* clear vm-entry fields which are to be cleared on exit */
-       if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+       if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+               /* vm_entry_intr_info_field is cleared on exit. Emulate this
+                * instead of reading the real value. */
                vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
+
+               /*
+                * Transfer the event that L0 or L1 may wanted to inject into
+                * L2 to IDT_VECTORING_INFO_FIELD.
+                */
+               vmcs12_save_pending_event(vcpu, vmcs12);
+       }
+
+       /*
+        * Drop what we picked up for L2 via vmx_complete_interrupts. It is
+        * preserved above and would only end up incorrectly in L1.
+        */
+       vcpu->arch.nmi_injected = false;
+       kvm_clear_exception_queue(vcpu);
+       kvm_clear_interrupt_queue(vcpu);
 }
 
 /*
@@ -7375,7 +7715,8 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
  * Failures During or After Loading Guest State").
  * This function should be called when the active VMCS is L1's (vmcs01).
  */
-void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+                                  struct vmcs12 *vmcs12)
 {
        if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
                vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -7387,6 +7728,7 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
        kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
        kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
+       vmx_set_rflags(vcpu, X86_EFLAGS_BIT1);
        /*
         * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
         * actually changed, because it depends on the current state of
@@ -7445,6 +7787,9 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
                vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
                        vmcs12->host_ia32_perf_global_ctrl);
+
+       kvm_set_dr(vcpu, 7, 0x400);
+       vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 }
 
 /*
@@ -7458,6 +7803,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
        int cpu;
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
+       /* trying to cancel vmlaunch/vmresume is a bug */
+       WARN_ON_ONCE(vmx->nested.nested_run_pending);
+
        leave_guest_mode(vcpu);
        prepare_vmcs12(vcpu, vmcs12);
 
@@ -7468,6 +7816,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
        vcpu->cpu = cpu;
        put_cpu();
 
+       vmx_segment_cache_clear(vmx);
+
        /* if no vmcs02 cache requested, remove the one we used */
        if (VMCS02_POOL_SIZE == 0)
                nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
@@ -7590,6 +7940,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .load_eoi_exitmap = vmx_load_eoi_exitmap,
        .hwapic_irr_update = vmx_hwapic_irr_update,
        .hwapic_isr_update = vmx_hwapic_isr_update,
+       .sync_pir_to_irr = vmx_sync_pir_to_irr,
+       .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
 
        .set_tss_addr = vmx_set_tss_addr,
        .get_tdp_level = get_ept_level,
@@ -7618,6 +7970,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .set_tdp_cr3 = vmx_set_cr3,
 
        .check_intercept = vmx_check_intercept,
+       .handle_external_intr = vmx_handle_external_intr,
 };
 
 static int __init vmx_init(void)
@@ -7656,6 +8009,24 @@ static int __init vmx_init(void)
                                (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_msr_bitmap_longmode_x2apic)
                goto out4;
+       vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_vmread_bitmap)
+               goto out5;
+
+       vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_vmwrite_bitmap)
+               goto out6;
+
+       memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+       memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+       /* shadowed read/write fields */
+       for (i = 0; i < max_shadow_read_write_fields; i++) {
+               clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap);
+               clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap);
+       }
+       /* shadowed read only fields */
+       for (i = 0; i < max_shadow_read_only_fields; i++)
+               clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap);
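Each bit in the 4 KiB VMREAD/VMWRITE bitmaps corresponds to one VMCS field encoding: a set bit makes the instruction VM-exit to L0, a clear bit lets L1 access the shadow VMCS directly, which is why the loops above start from all-ones and clear only the shadowed encodings. Modeled outside the kernel with plain byte arithmetic instead of set_bit/clear_bit (the field encodings are the architectural ones, the helpers are illustrative):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define BITMAP_BYTES 4096

    static void clear_field_bit(uint8_t *bitmap, unsigned long field)
    {
        bitmap[field / 8] &= ~(1u << (field & 7));
    }

    static int field_causes_exit(const uint8_t *bitmap, unsigned long field)
    {
        return !!(bitmap[field / 8] & (1u << (field & 7)));
    }

    int main(void)
    {
        /* A couple of shadowed read/write field encodings used as examples. */
        static const unsigned long shadowed[] = { 0x6820 /* GUEST_RFLAGS */,
                                                  0x681e /* GUEST_RIP */ };
        uint8_t vmwrite_bitmap[BITMAP_BYTES];
        unsigned int i;

        memset(vmwrite_bitmap, 0xff, sizeof(vmwrite_bitmap));   /* default: exit */
        for (i = 0; i < sizeof(shadowed) / sizeof(shadowed[0]); i++)
            clear_field_bit(vmwrite_bitmap, shadowed[i]);

        printf("VMWRITE to GUEST_RFLAGS exits: %d\n",
               field_causes_exit(vmwrite_bitmap, 0x6820));      /* 0: shadowed     */
        printf("VMWRITE to VM_EXIT_REASON exits: %d\n",
               field_causes_exit(vmwrite_bitmap, 0x4402));      /* 1: not shadowed */
        return 0;
    }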
 
        /*
         * Allow direct access to the PC debug port (it is often used for I/O
@@ -7674,7 +8045,7 @@ static int __init vmx_init(void)
        r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
                     __alignof__(struct vcpu_vmx), THIS_MODULE);
        if (r)
-               goto out3;
+               goto out7;
 
 #ifdef CONFIG_KEXEC
        rcu_assign_pointer(crash_vmclear_loaded_vmcss,
@@ -7692,7 +8063,7 @@ static int __init vmx_init(void)
        memcpy(vmx_msr_bitmap_longmode_x2apic,
                        vmx_msr_bitmap_longmode, PAGE_SIZE);
 
-       if (enable_apicv_reg_vid) {
+       if (enable_apicv) {
                for (msr = 0x800; msr <= 0x8ff; msr++)
                        vmx_disable_intercept_msr_read_x2apic(msr);
 
@@ -7722,6 +8093,12 @@ static int __init vmx_init(void)
 
        return 0;
 
+out7:
+       free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+       free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
        free_page((unsigned long)vmx_msr_bitmap_longmode);
 out3:
@@ -7743,6 +8120,8 @@ static void __exit vmx_exit(void)
        free_page((unsigned long)vmx_msr_bitmap_longmode);
        free_page((unsigned long)vmx_io_bitmap_b);
        free_page((unsigned long)vmx_io_bitmap_a);
+       free_page((unsigned long)vmx_vmwrite_bitmap);
+       free_page((unsigned long)vmx_vmread_bitmap);
 
 #ifdef CONFIG_KEXEC
        rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);