KVM: nVMX: Refactor handle_vmwrite
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5a87a58af49dc75b61de4f89b1886d74f86dd3a7..1ee63cae82cdcccf11ec4f2b3488b30c907fa424 100644
@@ -87,6 +87,8 @@ module_param(fasteoi, bool, S_IRUGO);
 static bool __read_mostly enable_apicv = 1;
 module_param(enable_apicv, bool, S_IRUGO);
 
+static bool __read_mostly enable_shadow_vmcs = 1;
+module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
+
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
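With this knob, shadow VMCS support can be turned off at module load time (modprobe kvm-intel enable_shadow_vmcs=0); since the parameter is registered with S_IRUGO, the active value is also readable under /sys/module/kvm_intel/parameters/enable_shadow_vmcs.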
@@ -482,6 +484,64 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 #define FIELD64(number, name)  [number] = VMCS12_OFFSET(name), \
                                [number##_HIGH] = VMCS12_OFFSET(name)+4
 
+static const unsigned long shadow_read_only_fields[] = {
+       /*
+        * We do NOT shadow fields that are modified when L0 traps and
+        * emulates any vmx instruction (e.g. VMPTRLD, VMXON...) executed
+        * by L1. For example, VM_INSTRUCTION_ERROR is read by L1 when a
+        * vmx instruction fails (part of the error path).
+        * The code below relies on this: if we ever start shadowing one
+        * of these fields, we must also force a shadow sync whenever L0
+        * emulates a vmx instruction (e.g. force a sync if
+        * VM_INSTRUCTION_ERROR is modified by nested_vmx_failValid).
+        */
+       VM_EXIT_REASON,
+       VM_EXIT_INTR_INFO,
+       VM_EXIT_INSTRUCTION_LEN,
+       IDT_VECTORING_INFO_FIELD,
+       IDT_VECTORING_ERROR_CODE,
+       VM_EXIT_INTR_ERROR_CODE,
+       EXIT_QUALIFICATION,
+       GUEST_LINEAR_ADDRESS,
+       GUEST_PHYSICAL_ADDRESS
+};
+static const int max_shadow_read_only_fields =
+       ARRAY_SIZE(shadow_read_only_fields);
+
+static const unsigned long shadow_read_write_fields[] = {
+       GUEST_RIP,
+       GUEST_RSP,
+       GUEST_CR0,
+       GUEST_CR3,
+       GUEST_CR4,
+       GUEST_INTERRUPTIBILITY_INFO,
+       GUEST_RFLAGS,
+       GUEST_CS_SELECTOR,
+       GUEST_CS_AR_BYTES,
+       GUEST_CS_LIMIT,
+       GUEST_CS_BASE,
+       GUEST_ES_BASE,
+       CR0_GUEST_HOST_MASK,
+       CR0_READ_SHADOW,
+       CR4_READ_SHADOW,
+       TSC_OFFSET,
+       EXCEPTION_BITMAP,
+       CPU_BASED_VM_EXEC_CONTROL,
+       VM_ENTRY_EXCEPTION_ERROR_CODE,
+       VM_ENTRY_INTR_INFO_FIELD,
+       VM_ENTRY_INSTRUCTION_LEN,
+       HOST_FS_BASE,
+       HOST_GS_BASE,
+       HOST_FS_SELECTOR,
+       HOST_GS_SELECTOR
+};
+static const int max_shadow_read_write_fields =
+       ARRAY_SIZE(shadow_read_write_fields);
+
 static const unsigned short vmcs_field_to_offset_table[] = {
        FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
        FIELD(GUEST_ES_SELECTOR, guest_es_selector),
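These two tables are the heart of the feature: they name the vmcs12 fields that L1 touches often enough to be worth shadowing. Later patches in this series walk them to sync the hardware shadow VMCS with the software vmcs12 image; a minimal sketch of the shadow-to-vmcs12 direction follows (the helper name and the nested.current_shadow_vmcs field come from those later patches, so treat them as illustrative here):

	static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
	{
		int i;
		unsigned long field;
		u64 field_value;
		/* current_shadow_vmcs is introduced by a later patch */
		struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;

		/* Make the shadow VMCS current so vmcs_read*() hits it. */
		vmcs_load(shadow_vmcs);
		for (i = 0; i < max_shadow_read_write_fields; i++) {
			field = shadow_read_write_fields[i];
			switch (vmcs_field_type(field)) {
			case VMCS_FIELD_TYPE_U16:
				field_value = vmcs_read16(field);
				break;
			case VMCS_FIELD_TYPE_U32:
				field_value = vmcs_read32(field);
				break;
			case VMCS_FIELD_TYPE_U64:
				field_value = vmcs_read64(field);
				break;
			default: /* VMCS_FIELD_TYPE_NATURAL_WIDTH */
				field_value = vmcs_readl(field);
				break;
			}
			/* vmcs12_write_any() is added further down in this patch */
			vmcs12_write_any(&vmx->vcpu, field, field_value);
		}
		vmcs_clear(shadow_vmcs);
		vmcs_load(vmx->loaded_vmcs->vmcs);
	}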
@@ -673,6 +733,8 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
+static unsigned long *vmx_vmread_bitmap;
+static unsigned long *vmx_vmwrite_bitmap;
 
 static bool cpu_has_load_ia32_efer;
 static bool cpu_has_load_perf_global_ctrl;
@@ -940,6 +1002,18 @@ static inline bool cpu_has_vmx_wbinvd_exit(void)
                SECONDARY_EXEC_WBINVD_EXITING;
 }
 
+static inline bool cpu_has_vmx_shadow_vmcs(void)
+{
+       u64 vmx_msr;
+
+       rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
+       /* check if the cpu supports writing r/o exit information fields */
+       if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
+               return false;
+
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+               SECONDARY_EXEC_SHADOW_VMCS;
+}
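The extra MSR check is needed because VMCS shadowing makes guest VMWRITEs operate on the shadow VMCS, including the exit-information fields that are architecturally read-only: per the SDM, bit 29 of IA32_VMX_MISC reports whether the CPU permits VMWRITE to those fields. MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS, defined alongside this series in msr-index.h, corresponds to that bit (1ULL << 29).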
+
 static inline bool report_flexpriority(void)
 {
        return flexpriority_enabled;
@@ -2632,7 +2706,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                        SECONDARY_EXEC_RDTSCP |
                        SECONDARY_EXEC_ENABLE_INVPCID |
                        SECONDARY_EXEC_APIC_REGISTER_VIRT |
-                       SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+                       SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+                       SECONDARY_EXEC_SHADOW_VMCS;
                if (adjust_vmx_controls(min2, opt2,
                                        MSR_IA32_VMX_PROCBASED_CTLS2,
                                        &_cpu_based_2nd_exec_control) < 0)
@@ -2833,6 +2908,8 @@ static __init int hardware_setup(void)
 
        if (!cpu_has_vmx_vpid())
                enable_vpid = 0;
+       if (!cpu_has_vmx_shadow_vmcs())
+               enable_shadow_vmcs = 0;
 
        if (!cpu_has_vmx_ept() ||
            !cpu_has_vmx_ept_4levels()) {
@@ -3605,7 +3682,7 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
                return true;
 
        /* real mode guest state checks */
-       if (!is_protmode(vcpu)) {
+       if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
                if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
                        return false;
                if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
@@ -3946,10 +4023,12 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
 
        r = pi_test_and_set_on(&vmx->pi_desc);
        kvm_make_request(KVM_REQ_EVENT, vcpu);
+#ifdef CONFIG_SMP
        if (!r && (vcpu->mode == IN_GUEST_MODE))
                apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
                                POSTED_INTR_VECTOR);
        else
+#endif
                kvm_vcpu_kick(vcpu);
 }
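The CONFIG_SMP guard works because on a uniprocessor kernel the target vcpu can only ever run on the current CPU, so it cannot be IN_GUEST_MODE while this code executes; falling through to kvm_vcpu_kick() is always correct there, and the posted-interrupt IPI path would be dead code.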
 
@@ -4075,6 +4154,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
                exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
        exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+       /*
+        * SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
+        * (handle_vmptrld). We can NOT enable shadow_vmcs here because we
+        * don't yet have a current VMCS12.
+        */
+       exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
        return exec_control;
 }
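The control is switched on later, once L1 actually has a current vmcs12. Roughly what the VMPTRLD handler does in a later patch of this series (nested.current_shadow_vmcs is introduced there; this is a sketch, not part of this patch):

	if (enable_shadow_vmcs) {
		/* Let the CPU satisfy L1's VMREAD/VMWRITE from the shadow VMCS */
		vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
			      SECONDARY_EXEC_SHADOW_VMCS);
		vmcs_write64(VMCS_LINK_POINTER,
			     __pa(vmx->nested.current_shadow_vmcs));
	}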
 
@@ -4103,6 +4188,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
        vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
        vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
 
+       if (enable_shadow_vmcs) {
+               vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
+               vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+       }
        if (cpu_has_vmx_msr_bitmap())
                vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
 
@@ -5753,6 +5842,33 @@ static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
        }
 }
 
+static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
+                                   unsigned long field, u64 field_value)
+{
+       short offset = vmcs_field_to_offset(field);
+       char *p;
+
+       if (offset < 0)
+               return false;
+
+       p = ((char *) get_vmcs12(vcpu)) + offset;
+
+       switch (vmcs_field_type(field)) {
+       case VMCS_FIELD_TYPE_U16:
+               *(u16 *)p = field_value;
+               return true;
+       case VMCS_FIELD_TYPE_U32:
+               *(u32 *)p = field_value;
+               return true;
+       case VMCS_FIELD_TYPE_U64:
+               *(u64 *)p = field_value;
+               return true;
+       case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+               *(natural_width *)p = field_value;
+               return true;
+       default:
+               return false; /* can never happen. */
+       }
+}
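Like the existing vmcs12_read_any() above, the helper keys off the width encoded in the field number itself: bits 14:13 of a VMCS field encoding give its width, and the *_HIGH aliases of 64-bit fields are accessed as their upper 32-bit halves. For reference, the helper already defined earlier in this file reads roughly:

	static inline int vmcs_field_type(unsigned long field)
	{
		if (0x1 & field)	/* the *_HIGH fields are all 32 bit */
			return VMCS_FIELD_TYPE_U32;
		return (field >> 13) & 0x3;
	}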
+
 /*
  * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was
  * used before) all generate the same failure when it is missing.
@@ -5817,8 +5933,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
        gva_t gva;
        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
        u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-       char *p;
-       short offset;
        /* The value to write might be 32 or 64 bits, depending on L1's long
         * mode, and eventually we need to write that into a field of several
         * possible lengths. The code below first zero-extends the value to 64
@@ -5855,28 +5969,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
                return 1;
        }
 
-       offset = vmcs_field_to_offset(field);
-       if (offset < 0) {
-               nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-               skip_emulated_instruction(vcpu);
-               return 1;
-       }
-       p = ((char *) get_vmcs12(vcpu)) + offset;
-
-       switch (vmcs_field_type(field)) {
-       case VMCS_FIELD_TYPE_U16:
-               *(u16 *)p = field_value;
-               break;
-       case VMCS_FIELD_TYPE_U32:
-               *(u32 *)p = field_value;
-               break;
-       case VMCS_FIELD_TYPE_U64:
-               *(u64 *)p = field_value;
-               break;
-       case VMCS_FIELD_TYPE_NATURAL_WIDTH:
-               *(natural_width *)p = field_value;
-               break;
-       default:
+       if (!vmcs12_write_any(vcpu, field, field_value)) {
                nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
                skip_emulated_instruction(vcpu);
                return 1;
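With this refactoring, handle_vmwrite mirrors handle_vmread (which already funnels through vmcs12_read_any), and vmcs12_write_any becomes the single place where a vmcs12 field is stored by field number, ready to be reused by the shadow-VMCS sync loops elsewhere in this series.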
@@ -7106,7 +7199,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                vmcs12->vm_entry_instruction_len);
        vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
                vmcs12->guest_interruptibility_info);
-       vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state);
        vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
        kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
        vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags);
@@ -7325,6 +7417,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                return 1;
        }
 
+       if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) {
+               nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+               return 1;
+       }
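Rejecting every guest_activity_state other than GUEST_ACTIVITY_ACTIVE at nested-entry time is what lets prepare_vmcs02 and prepare_vmcs12 (see the removals above and below) stop copying GUEST_ACTIVITY_STATE: if only the active state can be entered, there is nothing to propagate.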
+
        if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
                        !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) {
                /*TODO: Also verify bits beyond physical address width are 0*/
@@ -7555,7 +7652,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
        vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
 
-       vmcs12->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE);
        vmcs12->guest_interruptibility_info =
                vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
        vmcs12->guest_pending_dbg_exceptions =
@@ -7913,6 +8009,24 @@ static int __init vmx_init(void)
                                (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_msr_bitmap_longmode_x2apic)
                goto out4;
+       vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_vmread_bitmap)
+               goto out5;
+
+       vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_vmwrite_bitmap)
+               goto out6;
+
+       memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+       memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+       /* shadowed read/write fields */
+       for (i = 0; i < max_shadow_read_write_fields; i++) {
+               clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap);
+               clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap);
+       }
+       /* shadowed read only fields */
+       for (i = 0; i < max_shadow_read_only_fields; i++)
+               clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap);
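A note on the bitmap layout: each bitmap is one 4K page, i.e. 32768 bits, one bit per possible 15-bit VMCS field-encoding index. A set bit makes the corresponding VMREAD/VMWRITE executed by L1 cause a VM exit to L0; a clear bit lets the CPU satisfy the access directly from the shadow VMCS. Starting from all-ones and clearing only the shadowed fields keeps every other field trapping as before, and read-only fields are cleared only in the vmread bitmap, so guest VMWRITEs to them still exit.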
 
        /*
         * Allow direct access to the PC debug port (it is often used for I/O
@@ -7931,7 +8045,7 @@ static int __init vmx_init(void)
        r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
                     __alignof__(struct vcpu_vmx), THIS_MODULE);
        if (r)
-               goto out5;
+               goto out7;
 
 #ifdef CONFIG_KEXEC
        rcu_assign_pointer(crash_vmclear_loaded_vmcss,
@@ -7979,6 +8093,10 @@ static int __init vmx_init(void)
 
        return 0;
 
+out7:
+       free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+       free_page((unsigned long)vmx_vmread_bitmap);
 out5:
        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
@@ -8002,6 +8120,8 @@ static void __exit vmx_exit(void)
        free_page((unsigned long)vmx_msr_bitmap_longmode);
        free_page((unsigned long)vmx_io_bitmap_b);
        free_page((unsigned long)vmx_io_bitmap_a);
+       free_page((unsigned long)vmx_vmwrite_bitmap);
+       free_page((unsigned long)vmx_vmread_bitmap);
 
 #ifdef CONFIG_KEXEC
        rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);