KVM: VMX: Add RDTSCP instruction support for guest
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1ddcad452add0808b3c3aac39c291db32c3e620a..8798504ace110c8bc5c766bbfb65e8031b9f7849 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -93,16 +93,16 @@ module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
 struct kvm_shared_msrs_global {
        int nr;
-       struct kvm_shared_msr {
-               u32 msr;
-               u64 value;
-       } msrs[KVM_NR_SHARED_MSRS];
+       u32 msrs[KVM_NR_SHARED_MSRS];
 };
 
 struct kvm_shared_msrs {
        struct user_return_notifier urn;
        bool registered;
-       u64 current_value[KVM_NR_SHARED_MSRS];
+       struct kvm_shared_msr_values {
+               u64 host;
+               u64 curr;
+       } values[KVM_NR_SHARED_MSRS];
 };
 
 static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
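
After this change the global array holds only the MSR indices, while each CPU
tracks its own host/current value pair, so the user-return notifier can restore
per-CPU host state. The resulting layout, gathered here with explanatory
comments for readability (the comments are editorial, not from the patch):

struct kvm_shared_msrs_global {
	int nr;				/* number of defined slots */
	u32 msrs[KVM_NR_SHARED_MSRS];	/* MSR index for each slot */
};

struct kvm_shared_msrs {
	struct user_return_notifier urn;
	bool registered;		/* notifier armed on this CPU? */
	struct kvm_shared_msr_values {
		u64 host;		/* value to restore for userspace */
		u64 curr;		/* value currently in hardware */
	} values[KVM_NR_SHARED_MSRS];
};
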
@@ -147,53 +147,64 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
        unsigned slot;
-       struct kvm_shared_msr *global;
        struct kvm_shared_msrs *locals
                = container_of(urn, struct kvm_shared_msrs, urn);
+       struct kvm_shared_msr_values *values;
 
        for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
-               global = &shared_msrs_global.msrs[slot];
-               if (global->value != locals->current_value[slot]) {
-                       wrmsrl(global->msr, global->value);
-                       locals->current_value[slot] = global->value;
+               values = &locals->values[slot];
+               if (values->host != values->curr) {
+                       wrmsrl(shared_msrs_global.msrs[slot], values->host);
+                       values->curr = values->host;
                }
        }
        locals->registered = false;
        user_return_notifier_unregister(urn);
 }
 
-void kvm_define_shared_msr(unsigned slot, u32 msr)
+static void shared_msr_update(unsigned slot, u32 msr)
 {
-       int cpu;
+       struct kvm_shared_msrs *smsr;
        u64 value;
 
+       smsr = &__get_cpu_var(shared_msrs);
+       /* Read-only here: nothing modifies shared_msrs_global.nr
+        * concurrently at this point, so no locking is needed. */
+       if (slot >= shared_msrs_global.nr) {
+               printk(KERN_ERR "kvm: invalid MSR slot!\n");
+               return;
+       }
+       rdmsrl_safe(msr, &value);
+       smsr->values[slot].host = value;
+       smsr->values[slot].curr = value;
+}
+
+void kvm_define_shared_msr(unsigned slot, u32 msr)
+{
        if (slot >= shared_msrs_global.nr)
                shared_msrs_global.nr = slot + 1;
-       shared_msrs_global.msrs[slot].msr = msr;
-       rdmsrl_safe(msr, &value);
-       shared_msrs_global.msrs[slot].value = value;
-       for_each_online_cpu(cpu)
-               per_cpu(shared_msrs, cpu).current_value[slot] = value;
+       shared_msrs_global.msrs[slot] = msr;
+       /* ensure the updates to shared_msrs_global are visible before use */
+       smp_wmb();
 }
 EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
 
 static void kvm_shared_msr_cpu_online(void)
 {
        unsigned i;
-       struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs);
 
        for (i = 0; i < shared_msrs_global.nr; ++i)
-               locals->current_value[i] = shared_msrs_global.msrs[i].value;
+               shared_msr_update(i, shared_msrs_global.msrs[i]);
 }
 
 void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 {
        struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
 
-       if (((value ^ smsr->current_value[slot]) & mask) == 0)
+       if (((value ^ smsr->values[slot].curr) & mask) == 0)
                return;
-       smsr->current_value[slot] = value;
-       wrmsrl(shared_msrs_global.msrs[slot].msr, value);
+       smsr->values[slot].curr = value;
+       wrmsrl(shared_msrs_global.msrs[slot], value);
        if (!smsr->registered) {
                smsr->urn.on_user_return = kvm_on_user_return;
                user_return_notifier_register(&smsr->urn);
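
For context, a minimal sketch of how a caller such as vmx.c might drive this
shared-MSR API for RDTSCP's TSC_AUX. The slot number, function names, and call
sites below are illustrative assumptions, not code from this patch:

#define TSC_AUX_SLOT 0	/* assumed slot assignment for this sketch */

static void sketch_hardware_setup(void)
{
	/* Once at init: record MSR_TSC_AUX in slot 0 and grow
	 * shared_msrs_global.nr; kvm_shared_msr_cpu_online() later
	 * snapshots the host value into .host and .curr on each CPU. */
	kvm_define_shared_msr(TSC_AUX_SLOT, MSR_TSC_AUX);
}

static void sketch_vcpu_enter(struct kvm_vcpu *vcpu, u64 guest_tsc_aux)
{
	/* On guest entry: write the guest value only if the masked bits
	 * differ from .curr; the user-return notifier registered by
	 * kvm_set_shared_msr() restores .host lazily on the next return
	 * to userspace. */
	kvm_set_shared_msr(TSC_AUX_SLOT, guest_tsc_aux, -1ull);
}
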
@@ -257,12 +268,68 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
+#define EXCPT_BENIGN           0
+#define EXCPT_CONTRIBUTORY     1
+#define EXCPT_PF               2
+
+static int exception_class(int vector)
+{
+       switch (vector) {
+       case PF_VECTOR:
+               return EXCPT_PF;
+       case DE_VECTOR:
+       case TS_VECTOR:
+       case NP_VECTOR:
+       case SS_VECTOR:
+       case GP_VECTOR:
+               return EXCPT_CONTRIBUTORY;
+       default:
+               break;
+       }
+       return EXCPT_BENIGN;
+}
+
+static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
+               unsigned nr, bool has_error, u32 error_code)
+{
+       u32 prev_nr;
+       int class1, class2;
+
+       if (!vcpu->arch.exception.pending) {
+       queue:
+               vcpu->arch.exception.pending = true;
+               vcpu->arch.exception.has_error_code = has_error;
+               vcpu->arch.exception.nr = nr;
+               vcpu->arch.exception.error_code = error_code;
+               return;
+       }
+
+       /* an exception is already pending: decide how the two combine */
+       prev_nr = vcpu->arch.exception.nr;
+       if (prev_nr == DF_VECTOR) {
+               /* triple fault -> shutdown */
+               set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+               return;
+       }
+       class1 = exception_class(prev_nr);
+       class2 = exception_class(nr);
+       if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
+               || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
+               /* generate double fault per SDM Table 5-5 */
+               vcpu->arch.exception.pending = true;
+               vcpu->arch.exception.has_error_code = true;
+               vcpu->arch.exception.nr = DF_VECTOR;
+               vcpu->arch.exception.error_code = 0;
+       } else
+               /* replace the previous exception with the new one, in the
+                  hope that re-executing the instruction will regenerate
+                  the lost exception */
+               goto queue;
+}
+
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
-       WARN_ON(vcpu->arch.exception.pending);
-       vcpu->arch.exception.pending = true;
-       vcpu->arch.exception.has_error_code = false;
-       vcpu->arch.exception.nr = nr;
+       kvm_multiple_exception(vcpu, nr, false, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_queue_exception);
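
The merge behaviour implemented by kvm_multiple_exception() can be summarised
as below; the two-call sequence is an illustrative assumption, not code from
this patch:

/*
 * Combination rules when an exception is raised while another is still
 * pending (SDM Table 5-5):
 *
 *   pending #DF + anything          -> triple fault (shutdown request)
 *   contributory + contributory     -> #DF, error code 0
 *   #PF + (contributory or #PF)     -> #DF, error code 0
 *   any other pair                  -> the new exception replaces the
 *                                      pending one
 */
static void sketch_double_fault(struct kvm_vcpu *vcpu)
{
	kvm_queue_exception_e(vcpu, GP_VECTOR, 0);	/* pending: #GP */
	kvm_queue_exception_e(vcpu, NP_VECTOR, 0);	/* merged:  #DF */
}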
 
@@ -270,25 +337,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
                           u32 error_code)
 {
        ++vcpu->stat.pf_guest;
-
-       if (vcpu->arch.exception.pending) {
-               switch(vcpu->arch.exception.nr) {
-               case DF_VECTOR:
-                       /* triple fault -> shutdown */
-                       set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
-                       return;
-               case PF_VECTOR:
-                       vcpu->arch.exception.nr = DF_VECTOR;
-                       vcpu->arch.exception.error_code = 0;
-                       return;
-               default:
-                       /* replace previous exception with a new one in a hope
-                          that instruction re-execution will regenerate lost
-                          exception */
-                       vcpu->arch.exception.pending = false;
-                       break;
-               }
-       }
        vcpu->arch.cr2 = addr;
        kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
 }
@@ -301,11 +349,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
 
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
 {
-       WARN_ON(vcpu->arch.exception.pending);
-       vcpu->arch.exception.pending = true;
-       vcpu->arch.exception.has_error_code = true;
-       vcpu->arch.exception.nr = nr;
-       vcpu->arch.exception.error_code = error_code;
+       kvm_multiple_exception(vcpu, nr, true, error_code);
 }
 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
 
@@ -449,7 +493,7 @@ EXPORT_SYMBOL_GPL(kvm_lmsw);
 
 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       unsigned long old_cr4 = vcpu->arch.cr4;
+       unsigned long old_cr4 = kvm_read_cr4(vcpu);
        unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
 
        if (cr4 & CR4_RESERVED_BITS) {
@@ -670,7 +714,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 {
        static int version;
        struct pvclock_wall_clock wc;
-       struct timespec now, sys, boot;
+       struct timespec boot;
 
        if (!wall_clock)
                return;
@@ -685,9 +729,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
         * wall clock specified here.  guest system time equals host
         * system time for us, thus we must fill in host boot time here.
         */
-       now = current_kernel_time();
-       ktime_get_ts(&sys);
-       boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+       getboottime(&boot);
 
        wc.sec = boot.tv_sec;
        wc.nsec = boot.tv_nsec;
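
getboottime() computes what the deleted lines derived by hand -- the boot
epoch as wall-clock time minus monotonic time -- and, presumably the point of
the change, also folds in time spent in suspend (note the companion
monotonic_to_bootbased() call in the next hunk). The identity, as a sketch:

/*
 * wall_now = boot_epoch + monotonic_now + suspend_time
 * so getboottime(&boot) yields:
 *   boot = wall_now - (monotonic_now + suspend_time)
 * The removed code omitted suspend_time, which presumably skewed the
 * guest wall clock after a host suspend/resume cycle.
 */
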
@@ -762,6 +804,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
        local_irq_save(flags);
        kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
        ktime_get_ts(&ts);
+       monotonic_to_bootbased(&ts);
        local_irq_restore(flags);
 
        /* With all the info we got, fill in the values */
@@ -1531,6 +1574,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
        cpuid_fix_nx_cap(vcpu);
        r = 0;
        kvm_apic_set_version(vcpu);
+       kvm_x86_ops->cpuid_update(vcpu);
 
 out_free:
        vfree(cpuid_entries);
@@ -1553,6 +1597,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
                goto out;
        vcpu->arch.cpuid_nent = cpuid->nent;
        kvm_apic_set_version(vcpu);
+       kvm_x86_ops->cpuid_update(vcpu);
        return 0;
 
 out:
@@ -1601,6 +1646,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 #else
        unsigned f_lm = 0;
 #endif
+       unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
 
        /* cpuid 1.edx */
        const u32 kvm_supported_word0_x86_features =
@@ -1620,7 +1666,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
                F(PAT) | F(PSE36) | 0 /* Reserved */ |
                f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
-               F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ |
+               F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
                0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
        /* cpuid 1.ecx */
        const u32 kvm_supported_word4_x86_features =
@@ -1867,7 +1913,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
                return 0;
        if (mce->status & MCI_STATUS_UC) {
                if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
-                   !(vcpu->arch.cr4 & X86_CR4_MCE)) {
+                   !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
                        printk(KERN_DEBUG "kvm: set_mce: "
                               "injects mce exception while "
                               "previous one is in progress!\n");
@@ -3584,7 +3630,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
 {
        unsigned long value;
 
-       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
        switch (cr) {
        case 0:
                value = vcpu->arch.cr0;
@@ -3596,7 +3641,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
                value = vcpu->arch.cr3;
                break;
        case 4:
-               value = vcpu->arch.cr4;
+               value = kvm_read_cr4(vcpu);
                break;
        case 8:
                value = kvm_get_cr8(vcpu);
@@ -3624,7 +3669,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
                kvm_set_cr3(vcpu, val);
                break;
        case 4:
-               kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
+               kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
                break;
        case 8:
                kvm_set_cr8(vcpu, val & 0xfUL);
@@ -3691,6 +3736,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
        }
        return best;
 }
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
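
kvm_find_cpuid_entry() is exported so that vendor modules can inspect the
guest CPUID, e.g. from the new cpuid_update() hook. A sketch of the kind of
check involved for RDTSCP (the helper name and exact logic are assumptions;
leaf 0x80000001 EDX bit 27 is the architectural RDTSCP feature bit):

static bool sketch_guest_has_rdtscp(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;

	/* look up extended feature leaf 0x80000001, index 0 */
	best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
	return best && (best->edx & (1u << 27));
}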
 
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
 {
@@ -4205,11 +4251,10 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
        sregs->gdt.limit = dt.limit;
        sregs->gdt.base = dt.base;
 
-       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
        sregs->cr0 = vcpu->arch.cr0;
        sregs->cr2 = vcpu->arch.cr2;
        sregs->cr3 = vcpu->arch.cr3;
-       sregs->cr4 = vcpu->arch.cr4;
+       sregs->cr4 = kvm_read_cr4(vcpu);
        sregs->cr8 = kvm_get_cr8(vcpu);
        sregs->efer = vcpu->arch.shadow_efer;
        sregs->apic_base = kvm_get_apic_base(vcpu);
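
The explicit decache_cr4_guest_bits() calls removed in this and the
surrounding hunks are subsumed by kvm_read_cr4()/kvm_read_cr4_bits(), which
presumably decache on demand, along these lines (a sketch of the accessor
idea, not the actual kvm_cache_regs.h code):

/* Sketch: assumes a cr4_guest_owned_bits mask of CR4 bits the guest may
 * own in hardware, which must be re-read from the VMCS before use. */
static inline unsigned long kvm_read_cr4_bits(struct kvm_vcpu *vcpu,
					      unsigned long mask)
{
	if (mask & vcpu->arch.cr4_guest_owned_bits)
		kvm_x86_ops->decache_cr4_guest_bits(vcpu);
	return vcpu->arch.cr4 & mask;
}

static inline unsigned long kvm_read_cr4(struct kvm_vcpu *vcpu)
{
	return kvm_read_cr4_bits(vcpu, ~0UL);
}
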
@@ -4378,6 +4423,15 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
                (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
 }
 
+static void kvm_check_segment_descriptor(struct kvm_vcpu *vcpu, int seg,
+                                        u16 selector)
+{
+       /* NULL selector is not valid for CS and SS */
+       if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
+               if (!selector)
+                       kvm_queue_exception_e(vcpu, TS_VECTOR, selector >> 3);
+}
+
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
                                int type_bits, int seg)
 {
@@ -4387,6 +4441,8 @@ int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
                return kvm_load_realmode_segment(vcpu, selector, seg);
        if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
                return 1;
+
+       kvm_check_segment_descriptor(vcpu, seg, selector);
        kvm_seg.type |= type_bits;
 
        if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
@@ -4694,13 +4750,11 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        kvm_x86_ops->set_efer(vcpu, sregs->efer);
        kvm_set_apic_base(vcpu, sregs->apic_base);
 
-       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
-
        mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
        kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
        vcpu->arch.cr0 = sregs->cr0;
 
-       mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
+       mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
        kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
        if (!is_long_mode(vcpu) && is_pae(vcpu)) {
                load_pdptrs(vcpu, vcpu->arch.cr3);