KVM: VMX: Add RDTSCP instruction support for guest
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1ddcad452add0808b3c3aac39c291db32c3e620a..8798504ace110c8bc5c766bbfb65e8031b9f7849 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -93,16 +93,16 @@ module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
 struct kvm_shared_msrs_global {
        int nr;
-       struct kvm_shared_msr {
-               u32 msr;
-               u64 value;
-       } msrs[KVM_NR_SHARED_MSRS];
+       u32 msrs[KVM_NR_SHARED_MSRS];
 };
 
 struct kvm_shared_msrs {
        struct user_return_notifier urn;
        bool registered;
-       u64 current_value[KVM_NR_SHARED_MSRS];
+       struct kvm_shared_msr_values {
+               u64 host;
+               u64 curr;
+       } values[KVM_NR_SHARED_MSRS];
 };
 
 static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
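
After this change the global array holds only the MSR indices, while each CPU
tracks its own host/current value pair, so the user-return notifier can restore
per-CPU host state. The resulting layout, gathered here with explanatory
comments for readability (the comments are editorial, not from the patch):

struct kvm_shared_msrs_global {
	int nr;				/* number of defined slots */
	u32 msrs[KVM_NR_SHARED_MSRS];	/* MSR index for each slot */
};

struct kvm_shared_msrs {
	struct user_return_notifier urn;
	bool registered;		/* notifier armed on this CPU? */
	struct kvm_shared_msr_values {
		u64 host;		/* value to restore for userspace */
		u64 curr;		/* value currently in hardware */
	} values[KVM_NR_SHARED_MSRS];
};
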
@@ -147,53 +147,64 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
        unsigned slot;
-       struct kvm_shared_msr *global;
        struct kvm_shared_msrs *locals
                = container_of(urn, struct kvm_shared_msrs, urn);
+       struct kvm_shared_msr_values *values;
 
        for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
-               global = &shared_msrs_global.msrs[slot];
-               if (global->value != locals->current_value[slot]) {
-                       wrmsrl(global->msr, global->value);
-                       locals->current_value[slot] = global->value;
+               values = &locals->values[slot];
+               if (values->host != values->curr) {
+                       wrmsrl(shared_msrs_global.msrs[slot], values->host);
+                       values->curr = values->host;
                }
        }
        locals->registered = false;
        user_return_notifier_unregister(urn);
 }
 
-void kvm_define_shared_msr(unsigned slot, u32 msr)
+static void shared_msr_update(unsigned slot, u32 msr)
 {
-       int cpu;
+       struct kvm_shared_msrs *smsr;
        u64 value;
 
+       smsr = &__get_cpu_var(shared_msrs);
+       /* Read-only here: nothing modifies shared_msrs_global.nr
+        * concurrently at this point, so no locking is needed. */
+       if (slot >= shared_msrs_global.nr) {
+               printk(KERN_ERR "kvm: invalid MSR slot!\n");
+               return;
+       }
+       rdmsrl_safe(msr, &value);
+       smsr->values[slot].host = value;
+       smsr->values[slot].curr = value;
+}
+
+void kvm_define_shared_msr(unsigned slot, u32 msr)
+{
        if (slot >= shared_msrs_global.nr)
                shared_msrs_global.nr = slot + 1;
-       shared_msrs_global.msrs[slot].msr = msr;
-       rdmsrl_safe(msr, &value);
-       shared_msrs_global.msrs[slot].value = value;
-       for_each_online_cpu(cpu)
-               per_cpu(shared_msrs, cpu).current_value[slot] = value;
+       shared_msrs_global.msrs[slot] = msr;
+       /* ensure the updates to shared_msrs_global are visible before use */
+       smp_wmb();
 }
 EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
 
 static void kvm_shared_msr_cpu_online(void)
 {
        unsigned i;
-       struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs);
 
        for (i = 0; i < shared_msrs_global.nr; ++i)
-               locals->current_value[i] = shared_msrs_global.msrs[i].value;
+               shared_msr_update(i, shared_msrs_global.msrs[i]);
 }
 
 void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 {
        struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
 
-       if (((value ^ smsr->current_value[slot]) & mask) == 0)
+       if (((value ^ smsr->values[slot].curr) & mask) == 0)
                return;
-       smsr->current_value[slot] = value;
-       wrmsrl(shared_msrs_global.msrs[slot].msr, value);
+       smsr->values[slot].curr = value;
+       wrmsrl(shared_msrs_global.msrs[slot], value);
        if (!smsr->registered) {
                smsr->urn.on_user_return = kvm_on_user_return;
                user_return_notifier_register(&smsr->urn);
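
For context, a minimal sketch of how a caller such as vmx.c might drive this
shared-MSR API for RDTSCP's TSC_AUX. The slot number, function names, and call
sites below are illustrative assumptions, not code from this patch:

#define TSC_AUX_SLOT 0	/* assumed slot assignment for this sketch */

static void sketch_hardware_setup(void)
{
	/* Once at init: record MSR_TSC_AUX in slot 0 and grow
	 * shared_msrs_global.nr; kvm_shared_msr_cpu_online() later
	 * snapshots the host value into .host and .curr on each CPU. */
	kvm_define_shared_msr(TSC_AUX_SLOT, MSR_TSC_AUX);
}

static void sketch_vcpu_enter(struct kvm_vcpu *vcpu, u64 guest_tsc_aux)
{
	/* On guest entry: write the guest value only if the masked bits
	 * differ from .curr; the user-return notifier registered by
	 * kvm_set_shared_msr() restores .host lazily on the next return
	 * to userspace. */
	kvm_set_shared_msr(TSC_AUX_SLOT, guest_tsc_aux, -1ull);
}
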
@@ -257,12 +268,68 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
+#define EXCPT_BENIGN           0
+#define EXCPT_CONTRIBUTORY     1
+#define EXCPT_PF               2
+
+static int exception_class(int vector)
+{
+       switch (vector) {
+       case PF_VECTOR:
+               return EXCPT_PF;
+       case DE_VECTOR:
+       case TS_VECTOR:
+       case NP_VECTOR:
+       case SS_VECTOR:
+       case GP_VECTOR:
+               return EXCPT_CONTRIBUTORY;
+       default:
+               break;
+       }
+       return EXCPT_BENIGN;
+}
+
+static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
+               unsigned nr, bool has_error, u32 error_code)
+{
+       u32 prev_nr;
+       int class1, class2;
+
+       if (!vcpu->arch.exception.pending) {
+       queue:
+               vcpu->arch.exception.pending = true;
+               vcpu->arch.exception.has_error_code = has_error;
+               vcpu->arch.exception.nr = nr;
+               vcpu->arch.exception.error_code = error_code;
+               return;
+       }
+
+       /* an exception is already pending: decide how the two combine */
+       prev_nr = vcpu->arch.exception.nr;
+       if (prev_nr == DF_VECTOR) {
+               /* triple fault -> shutdown */
+               set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+               return;
+       }
+       class1 = exception_class(prev_nr);
+       class2 = exception_class(nr);
+       if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
+               || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
+               /* generate double fault per SDM Table 5-5 */
+               vcpu->arch.exception.pending = true;
+               vcpu->arch.exception.has_error_code = true;
+               vcpu->arch.exception.nr = DF_VECTOR;
+               vcpu->arch.exception.error_code = 0;
+       } else
+               /* replace the previous exception with the new one, in the
+                  hope that re-executing the instruction will regenerate
+                  the lost exception */
+               goto queue;
+}
+
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
-       WARN_ON(vcpu->arch.exception.pending);
-       vcpu->arch.exception.pending = true;
-       vcpu->arch.exception.has_error_code = false;
-       vcpu->arch.exception.nr = nr;
+       kvm_multiple_exception(vcpu, nr, false, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_queue_exception);
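
The merge behaviour implemented by kvm_multiple_exception() can be summarised
as below; the two-call sequence is an illustrative assumption, not code from
this patch:

/*
 * Combination rules when an exception is raised while another is still
 * pending (SDM Table 5-5):
 *
 *   pending #DF + anything          -> triple fault (shutdown request)
 *   contributory + contributory     -> #DF, error code 0
 *   #PF + (contributory or #PF)     -> #DF, error code 0
 *   any other pair                  -> the new exception replaces the
 *                                      pending one
 */
static void sketch_double_fault(struct kvm_vcpu *vcpu)
{
	kvm_queue_exception_e(vcpu, GP_VECTOR, 0);	/* pending: #GP */
	kvm_queue_exception_e(vcpu, NP_VECTOR, 0);	/* merged:  #DF */
}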
 
@@ -270,25 +337,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
                           u32 error_code)
 {
        ++vcpu->stat.pf_guest;
-
-       if (vcpu->arch.exception.pending) {
-               switch(vcpu->arch.exception.nr) {
-               case DF_VECTOR:
-                       /* triple fault -> shutdown */
-                       set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
-                       return;
-               case PF_VECTOR:
-                       vcpu->arch.exception.nr = DF_VECTOR;
-                       vcpu->arch.exception.error_code = 0;
-                       return;
-               default:
-                       /* replace previous exception with a new one in a hope
-                          that instruction re-execution will regenerate lost
-                          exception */
-                       vcpu->arch.exception.pending = false;
-                       break;
-               }
-       }
        vcpu->arch.cr2 = addr;
        kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
 }
@@ -301,11 +349,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
 
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
 {
-       WARN_ON(vcpu->arch.exception.pending);
-       vcpu->arch.exception.pending = true;
-       vcpu->arch.exception.has_error_code = true;
-       vcpu->arch.exception.nr = nr;
-       vcpu->arch.exception.error_code = error_code;
+       kvm_multiple_exception(vcpu, nr, true, error_code);
 }
 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
 
@@ -449,7 +493,7 @@ EXPORT_SYMBOL_GPL(kvm_lmsw);
 
 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       unsigned long old_cr4 = vcpu->arch.cr4;
+       unsigned long old_cr4 = kvm_read_cr4(vcpu);
        unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
 
        if (cr4 & CR4_RESERVED_BITS) {
@@ -670,7 +714,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 {
        static int version;
        struct pvclock_wall_clock wc;
-       struct timespec now, sys, boot;
+       struct timespec boot;
 
        if (!wall_clock)
                return;
@@ -685,9 +729,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
         * wall clock specified here.  guest system time equals host
         * system time for us, thus we must fill in host boot time here.
         */
-       now = current_kernel_time();
-       ktime_get_ts(&sys);
-       boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+       getboottime(&boot);
 
        wc.sec = boot.tv_sec;
        wc.nsec = boot.tv_nsec;
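
getboottime() computes what the deleted lines derived by hand -- the boot
epoch as wall-clock time minus monotonic time -- and, presumably the point of
the change, also folds in time spent in suspend (note the companion
monotonic_to_bootbased() call in the next hunk). The identity, as a sketch:

/*
 * wall_now = boot_epoch + monotonic_now + suspend_time
 * so getboottime(&boot) yields:
 *   boot = wall_now - (monotonic_now + suspend_time)
 * The removed code omitted suspend_time, which presumably skewed the
 * guest wall clock after a host suspend/resume cycle.
 */
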
@@ -762,6 +804,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
        local_irq_save(flags);
        kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
        ktime_get_ts(&ts);
+       monotonic_to_bootbased(&ts);
        local_irq_restore(flags);
 
        /* With all the info we got, fill in the values */
@@ -1531,6 +1574,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
        cpuid_fix_nx_cap(vcpu);
        r = 0;
        kvm_apic_set_version(vcpu);
+       kvm_x86_ops->cpuid_update(vcpu);
 
 out_free:
        vfree(cpuid_entries);
@@ -1553,6 +1597,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
                goto out;
        vcpu->arch.cpuid_nent = cpuid->nent;
        kvm_apic_set_version(vcpu);
+       kvm_x86_ops->cpuid_update(vcpu);
        return 0;
 
 out:
@@ -1601,6 +1646,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 #else
        unsigned f_lm = 0;
 #endif
+       unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
 
        /* cpuid 1.edx */
        const u32 kvm_supported_word0_x86_features =
@@ -1620,7 +1666,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
                F(PAT) | F(PSE36) | 0 /* Reserved */ |
                f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
-               F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ |
+               F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
                0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
        /* cpuid 1.ecx */
        const u32 kvm_supported_word4_x86_features =
@@ -1867,7 +1913,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
                return 0;
        if (mce->status & MCI_STATUS_UC) {
                if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
-                   !(vcpu->arch.cr4 & X86_CR4_MCE)) {
+                   !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
                        printk(KERN_DEBUG "kvm: set_mce: "
                               "injects mce exception while "
                               "previous one is in progress!\n");
@@ -3584,7 +3630,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
 {
        unsigned long value;
 
-       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
        switch (cr) {
        case 0:
                value = vcpu->arch.cr0;
@@ -3596,7 +3641,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
                value = vcpu->arch.cr3;
                break;
        case 4:
-               value = vcpu->arch.cr4;
+               value = kvm_read_cr4(vcpu);
                break;
        case 8:
                value = kvm_get_cr8(vcpu);
@@ -3624,7 +3669,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
                kvm_set_cr3(vcpu, val);
                break;
        case 4:
-               kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
+               kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
                break;
        case 8:
                kvm_set_cr8(vcpu, val & 0xfUL);
@@ -3691,6 +3736,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
        }
        return best;
 }
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
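
kvm_find_cpuid_entry() is exported so that vendor modules can inspect the
guest CPUID, e.g. from the new cpuid_update() hook. A sketch of the kind of
check involved for RDTSCP (the helper name and exact logic are assumptions;
leaf 0x80000001 EDX bit 27 is the architectural RDTSCP feature bit):

static bool sketch_guest_has_rdtscp(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;

	/* look up extended feature leaf 0x80000001, index 0 */
	best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
	return best && (best->edx & (1u << 27));
}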
 
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
 {
@@ -4205,11 +4251,10 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
        sregs->gdt.limit = dt.limit;
        sregs->gdt.base = dt.base;
 
-       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
        sregs->cr0 = vcpu->arch.cr0;
        sregs->cr2 = vcpu->arch.cr2;
        sregs->cr3 = vcpu->arch.cr3;
-       sregs->cr4 = vcpu->arch.cr4;
+       sregs->cr4 = kvm_read_cr4(vcpu);
        sregs->cr8 = kvm_get_cr8(vcpu);
        sregs->efer = vcpu->arch.shadow_efer;
        sregs->apic_base = kvm_get_apic_base(vcpu);
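
The explicit decache_cr4_guest_bits() calls removed in this and the
surrounding hunks are subsumed by kvm_read_cr4()/kvm_read_cr4_bits(), which
presumably decache on demand, along these lines (a sketch of the accessor
idea, not the actual kvm_cache_regs.h code):

/* Sketch: assumes a cr4_guest_owned_bits mask of CR4 bits the guest may
 * own in hardware, which must be re-read from the VMCS before use. */
static inline unsigned long kvm_read_cr4_bits(struct kvm_vcpu *vcpu,
					      unsigned long mask)
{
	if (mask & vcpu->arch.cr4_guest_owned_bits)
		kvm_x86_ops->decache_cr4_guest_bits(vcpu);
	return vcpu->arch.cr4 & mask;
}

static inline unsigned long kvm_read_cr4(struct kvm_vcpu *vcpu)
{
	return kvm_read_cr4_bits(vcpu, ~0UL);
}
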
@@ -4378,6 +4423,15 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
                (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
 }
 
+static void kvm_check_segment_descriptor(struct kvm_vcpu *vcpu, int seg,
+                                        u16 selector)
+{
+       /* NULL selector is not valid for CS and SS */
+       if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
+               if (!selector)
+                       kvm_queue_exception_e(vcpu, TS_VECTOR, selector >> 3);
+}
+
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
                                int type_bits, int seg)
 {
@@ -4387,6 +4441,8 @@ int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
                return kvm_load_realmode_segment(vcpu, selector, seg);
        if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
                return 1;
+
+       kvm_check_segment_descriptor(vcpu, seg, selector);
        kvm_seg.type |= type_bits;
 
        if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
@@ -4694,13 +4750,11 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        kvm_x86_ops->set_efer(vcpu, sregs->efer);
        kvm_set_apic_base(vcpu, sregs->apic_base);
 
-       kvm_x86_ops->decache_cr4_guest_bits(vcpu);
-
        mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
        kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
        vcpu->arch.cr0 = sregs->cr0;
 
-       mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
+       mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
        kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
        if (!is_long_mode(vcpu) && is_pae(vcpu)) {
                load_pdptrs(vcpu, vcpu->arch.cr3);