arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/random.h>
  27 #include <linux/slab.h>
  28 #include <linux/timer.h>
  29 #include <linux/vmalloc.h>
  30 #include <linux/bitmap.h>
  31 #include <asm/asm-offsets.h>
  32 #include <asm/lowcore.h>
  33 #include <asm/stp.h>
  34 #include <asm/pgtable.h>
  35 #include <asm/gmap.h>
  36 #include <asm/nmi.h>
  37 #include <asm/switch_to.h>
  38 #include <asm/isc.h>
  39 #include <asm/sclp.h>
  40 #include <asm/cpacf.h>
  41 #include <asm/timex.h>
  42 #include "kvm-s390.h"
  43 #include "gaccess.h"
  44
  45 #define KMSG_COMPONENT "kvm-s390"
  46 #undef pr_fmt
  47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  48
  49 #define CREATE_TRACE_POINTS
  50 #include "trace.h"
  51 #include "trace-s390.h"
  52
  53 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  54 #define LOCAL_IRQS 32
  55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  56                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  57
  58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  59
/*
 * Table of named per-VCPU statistics counters.
 *
 * Each entry pairs a user-visible name with the VCPU_STAT() expansion for
 * the matching counter, i.e. the offset of that counter inside
 * struct kvm_vcpu plus the KVM_STAT_VCPU kind. The table is terminated by
 * a { NULL } entry.
 * NOTE(review): presumably consumed by the generic KVM debugfs code --
 * the consumer is not visible in this file.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { "diagnose_258", VCPU_STAT(diagnose_258) },
        { "diagnose_308", VCPU_STAT(diagnose_308) },
        { "diagnose_500", VCPU_STAT(diagnose_500) },
        { NULL }
};
 128
 129 /* allow nested virtualization in KVM (if enabled by user space) */
 130 static int nested;
 131 module_param(nested, int, S_IRUGO);
 132 MODULE_PARM_DESC(nested, "Nested virtualization support");
 133
 134 /* upper facilities limit for kvm */
 135 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
 136
 137 unsigned long kvm_s390_fac_list_mask_size(void)
 138 {
 139         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 140         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 141 }
 142
 143 /* available cpu features supported by kvm */
 144 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 145 /* available subfunctions indicated via query / "test bit" */
 146 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 147
/* pte notifier, wired to kvm_gmap_notifier() in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
/* pte notifier, wired to kvm_s390_vsie_gmap_notifier() at the same place */
static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature handle, registered as "kvm-trace" in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
 151
 152 /* Section: not file related */
 153 int kvm_arch_hardware_enable(void)
 154 {
 155         /* every s390 is virtualization enabled ;-) */
 156         return 0;
 157 }
 158
 159 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 160                               unsigned long end);
 161
 162 /*
 163  * This callback is executed during stop_machine(). All CPUs are therefore
 164  * temporarily stopped. In order not to change guest behavior, we have to
 165  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 166  * so a CPU won't be stopped while calculating with the epoch.
 167  */
 168 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 169                           void *v)
 170 {
 171         struct kvm *kvm;
 172         struct kvm_vcpu *vcpu;
 173         int i;
 174         unsigned long long *delta = v;
 175
 176         list_for_each_entry(kvm, &vm_list, vm_list) {
 177                 kvm->arch.epoch -= *delta;
 178                 kvm_for_each_vcpu(i, vcpu, kvm) {
 179                         vcpu->arch.sie_block->epoch -= *delta;
 180                         if (vcpu->arch.cputm_enabled)
 181                                 vcpu->arch.cputm_start += *delta;
 182                         if (vcpu->arch.vsie_block)
 183                                 vcpu->arch.vsie_block->epoch -= *delta;
 184                 }
 185         }
 186         return NOTIFY_OK;
 187 }
 188
/*
 * Notifier block that routes s390_epoch_delta_notifier events to
 * kvm_clock_sync(); (un)registered in kvm_arch_hardware_(un)setup().
 */
static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};
 192
 193 int kvm_arch_hardware_setup(void)
 194 {
 195         gmap_notifier.notifier_call = kvm_gmap_notifier;
 196         gmap_register_pte_notifier(&gmap_notifier);
 197         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 198         gmap_register_pte_notifier(&vsie_gmap_notifier);
 199         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 200                                        &kvm_clock_notifier);
 201         return 0;
 202 }
 203
/*
 * Counterpart of kvm_arch_hardware_setup(): drops both gmap pte notifiers
 * and the epoch delta notifier again.
 */
void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}
 211
/*
 * Mark cpu feature @nr as available by setting its bit in
 * kvm_s390_available_cpu_feat.
 * NOTE(review): set_bit_inv() presumably uses inverted (MSB-0) bit
 * numbering -- confirm against its definition.
 */
static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
 216
/*
 * Query whether PLO function code @nr is available on this machine.
 *
 * Register 0 is loaded with @nr | 0x100 and the PLO instruction is issued;
 * the resulting condition code is extracted with ipm/srl.
 * NOTE(review): 0x100 presumably selects the "test bit" mode of PLO --
 * confirm against the Principles of Operation.
 *
 * Returns 1 if the condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        /* cc is an output-only operand ("=d"), the asm below overwrites it */
        int cc = 3; /* subfunction not available */

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}
 232
 233 static void kvm_s390_cpu_feat_init(void)
 234 {
 235         int i;
 236
 237         for (i = 0; i < 256; ++i) {
 238                 if (plo_test_bit(i))
 239                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 240         }
 241
 242         if (test_facility(28)) /* TOD-clock steering */
 243                 ptff(kvm_s390_available_subfunc.ptff,
 244                      sizeof(kvm_s390_available_subfunc.ptff),
 245                      PTFF_QAF);
 246
 247         if (test_facility(17)) { /* MSA */
 248                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 249                               kvm_s390_available_subfunc.kmac);
 250                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 251                               kvm_s390_available_subfunc.kmc);
 252                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 253                               kvm_s390_available_subfunc.km);
 254                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 255                               kvm_s390_available_subfunc.kimd);
 256                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 257                               kvm_s390_available_subfunc.klmd);
 258         }
 259         if (test_facility(76)) /* MSA3 */
 260                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 261                               kvm_s390_available_subfunc.pckmo);
 262         if (test_facility(77)) { /* MSA4 */
 263                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 264                               kvm_s390_available_subfunc.kmctr);
 265                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 266                               kvm_s390_available_subfunc.kmf);
 267                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 268                               kvm_s390_available_subfunc.kmo);
 269                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 270                               kvm_s390_available_subfunc.pcc);
 271         }
 272         if (test_facility(57)) /* MSA5 */
 273                 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
 274                               kvm_s390_available_subfunc.ppno);
 275
 276         if (MACHINE_HAS_ESOP)
 277                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 278         /*
 279          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 280          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 281          */
 282         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 283             !test_facility(3) || !nested)
 284                 return;
 285         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 286         if (sclp.has_64bscao)
 287                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 288         if (sclp.has_siif)
 289                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 290         if (sclp.has_gpere)
 291                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 292         if (sclp.has_gsls)
 293                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 294         if (sclp.has_ib)
 295                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 296         if (sclp.has_cei)
 297                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 298         if (sclp.has_ibs)
 299                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 300         /*
 301          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 302          * all skey handling functions read/set the skey from the PGSTE
 303          * instead of the real storage key.
 304          *
 305          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 306          * pages being detected as preserved although they are resident.
 307          *
 308          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 309          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 310          *
 311          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 312          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 313          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 314          *
 315          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 316          * cannot easily shadow the SCA because of the ipte lock.
 317          */
 318 }
 319
 320 int kvm_arch_init(void *opaque)
 321 {
 322         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 323         if (!kvm_s390_dbf)
 324                 return -ENOMEM;
 325
 326         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 327                 debug_unregister(kvm_s390_dbf);
 328                 return -ENOMEM;
 329         }
 330
 331         kvm_s390_cpu_feat_init();
 332
 333         /* Register floating interrupt controller interface. */
 334         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 335 }
 336
/* Module teardown: release the debug feature registered in kvm_arch_init(). */
void kvm_arch_exit(void)
{
        debug_unregister(kvm_s390_dbf);
}
 341
 342 /* Section: device related */
 343 long kvm_arch_dev_ioctl(struct file *filp,
 344                         unsigned int ioctl, unsigned long arg)
 345 {
 346         if (ioctl == KVM_S390_ENABLE_SIE)
 347                 return s390_enable_sie();
 348         return -EINVAL;
 349 }
 350
 351 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 352 {
 353         int r;
 354
 355         switch (ext) {
 356         case KVM_CAP_S390_PSW:
 357         case KVM_CAP_S390_GMAP:
 358         case KVM_CAP_SYNC_MMU:
 359 #ifdef CONFIG_KVM_S390_UCONTROL
 360         case KVM_CAP_S390_UCONTROL:
 361 #endif
 362         case KVM_CAP_ASYNC_PF:
 363         case KVM_CAP_SYNC_REGS:
 364         case KVM_CAP_ONE_REG:
 365         case KVM_CAP_ENABLE_CAP:
 366         case KVM_CAP_S390_CSS_SUPPORT:
 367         case KVM_CAP_IOEVENTFD:
 368         case KVM_CAP_DEVICE_CTRL:
 369         case KVM_CAP_ENABLE_CAP_VM:
 370         case KVM_CAP_S390_IRQCHIP:
 371         case KVM_CAP_VM_ATTRIBUTES:
 372         case KVM_CAP_MP_STATE:
 373         case KVM_CAP_S390_INJECT_IRQ:
 374         case KVM_CAP_S390_USER_SIGP:
 375         case KVM_CAP_S390_USER_STSI:
 376         case KVM_CAP_S390_SKEYS:
 377         case KVM_CAP_S390_IRQ_STATE:
 378         case KVM_CAP_S390_USER_INSTR0:
 379                 r = 1;
 380                 break;
 381         case KVM_CAP_S390_MEM_OP:
 382                 r = MEM_OP_MAX_SIZE;
 383                 break;
 384         case KVM_CAP_NR_VCPUS:
 385         case KVM_CAP_MAX_VCPUS:
 386                 r = KVM_S390_BSCA_CPU_SLOTS;
 387                 if (!kvm_s390_use_sca_entries())
 388                         r = KVM_MAX_VCPUS;
 389                 else if (sclp.has_esca && sclp.has_64bscao)
 390                         r = KVM_S390_ESCA_CPU_SLOTS;
 391                 break;
 392         case KVM_CAP_NR_MEMSLOTS:
 393                 r = KVM_USER_MEM_SLOTS;
 394                 break;
 395         case KVM_CAP_S390_COW:
 396                 r = MACHINE_HAS_ESOP;
 397                 break;
 398         case KVM_CAP_S390_VECTOR_REGISTERS:
 399                 r = MACHINE_HAS_VX;
 400                 break;
 401         case KVM_CAP_S390_RI:
 402                 r = test_facility(64);
 403                 break;
 404         default:
 405                 r = 0;
 406         }
 407         return r;
 408 }
 409
 410 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 411                                         struct kvm_memory_slot *memslot)
 412 {
 413         gfn_t cur_gfn, last_gfn;
 414         unsigned long address;
 415         struct gmap *gmap = kvm->arch.gmap;
 416
 417         /* Loop over all guest pages */
 418         last_gfn = memslot->base_gfn + memslot->npages;
 419         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 420                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 421
 422                 if (test_and_clear_guest_dirty(gmap->mm, address))
 423                         mark_page_dirty(kvm, cur_gfn);
 424                 if (fatal_signal_pending(current))
 425                         return;
 426                 cond_resched();
 427         }
 428 }
 429
 430 /* Section: vm related */
 431 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 432
 433 /*
 434  * Get (and clear) the dirty memory log for a memory slot.
 435  */
 436 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 437                                struct kvm_dirty_log *log)
 438 {
 439         int r;
 440         unsigned long n;
 441         struct kvm_memslots *slots;
 442         struct kvm_memory_slot *memslot;
 443         int is_dirty = 0;
 444
 445         mutex_lock(&kvm->slots_lock);
 446
 447         r = -EINVAL;
 448         if (log->slot >= KVM_USER_MEM_SLOTS)
 449                 goto out;
 450
 451         slots = kvm_memslots(kvm);
 452         memslot = id_to_memslot(slots, log->slot);
 453         r = -ENOENT;
 454         if (!memslot->dirty_bitmap)
 455                 goto out;
 456
 457         kvm_s390_sync_dirty_log(kvm, memslot);
 458         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 459         if (r)
 460                 goto out;
 461
 462         /* Clear the dirty log */
 463         if (is_dirty) {
 464                 n = kvm_dirty_bitmap_bytes(memslot);
 465                 memset(memslot->dirty_bitmap, 0, n);
 466         }
 467         r = 0;
 468 out:
 469         mutex_unlock(&kvm->slots_lock);
 470         return r;
 471 }
 472
 473 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 474 {
 475         unsigned int i;
 476         struct kvm_vcpu *vcpu;
 477
 478         kvm_for_each_vcpu(i, vcpu, kvm) {
 479                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 480         }
 481 }
 482
 483 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 484 {
 485         int r;
 486
 487         if (cap->flags)
 488                 return -EINVAL;
 489
 490         switch (cap->cap) {
 491         case KVM_CAP_S390_IRQCHIP:
 492                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 493                 kvm->arch.use_irqchip = 1;
 494                 r = 0;
 495                 break;
 496         case KVM_CAP_S390_USER_SIGP:
 497                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 498                 kvm->arch.user_sigp = 1;
 499                 r = 0;
 500                 break;
 501         case KVM_CAP_S390_VECTOR_REGISTERS:
 502                 mutex_lock(&kvm->lock);
 503                 if (kvm->created_vcpus) {
 504                         r = -EBUSY;
 505                 } else if (MACHINE_HAS_VX) {
 506                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 507                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 508                         r = 0;
 509                 } else
 510                         r = -EINVAL;
 511                 mutex_unlock(&kvm->lock);
 512                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 513                          r ? "(not available)" : "(success)");
 514                 break;
 515         case KVM_CAP_S390_RI:
 516                 r = -EINVAL;
 517                 mutex_lock(&kvm->lock);
 518                 if (kvm->created_vcpus) {
 519                         r = -EBUSY;
 520                 } else if (test_facility(64)) {
 521                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 522                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 523                         r = 0;
 524                 }
 525                 mutex_unlock(&kvm->lock);
 526                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 527                          r ? "(not available)" : "(success)");
 528                 break;
 529         case KVM_CAP_S390_USER_STSI:
 530                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 531                 kvm->arch.user_stsi = 1;
 532                 r = 0;
 533                 break;
 534         case KVM_CAP_S390_USER_INSTR0:
 535                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 536                 kvm->arch.user_instr0 = 1;
 537                 icpt_operexc_on_all_vcpus(kvm);
 538                 r = 0;
 539                 break;
 540         default:
 541                 r = -EINVAL;
 542                 break;
 543         }
 544         return r;
 545 }
 546
 547 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 548 {
 549         int ret;
 550
 551         switch (attr->attr) {
 552         case KVM_S390_VM_MEM_LIMIT_SIZE:
 553                 ret = 0;
 554                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 555                          kvm->arch.mem_limit);
 556                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 557                         ret = -EFAULT;
 558                 break;
 559         default:
 560                 ret = -ENXIO;
 561                 break;
 562         }
 563         return ret;
 564 }
 565
 566 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 567 {
 568         int ret;
 569         unsigned int idx;
 570         switch (attr->attr) {
 571         case KVM_S390_VM_MEM_ENABLE_CMMA:
 572                 ret = -ENXIO;
 573                 if (!sclp.has_cmma)
 574                         break;
 575
 576                 ret = -EBUSY;
 577                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 578                 mutex_lock(&kvm->lock);
 579                 if (!kvm->created_vcpus) {
 580                         kvm->arch.use_cmma = 1;
 581                         ret = 0;
 582                 }
 583                 mutex_unlock(&kvm->lock);
 584                 break;
 585         case KVM_S390_VM_MEM_CLR_CMMA:
 586                 ret = -ENXIO;
 587                 if (!sclp.has_cmma)
 588                         break;
 589                 ret = -EINVAL;
 590                 if (!kvm->arch.use_cmma)
 591                         break;
 592
 593                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 594                 mutex_lock(&kvm->lock);
 595                 idx = srcu_read_lock(&kvm->srcu);
 596                 s390_reset_cmma(kvm->arch.gmap->mm);
 597                 srcu_read_unlock(&kvm->srcu, idx);
 598                 mutex_unlock(&kvm->lock);
 599                 ret = 0;
 600                 break;
 601         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 602                 unsigned long new_limit;
 603
 604                 if (kvm_is_ucontrol(kvm))
 605                         return -EINVAL;
 606
 607                 if (get_user(new_limit, (u64 __user *)attr->addr))
 608                         return -EFAULT;
 609
 610                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 611                     new_limit > kvm->arch.mem_limit)
 612                         return -E2BIG;
 613
 614                 if (!new_limit)
 615                         return -EINVAL;
 616
 617                 /* gmap_create takes last usable address */
 618                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 619                         new_limit -= 1;
 620
 621                 ret = -EBUSY;
 622                 mutex_lock(&kvm->lock);
 623                 if (!kvm->created_vcpus) {
 624                         /* gmap_create will round the limit up */
 625                         struct gmap *new = gmap_create(current->mm, new_limit);
 626
 627                         if (!new) {
 628                                 ret = -ENOMEM;
 629                         } else {
 630                                 gmap_remove(kvm->arch.gmap);
 631                                 new->private = kvm;
 632                                 kvm->arch.gmap = new;
 633                                 ret = 0;
 634                         }
 635                 }
 636                 mutex_unlock(&kvm->lock);
 637                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 638                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 639                          (void *) kvm->arch.gmap->asce);
 640                 break;
 641         }
 642         default:
 643                 ret = -ENXIO;
 644                 break;
 645         }
 646         return ret;
 647 }
 648
 649 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 650
 651 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 652 {
 653         struct kvm_vcpu *vcpu;
 654         int i;
 655
 656         if (!test_kvm_facility(kvm, 76))
 657                 return -EINVAL;
 658
 659         mutex_lock(&kvm->lock);
 660         switch (attr->attr) {
 661         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 662                 get_random_bytes(
 663                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 664                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 665                 kvm->arch.crypto.aes_kw = 1;
 666                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 667                 break;
 668         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 669                 get_random_bytes(
 670                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 671                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 672                 kvm->arch.crypto.dea_kw = 1;
 673                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 674                 break;
 675         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 676                 kvm->arch.crypto.aes_kw = 0;
 677                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 678                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 679                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 680                 break;
 681         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 682                 kvm->arch.crypto.dea_kw = 0;
 683                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 684                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 685                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 686                 break;
 687         default:
 688                 mutex_unlock(&kvm->lock);
 689                 return -ENXIO;
 690         }
 691
 692         kvm_for_each_vcpu(i, vcpu, kvm) {
 693                 kvm_s390_vcpu_crypto_setup(vcpu);
 694                 exit_sie(vcpu);
 695         }
 696         mutex_unlock(&kvm->lock);
 697         return 0;
 698 }
 699
 700 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 701 {
 702         u8 gtod_high;
 703
 704         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 705                                            sizeof(gtod_high)))
 706                 return -EFAULT;
 707
 708         if (gtod_high != 0)
 709                 return -EINVAL;
 710         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 711
 712         return 0;
 713 }
 714
 715 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 716 {
 717         u64 gtod;
 718
 719         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 720                 return -EFAULT;
 721
 722         kvm_s390_set_tod_clock(kvm, gtod);
 723         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 724         return 0;
 725 }
 726
 727 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 728 {
 729         int ret;
 730
 731         if (attr->flags)
 732                 return -EINVAL;
 733
 734         switch (attr->attr) {
 735         case KVM_S390_VM_TOD_HIGH:
 736                 ret = kvm_s390_set_tod_high(kvm, attr);
 737                 break;
 738         case KVM_S390_VM_TOD_LOW:
 739                 ret = kvm_s390_set_tod_low(kvm, attr);
 740                 break;
 741         default:
 742                 ret = -ENXIO;
 743                 break;
 744         }
 745         return ret;
 746 }
 747
 748 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 749 {
 750         u8 gtod_high = 0;
 751
 752         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 753                                          sizeof(gtod_high)))
 754                 return -EFAULT;
 755         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 756
 757         return 0;
 758 }
 759
 760 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 761 {
 762         u64 gtod;
 763
 764         gtod = kvm_s390_get_tod_clock_fast(kvm);
 765         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 766                 return -EFAULT;
 767         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 768
 769         return 0;
 770 }
 771
 772 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 773 {
 774         int ret;
 775
 776         if (attr->flags)
 777                 return -EINVAL;
 778
 779         switch (attr->attr) {
 780         case KVM_S390_VM_TOD_HIGH:
 781                 ret = kvm_s390_get_tod_high(kvm, attr);
 782                 break;
 783         case KVM_S390_VM_TOD_LOW:
 784                 ret = kvm_s390_get_tod_low(kvm, attr);
 785                 break;
 786         default:
 787                 ret = -ENXIO;
 788                 break;
 789         }
 790         return ret;
 791 }
 792
 793 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 794 {
 795         struct kvm_s390_vm_cpu_processor *proc;
 796         u16 lowest_ibc, unblocked_ibc;
 797         int ret = 0;
 798
 799         mutex_lock(&kvm->lock);
 800         if (kvm->created_vcpus) {
 801                 ret = -EBUSY;
 802                 goto out;
 803         }
 804         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 805         if (!proc) {
 806                 ret = -ENOMEM;
 807                 goto out;
 808         }
 809         if (!copy_from_user(proc, (void __user *)attr->addr,
 810                             sizeof(*proc))) {
 811                 kvm->arch.model.cpuid = proc->cpuid;
 812                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
 813                 unblocked_ibc = sclp.ibc & 0xfff;
 814                 if (lowest_ibc && proc->ibc) {
 815                         if (proc->ibc > unblocked_ibc)
 816                                 kvm->arch.model.ibc = unblocked_ibc;
 817                         else if (proc->ibc < lowest_ibc)
 818                                 kvm->arch.model.ibc = lowest_ibc;
 819                         else
 820                                 kvm->arch.model.ibc = proc->ibc;
 821                 }
 822                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
 823                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 824         } else
 825                 ret = -EFAULT;
 826         kfree(proc);
 827 out:
 828         mutex_unlock(&kvm->lock);
 829         return ret;
 830 }
 831
 832 static int kvm_s390_set_processor_feat(struct kvm *kvm,
 833                                        struct kvm_device_attr *attr)
 834 {
 835         struct kvm_s390_vm_cpu_feat data;
 836         int ret = -EBUSY;
 837
 838         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
 839                 return -EFAULT;
 840         if (!bitmap_subset((unsigned long *) data.feat,
 841                            kvm_s390_available_cpu_feat,
 842                            KVM_S390_VM_CPU_FEAT_NR_BITS))
 843                 return -EINVAL;
 844
 845         mutex_lock(&kvm->lock);
 846         if (!atomic_read(&kvm->online_vcpus)) {
 847                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
 848                             KVM_S390_VM_CPU_FEAT_NR_BITS);
 849                 ret = 0;
 850         }
 851         mutex_unlock(&kvm->lock);
 852         return ret;
 853 }
 854
 855 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
 856                                           struct kvm_device_attr *attr)
 857 {
 858         /*
 859          * Once supported by kernel + hw, we have to store the subfunctions
 860          * in kvm->arch and remember that user space configured them.
 861          */
 862         return -ENXIO;
 863 }
 864
 865 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 866 {
 867         int ret = -ENXIO;
 868
 869         switch (attr->attr) {
 870         case KVM_S390_VM_CPU_PROCESSOR:
 871                 ret = kvm_s390_set_processor(kvm, attr);
 872                 break;
 873         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 874                 ret = kvm_s390_set_processor_feat(kvm, attr);
 875                 break;
 876         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 877                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
 878                 break;
 879         }
 880         return ret;
 881 }
 882
 883 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 884 {
 885         struct kvm_s390_vm_cpu_processor *proc;
 886         int ret = 0;
 887
 888         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 889         if (!proc) {
 890                 ret = -ENOMEM;
 891                 goto out;
 892         }
 893         proc->cpuid = kvm->arch.model.cpuid;
 894         proc->ibc = kvm->arch.model.ibc;
 895         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
 896                S390_ARCH_FAC_LIST_SIZE_BYTE);
 897         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 898                 ret = -EFAULT;
 899         kfree(proc);
 900 out:
 901         return ret;
 902 }
 903
 904 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 905 {
 906         struct kvm_s390_vm_cpu_machine *mach;
 907         int ret = 0;
 908
 909         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 910         if (!mach) {
 911                 ret = -ENOMEM;
 912                 goto out;
 913         }
 914         get_cpu_id((struct cpuid *) &mach->cpuid);
 915         mach->ibc = sclp.ibc;
 916         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
 917                S390_ARCH_FAC_LIST_SIZE_BYTE);
 918         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 919                S390_ARCH_FAC_LIST_SIZE_BYTE);
 920         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 921                 ret = -EFAULT;
 922         kfree(mach);
 923 out:
 924         return ret;
 925 }
 926
 927 static int kvm_s390_get_processor_feat(struct kvm *kvm,
 928                                        struct kvm_device_attr *attr)
 929 {
 930         struct kvm_s390_vm_cpu_feat data;
 931
 932         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
 933                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 934         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 935                 return -EFAULT;
 936         return 0;
 937 }
 938
 939 static int kvm_s390_get_machine_feat(struct kvm *kvm,
 940                                      struct kvm_device_attr *attr)
 941 {
 942         struct kvm_s390_vm_cpu_feat data;
 943
 944         bitmap_copy((unsigned long *) data.feat,
 945                     kvm_s390_available_cpu_feat,
 946                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 947         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 948                 return -EFAULT;
 949         return 0;
 950 }
 951
 952 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
 953                                           struct kvm_device_attr *attr)
 954 {
 955         /*
 956          * Once we can actually configure subfunctions (kernel + hw support),
 957          * we have to check if they were already set by user space, if so copy
 958          * them from kvm->arch.
 959          */
 960         return -ENXIO;
 961 }
 962
 963 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
 964                                         struct kvm_device_attr *attr)
 965 {
 966         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
 967             sizeof(struct kvm_s390_vm_cpu_subfunc)))
 968                 return -EFAULT;
 969         return 0;
 970 }
 971 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 972 {
 973         int ret = -ENXIO;
 974
 975         switch (attr->attr) {
 976         case KVM_S390_VM_CPU_PROCESSOR:
 977                 ret = kvm_s390_get_processor(kvm, attr);
 978                 break;
 979         case KVM_S390_VM_CPU_MACHINE:
 980                 ret = kvm_s390_get_machine(kvm, attr);
 981                 break;
 982         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 983                 ret = kvm_s390_get_processor_feat(kvm, attr);
 984                 break;
 985         case KVM_S390_VM_CPU_MACHINE_FEAT:
 986                 ret = kvm_s390_get_machine_feat(kvm, attr);
 987                 break;
 988         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 989                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
 990                 break;
 991         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 992                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
 993                 break;
 994         }
 995         return ret;
 996 }
 997
 998 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 999 {
1000         int ret;
1001
1002         switch (attr->group) {
1003         case KVM_S390_VM_MEM_CTRL:
1004                 ret = kvm_s390_set_mem_control(kvm, attr);
1005                 break;
1006         case KVM_S390_VM_TOD:
1007                 ret = kvm_s390_set_tod(kvm, attr);
1008                 break;
1009         case KVM_S390_VM_CPU_MODEL:
1010                 ret = kvm_s390_set_cpu_model(kvm, attr);
1011                 break;
1012         case KVM_S390_VM_CRYPTO:
1013                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1014                 break;
1015         default:
1016                 ret = -ENXIO;
1017                 break;
1018         }
1019
1020         return ret;
1021 }
1022
1023 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1024 {
1025         int ret;
1026
1027         switch (attr->group) {
1028         case KVM_S390_VM_MEM_CTRL:
1029                 ret = kvm_s390_get_mem_control(kvm, attr);
1030                 break;
1031         case KVM_S390_VM_TOD:
1032                 ret = kvm_s390_get_tod(kvm, attr);
1033                 break;
1034         case KVM_S390_VM_CPU_MODEL:
1035                 ret = kvm_s390_get_cpu_model(kvm, attr);
1036                 break;
1037         default:
1038                 ret = -ENXIO;
1039                 break;
1040         }
1041
1042         return ret;
1043 }
1044
1045 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1046 {
1047         int ret;
1048
1049         switch (attr->group) {
1050         case KVM_S390_VM_MEM_CTRL:
1051                 switch (attr->attr) {
1052                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1053                 case KVM_S390_VM_MEM_CLR_CMMA:
1054                         ret = sclp.has_cmma ? 0 : -ENXIO;
1055                         break;
1056                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1057                         ret = 0;
1058                         break;
1059                 default:
1060                         ret = -ENXIO;
1061                         break;
1062                 }
1063                 break;
1064         case KVM_S390_VM_TOD:
1065                 switch (attr->attr) {
1066                 case KVM_S390_VM_TOD_LOW:
1067                 case KVM_S390_VM_TOD_HIGH:
1068                         ret = 0;
1069                         break;
1070                 default:
1071                         ret = -ENXIO;
1072                         break;
1073                 }
1074                 break;
1075         case KVM_S390_VM_CPU_MODEL:
1076                 switch (attr->attr) {
1077                 case KVM_S390_VM_CPU_PROCESSOR:
1078                 case KVM_S390_VM_CPU_MACHINE:
1079                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1080                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1081                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1082                         ret = 0;
1083                         break;
1084                 /* configuring subfunctions is not supported yet */
1085                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1086                 default:
1087                         ret = -ENXIO;
1088                         break;
1089                 }
1090                 break;
1091         case KVM_S390_VM_CRYPTO:
1092                 switch (attr->attr) {
1093                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1094                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1095                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1096                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1097                         ret = 0;
1098                         break;
1099                 default:
1100                         ret = -ENXIO;
1101                         break;
1102                 }
1103                 break;
1104         default:
1105                 ret = -ENXIO;
1106                 break;
1107         }
1108
1109         return ret;
1110 }
1111
1112 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1113 {
1114         uint8_t *keys;
1115         uint64_t hva;
1116         int i, r = 0;
1117
1118         if (args->flags != 0)
1119                 return -EINVAL;
1120
1121         /* Is this guest using storage keys? */
1122         if (!mm_use_skey(current->mm))
1123                 return KVM_S390_GET_SKEYS_NONE;
1124
1125         /* Enforce sane limit on memory allocation */
1126         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1127                 return -EINVAL;
1128
1129         keys = kmalloc_array(args->count, sizeof(uint8_t),
1130                              GFP_KERNEL | __GFP_NOWARN);
1131         if (!keys)
1132                 keys = vmalloc(sizeof(uint8_t) * args->count);
1133         if (!keys)
1134                 return -ENOMEM;
1135
1136         down_read(&current->mm->mmap_sem);
1137         for (i = 0; i < args->count; i++) {
1138                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1139                 if (kvm_is_error_hva(hva)) {
1140                         r = -EFAULT;
1141                         break;
1142                 }
1143
1144                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1145                 if (r)
1146                         break;
1147         }
1148         up_read(&current->mm->mmap_sem);
1149
1150         if (!r) {
1151                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1152                                  sizeof(uint8_t) * args->count);
1153                 if (r)
1154                         r = -EFAULT;
1155         }
1156
1157         kvfree(keys);
1158         return r;
1159 }
1160
1161 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1162 {
1163         uint8_t *keys;
1164         uint64_t hva;
1165         int i, r = 0;
1166
1167         if (args->flags != 0)
1168                 return -EINVAL;
1169
1170         /* Enforce sane limit on memory allocation */
1171         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1172                 return -EINVAL;
1173
1174         keys = kmalloc_array(args->count, sizeof(uint8_t),
1175                              GFP_KERNEL | __GFP_NOWARN);
1176         if (!keys)
1177                 keys = vmalloc(sizeof(uint8_t) * args->count);
1178         if (!keys)
1179                 return -ENOMEM;
1180
1181         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1182                            sizeof(uint8_t) * args->count);
1183         if (r) {
1184                 r = -EFAULT;
1185                 goto out;
1186         }
1187
1188         /* Enable storage key handling for the guest */
1189         r = s390_enable_skey();
1190         if (r)
1191                 goto out;
1192
1193         down_read(&current->mm->mmap_sem);
1194         for (i = 0; i < args->count; i++) {
1195                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1196                 if (kvm_is_error_hva(hva)) {
1197                         r = -EFAULT;
1198                         break;
1199                 }
1200
1201                 /* Lowest order bit is reserved */
1202                 if (keys[i] & 0x01) {
1203                         r = -EINVAL;
1204                         break;
1205                 }
1206
1207                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1208                 if (r)
1209                         break;
1210         }
1211         up_read(&current->mm->mmap_sem);
1212 out:
1213         kvfree(keys);
1214         return r;
1215 }
1216
1217 long kvm_arch_vm_ioctl(struct file *filp,
1218                        unsigned int ioctl, unsigned long arg)
1219 {
1220         struct kvm *kvm = filp->private_data;
1221         void __user *argp = (void __user *)arg;
1222         struct kvm_device_attr attr;
1223         int r;
1224
1225         switch (ioctl) {
1226         case KVM_S390_INTERRUPT: {
1227                 struct kvm_s390_interrupt s390int;
1228
1229                 r = -EFAULT;
1230                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1231                         break;
1232                 r = kvm_s390_inject_vm(kvm, &s390int);
1233                 break;
1234         }
1235         case KVM_ENABLE_CAP: {
1236                 struct kvm_enable_cap cap;
1237                 r = -EFAULT;
1238                 if (copy_from_user(&cap, argp, sizeof(cap)))
1239                         break;
1240                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1241                 break;
1242         }
1243         case KVM_CREATE_IRQCHIP: {
1244                 struct kvm_irq_routing_entry routing;
1245
1246                 r = -EINVAL;
1247                 if (kvm->arch.use_irqchip) {
1248                         /* Set up dummy routing. */
1249                         memset(&routing, 0, sizeof(routing));
1250                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1251                 }
1252                 break;
1253         }
1254         case KVM_SET_DEVICE_ATTR: {
1255                 r = -EFAULT;
1256                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1257                         break;
1258                 r = kvm_s390_vm_set_attr(kvm, &attr);
1259                 break;
1260         }
1261         case KVM_GET_DEVICE_ATTR: {
1262                 r = -EFAULT;
1263                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1264                         break;
1265                 r = kvm_s390_vm_get_attr(kvm, &attr);
1266                 break;
1267         }
1268         case KVM_HAS_DEVICE_ATTR: {
1269                 r = -EFAULT;
1270                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1271                         break;
1272                 r = kvm_s390_vm_has_attr(kvm, &attr);
1273                 break;
1274         }
1275         case KVM_S390_GET_SKEYS: {
1276                 struct kvm_s390_skeys args;
1277
1278                 r = -EFAULT;
1279                 if (copy_from_user(&args, argp,
1280                                    sizeof(struct kvm_s390_skeys)))
1281                         break;
1282                 r = kvm_s390_get_skeys(kvm, &args);
1283                 break;
1284         }
1285         case KVM_S390_SET_SKEYS: {
1286                 struct kvm_s390_skeys args;
1287
1288                 r = -EFAULT;
1289                 if (copy_from_user(&args, argp,
1290                                    sizeof(struct kvm_s390_skeys)))
1291                         break;
1292                 r = kvm_s390_set_skeys(kvm, &args);
1293                 break;
1294         }
1295         default:
1296                 r = -ENOTTY;
1297         }
1298
1299         return r;
1300 }
1301
/*
 * Issue PQAP(QCI) and store the result in the buffer @config.
 *
 * @config: buffer of at least 128 bytes; it is zeroed before the
 *          instruction is issued.
 *
 * Returns the condition code extracted after the instruction.
 */
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	/*
	 * Register 0 receives the function code, register 2 the address of
	 * the config buffer. ipm + srl by 28 leave the condition code in cc.
	 * NOTE(review): the EX_TABLE entry presumably makes a program check
	 * on the PQAP instruction resume at label 1, skipping the cc
	 * extraction so that cc keeps its initial value 0 - confirm.
	 */
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"            /* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}
1323
1324 static int kvm_s390_apxa_installed(void)
1325 {
1326         u8 config[128];
1327         int cc;
1328
1329         if (test_facility(12)) {
1330                 cc = kvm_s390_query_ap_config(config);
1331
1332                 if (cc)
1333                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1334                 else
1335                         return config[0] & 0x40;
1336         }
1337
1338         return 0;
1339 }
1340
1341 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1342 {
1343         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1344
1345         if (kvm_s390_apxa_installed())
1346                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1347         else
1348                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1349 }
1350
1351 static u64 kvm_s390_get_initial_cpuid(void)
1352 {
1353         struct cpuid cpuid;
1354
1355         get_cpu_id(&cpuid);
1356         cpuid.version = 0xff;
1357         return *((u64 *) &cpuid);
1358 }
1359
1360 static void kvm_s390_crypto_init(struct kvm *kvm)
1361 {
1362         if (!test_kvm_facility(kvm, 76))
1363                 return;
1364
1365         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1366         kvm_s390_set_crycb_format(kvm);
1367
1368         /* Enable AES/DEA protected key functions by default */
1369         kvm->arch.crypto.aes_kw = 1;
1370         kvm->arch.crypto.dea_kw = 1;
1371         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1372                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1373         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1374                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1375 }
1376
1377 static void sca_dispose(struct kvm *kvm)
1378 {
1379         if (kvm->arch.use_esca)
1380                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1381         else
1382                 free_page((unsigned long)(kvm->arch.sca));
1383         kvm->arch.sca = NULL;
1384 }
1385
/*
 * Architecture specific part of VM creation.
 *
 * @kvm:  the VM being created
 * @type: VM type; only KVM_VM_S390_UCONTROL may be set, and only if
 *        CONFIG_KVM_S390_UCONTROL is enabled and the caller has
 *        CAP_SYS_ADMIN
 *
 * Allocates the basic SCA, the debug feature area and sie_page2, sets up
 * the facility mask/list, the initial CPU model, the crypto block and the
 * floating interrupt lists and, for non-ucontrol VMs, creates the guest
 * address space (gmap).
 *
 * Returns 0 on success, -EINVAL for an unsupported type or missing
 * privilege, the error from s390_enable_sie(), or -ENOMEM if one of the
 * allocations fails.
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	/* offset of the next VM's SCA within its page, shared by all VMs */
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	/* every failure from here on is an allocation failure */
	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	/*
	 * Place the SCA at a per-VM offset inside its page: the offset
	 * advances by 16 bytes for every VM and wraps to 0 once the block
	 * would no longer fit into the page.
	 */
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	/* the debug area is named after the pid of the creating task */
	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	/* keep only host facilities that are also in kvm_s390_fac_list_mask */
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	/* facility 74 is set in mask and list regardless of the host list */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		/* ucontrol VMs have no VM-wide gmap and no memory limit */
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		/* limit guest memory to TASK_MAX_SIZE and to sclp.hamax + 1 */
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	/*
	 * Common error exit: releases sie_page2, the debug area and the SCA,
	 * whichever of them have been set up so far.
	 */
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
1502
1503 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1504 {
1505         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1506         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1507         kvm_s390_clear_local_irqs(vcpu);
1508         kvm_clear_async_pf_completion_queue(vcpu);
1509         if (!kvm_is_ucontrol(vcpu->kvm))
1510                 sca_del_vcpu(vcpu);
1511
1512         if (kvm_is_ucontrol(vcpu->kvm))
1513                 gmap_remove(vcpu->arch.gmap);
1514
1515         if (vcpu->kvm->arch.use_cmma)
1516                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1517         free_page((unsigned long)(vcpu->arch.sie_block));
1518
1519         kvm_vcpu_uninit(vcpu);
1520         kmem_cache_free(kvm_vcpu_cache, vcpu);
1521 }
1522
1523 static void kvm_free_vcpus(struct kvm *kvm)
1524 {
1525         unsigned int i;
1526         struct kvm_vcpu *vcpu;
1527
1528         kvm_for_each_vcpu(i, vcpu, kvm)
1529                 kvm_arch_vcpu_destroy(vcpu);
1530
1531         mutex_lock(&kvm->lock);
1532         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1533                 kvm->vcpus[i] = NULL;
1534
1535         atomic_set(&kvm->online_vcpus, 0);
1536         mutex_unlock(&kvm->lock);
1537 }
1538
/*
 * Architecture specific part of VM destruction: frees the VCPUs, the SCA,
 * the debug area, sie_page2 and (for non-ucontrol VMs) the VM-wide gmap,
 * then destroys the adapters, the pending floating interrupts and the
 * vsie state.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	/* ucontrol VMs have no VM-wide gmap; theirs are per VCPU */
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
1552
1553 /* Section: vcpu related */
1554 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1555 {
1556         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1557         if (!vcpu->arch.gmap)
1558                 return -ENOMEM;
1559         vcpu->arch.gmap->private = vcpu->kvm;
1560
1561         return 0;
1562 }
1563
1564 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1565 {
1566         if (!kvm_s390_use_sca_entries())
1567                 return;
1568         read_lock(&vcpu->kvm->arch.sca_lock);
1569         if (vcpu->kvm->arch.use_esca) {
1570                 struct esca_block *sca = vcpu->kvm->arch.sca;
1571
1572                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1573                 sca->cpu[vcpu->vcpu_id].sda = 0;
1574         } else {
1575                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1576
1577                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1578                 sca->cpu[vcpu->vcpu_id].sda = 0;
1579         }
1580         read_unlock(&vcpu->kvm->arch.sca_lock);
1581 }
1582
1583 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1584 {
1585         if (!kvm_s390_use_sca_entries()) {
1586                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1587
1588                 /* we still need the basic sca for the ipte control */
1589                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1590                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1591         }
1592         read_lock(&vcpu->kvm->arch.sca_lock);
1593         if (vcpu->kvm->arch.use_esca) {
1594                 struct esca_block *sca = vcpu->kvm->arch.sca;
1595
1596                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1597                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1598                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1599                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1600                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1601         } else {
1602                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1603
1604                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1605                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1606                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1607                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1608         }
1609         read_unlock(&vcpu->kvm->arch.sca_lock);
1610 }
1611
1612 /* Basic SCA to Extended SCA data copy routines */
1613 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1614 {
1615         d->sda = s->sda;
1616         d->sigp_ctrl.c = s->sigp_ctrl.c;
1617         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1618 }
1619
1620 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1621 {
1622         int i;
1623
1624         d->ipte_control = s->ipte_control;
1625         d->mcn[0] = s->mcn;
1626         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1627                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1628 }
1629
1630 static int sca_switch_to_extended(struct kvm *kvm)
1631 {
1632         struct bsca_block *old_sca = kvm->arch.sca;
1633         struct esca_block *new_sca;
1634         struct kvm_vcpu *vcpu;
1635         unsigned int vcpu_idx;
1636         u32 scaol, scaoh;
1637
1638         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1639         if (!new_sca)
1640                 return -ENOMEM;
1641
1642         scaoh = (u32)((u64)(new_sca) >> 32);
1643         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1644
1645         kvm_s390_vcpu_block_all(kvm);
1646         write_lock(&kvm->arch.sca_lock);
1647
1648         sca_copy_b_to_e(new_sca, old_sca);
1649
1650         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1651                 vcpu->arch.sie_block->scaoh = scaoh;
1652                 vcpu->arch.sie_block->scaol = scaol;
1653                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1654         }
1655         kvm->arch.sca = new_sca;
1656         kvm->arch.use_esca = 1;
1657
1658         write_unlock(&kvm->arch.sca_lock);
1659         kvm_s390_vcpu_unblock_all(kvm);
1660
1661         free_page((unsigned long)old_sca);
1662
1663         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1664                  old_sca, kvm->arch.sca);
1665         return 0;
1666 }
1667
1668 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1669 {
1670         int rc;
1671
1672         if (!kvm_s390_use_sca_entries()) {
1673                 if (id < KVM_MAX_VCPUS)
1674                         return true;
1675                 return false;
1676         }
1677         if (id < KVM_S390_BSCA_CPU_SLOTS)
1678                 return true;
1679         if (!sclp.has_esca || !sclp.has_64bscao)
1680                 return false;
1681
1682         mutex_lock(&kvm->lock);
1683         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1684         mutex_unlock(&kvm->lock);
1685
1686         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1687 }
1688
1689 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1690 {
1691         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1692         kvm_clear_async_pf_completion_queue(vcpu);
1693         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1694                                     KVM_SYNC_GPRS |
1695                                     KVM_SYNC_ACRS |
1696                                     KVM_SYNC_CRS |
1697                                     KVM_SYNC_ARCH0 |
1698                                     KVM_SYNC_PFAULT;
1699         kvm_s390_set_prefix(vcpu, 0);
1700         if (test_kvm_facility(vcpu->kvm, 64))
1701                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1702         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1703          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1704          */
1705         if (MACHINE_HAS_VX)
1706                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1707         else
1708                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1709
1710         if (kvm_is_ucontrol(vcpu->kvm))
1711                 return __kvm_ucontrol_vcpu_init(vcpu);
1712
1713         return 0;
1714 }
1715
/*
 * Open a CPU timer accounting interval: record the current TOD clock value
 * in cputm_start inside a cputm_seqcount write section, so that a concurrent
 * kvm_s390_get_cpu_timer() retries. Warns once if an interval is already
 * open (cputm_start != 0).
 *
 * Needs disabled preemption to protect from TOD sync and vcpu_load/put.
 */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
        vcpu->arch.cputm_start = get_tod_clock_fast();
        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
1724
/*
 * Close the current CPU timer accounting interval: subtract the TOD time
 * elapsed since cputm_start from the SIE block's cputm and reset cputm_start
 * to 0, all inside a cputm_seqcount write section. Warns once if no interval
 * is open (cputm_start == 0).
 *
 * Needs disabled preemption to protect from TOD sync and vcpu_load/put.
 */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
        vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
        vcpu->arch.cputm_start = 0;
        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
1734
/*
 * Mark CPU timer accounting as enabled for this VCPU and open an accounting
 * interval right away. Warns once if accounting is already enabled.
 *
 * Needs disabled preemption to protect from TOD sync and vcpu_load/put.
 */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        WARN_ON_ONCE(vcpu->arch.cputm_enabled);
        vcpu->arch.cputm_enabled = true;
        __start_cpu_timer_accounting(vcpu);
}
1742
/*
 * Close the open accounting interval and mark CPU timer accounting as
 * disabled for this VCPU. Warns once if accounting is not enabled.
 *
 * Needs disabled preemption to protect from TOD sync and vcpu_load/put.
 */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
        __stop_cpu_timer_accounting(vcpu);
        vcpu->arch.cputm_enabled = false;
}
1750
/*
 * Preemption-safe wrapper around __enable_cpu_timer_accounting() for callers
 * that do not already run with preemption disabled.
 */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
        __enable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
1757
/*
 * Preemption-safe wrapper around __disable_cpu_timer_accounting() for callers
 * that do not already run with preemption disabled.
 */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
        __disable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
1764
/*
 * Set the cpu timer - may only be called from the VCPU thread itself.
 *
 * Stores @cputm in the SIE block inside a cputm_seqcount write section. If
 * accounting is enabled, cputm_start is reset to the current TOD clock in
 * the same section, so the new value is accounted from now on.
 */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
        if (vcpu->arch.cputm_enabled)
                vcpu->arch.cputm_start = get_tod_clock_fast();
        vcpu->arch.sie_block->cputm = cputm;
        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
        preempt_enable();
}
1776
/*
 * Update and get the cpu timer - can also be called from other VCPU threads.
 *
 * When accounting is disabled the stored cputm value is returned as is.
 * Otherwise the stored value, reduced by the TOD time elapsed since
 * cputm_start, is computed under the cputm_seqcount read protocol and the
 * computation is retried if a writer interfered.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
        unsigned int seq;
        __u64 value;

        if (unlikely(!vcpu->arch.cputm_enabled))
                return vcpu->arch.sie_block->cputm;

        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
        do {
                seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
                /*
                 * If the writer would ever execute a read in the critical
                 * section, e.g. in irq context, we have a deadlock.
                 */
                WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
                value = vcpu->arch.sie_block->cputm;
                /* if cputm_start is 0, accounting is being started/stopped */
                if (likely(vcpu->arch.cputm_start))
                        value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
        /* seq & ~1: an odd (writer active) snapshot never matches, forcing a retry */
        } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
        preempt_enable();
        return value;
}
1802
/*
 * Switch the current thread from host to guest register context when the
 * VCPU is scheduled in on @cpu.
 *
 * The host FPU control word and register-save pointer are stashed in
 * host_fpregs and the thread's FPU state is redirected to the guest values
 * in the kvm_run area; host access registers are saved and the guest ones
 * restored. The function then enables the VCPU's gmap, sets CPUSTAT_RUNNING
 * and, if CPU timer accounting is enabled and the VCPU is not idle, opens an
 * accounting interval.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        /* Save host register state */
        save_fpu_regs();
        vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
        vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;

        /* point the thread's FPU save area at the guest registers */
        if (MACHINE_HAS_VX)
                current->thread.fpu.regs = vcpu->run->s.regs.vrs;
        else
                current->thread.fpu.regs = vcpu->run->s.regs.fprs;
        current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
        if (test_fp_ctl(current->thread.fpu.fpc))
                /* User space provided an invalid FPC, let's clear it */
                current->thread.fpu.fpc = 0;

        save_access_regs(vcpu->arch.host_acrs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        gmap_enable(vcpu->arch.enabled_gmap);
        atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
                __start_cpu_timer_accounting(vcpu);
        vcpu->cpu = cpu;
}
1827
/*
 * Counterpart of kvm_arch_vcpu_load(): switch the current thread back from
 * guest to host register context when the VCPU is scheduled out.
 *
 * Closes the CPU timer accounting interval (if enabled and the VCPU is not
 * idle), clears CPUSTAT_RUNNING, remembers and disables the currently
 * enabled gmap, saves the guest FPC and access registers into the kvm_run
 * area and restores the host FPU pointers and access registers.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
        vcpu->cpu = -1;
        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
                __stop_cpu_timer_accounting(vcpu);
        atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        /* remember which gmap was active so vcpu_load can re-enable it */
        vcpu->arch.enabled_gmap = gmap_get_enabled();
        gmap_disable(vcpu->arch.enabled_gmap);

        /* Save guest register state */
        save_fpu_regs();
        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;

        /* Restore host register state */
        current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
        current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;

        save_access_regs(vcpu->run->s.regs.acrs);
        restore_access_regs(vcpu->arch.host_acrs);
}
1848
/*
 * Bring the VCPU into its initial-reset state: PSW, prefix, CPU timer, clock
 * comparator, TOD programmable register and control registers are reset
 * (cr0 and cr14 get fixed initial values), the FPC is cleared, gbea/pp and
 * the pfault state are reset and local interrupts are cleared. Unless user
 * space controls the CPU state, the VCPU is also stopped.
 */
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
        /* this equals initial cpu reset in pop, but we don't switch to ESA */
        vcpu->arch.sie_block->gpsw.mask = 0UL;
        vcpu->arch.sie_block->gpsw.addr = 0UL;
        kvm_s390_set_prefix(vcpu, 0);
        kvm_s390_set_cpu_timer(vcpu, 0);
        vcpu->arch.sie_block->ckc       = 0UL;
        vcpu->arch.sie_block->todpr     = 0;
        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
        vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
        /* make sure the new fpc will be lazily loaded */
        save_fpu_regs();
        current->thread.fpu.fpc = 0;
        vcpu->arch.sie_block->gbea = 1;
        vcpu->arch.sie_block->pp = 0;
        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
        kvm_clear_async_pf_completion_queue(vcpu);
        /* only stop the VCPU here if user space does not manage CPU states */
        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
                kvm_s390_vcpu_stop(vcpu);
        kvm_s390_clear_local_irqs(vcpu);
}
1872
/*
 * Final arch-specific step of VCPU creation: copy the VM-wide epoch into the
 * SIE block (under kvm->lock, with preemption disabled), and for
 * non-ucontrol VMs attach the VM's gmap and register the VCPU in the SCA.
 * Operation exception interception is enabled if facility 74 is available
 * or user space requested instruction 0 handling.
 */
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
        mutex_lock(&vcpu->kvm->lock);
        preempt_disable();
        vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
        preempt_enable();
        mutex_unlock(&vcpu->kvm->lock);
        if (!kvm_is_ucontrol(vcpu->kvm)) {
                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
                sca_add_vcpu(vcpu);
        }
        if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
        /* make vcpu_load load the right gmap on the first trigger */
        vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
1889
1890 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1891 {
1892         if (!test_kvm_facility(vcpu->kvm, 76))
1893                 return;
1894
1895         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1896
1897         if (vcpu->kvm->arch.crypto.aes_kw)
1898                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1899         if (vcpu->kvm->arch.crypto.dea_kw)
1900                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1901
1902         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1903 }
1904
/*
 * Undo kvm_s390_vcpu_setup_cmma(): free the page referenced by the SIE
 * block's cbrlo field and clear the field.
 */
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
        free_page(vcpu->arch.sie_block->cbrlo);
        vcpu->arch.sie_block->cbrlo = 0;
}
1910
1911 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1912 {
1913         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1914         if (!vcpu->arch.sie_block->cbrlo)
1915                 return -ENOMEM;
1916
1917         vcpu->arch.sie_block->ecb2 |= 0x80;
1918         vcpu->arch.sie_block->ecb2 &= ~0x08;
1919         return 0;
1920 }
1921
1922 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1923 {
1924         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1925
1926         vcpu->arch.sie_block->ibc = model->ibc;
1927         if (test_kvm_facility(vcpu->kvm, 7))
1928                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1929 }
1930
/*
 * Configure a newly created VCPU's SIE control block.
 *
 * Sets the initial cpuflags (the VCPU starts out stopped), applies the CPU
 * model, enables execution-control bits in ecb/ecb2/eca/ecd depending on
 * host capabilities (MACHINE_HAS_ESOP, sclp.*) and guest facilities, wires
 * up the riccb address, requests interception of ISKE/SSKE/RRBE, optionally
 * sets up CMMA, initialises the clock comparator timer and applies the
 * crypto settings.
 *
 * Returns 0 on success or the error from kvm_s390_vcpu_setup_cmma().
 */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
        int rc = 0;

        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
                                                    CPUSTAT_SM |
                                                    CPUSTAT_STOPPED);

        /* facility 78 takes precedence over facility 8 */
        if (test_kvm_facility(vcpu->kvm, 78))
                atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
        else if (test_kvm_facility(vcpu->kvm, 8))
                atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

        kvm_s390_vcpu_setup_model(vcpu);

        /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
        if (MACHINE_HAS_ESOP)
                vcpu->arch.sie_block->ecb |= 0x02;
        if (test_kvm_facility(vcpu->kvm, 9))
                vcpu->arch.sie_block->ecb |= 0x04;
        if (test_kvm_facility(vcpu->kvm, 73))
                vcpu->arch.sie_block->ecb |= 0x10;

        if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
                vcpu->arch.sie_block->ecb2 |= 0x08;
        /* base eca value; host-capability dependent bits are ORed in below */
        vcpu->arch.sie_block->eca = 0x1002000U;
        if (sclp.has_cei)
                vcpu->arch.sie_block->eca |= 0x80000000U;
        if (sclp.has_ib)
                vcpu->arch.sie_block->eca |= 0x40000000U;
        if (sclp.has_siif)
                vcpu->arch.sie_block->eca |= 1;
        if (sclp.has_sigpif)
                vcpu->arch.sie_block->eca |= 0x10000000U;
        if (test_kvm_facility(vcpu->kvm, 129)) {
                vcpu->arch.sie_block->eca |= 0x00020000;
                vcpu->arch.sie_block->ecd |= 0x20000000;
        }
        vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
        vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

        if (vcpu->kvm->arch.use_cmma) {
                rc = kvm_s390_vcpu_setup_cmma(vcpu);
                if (rc)
                        return rc;
        }
        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

        kvm_s390_vcpu_crypto_setup(vcpu);

        return rc;
}
1984
1985 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1986                                       unsigned int id)
1987 {
1988         struct kvm_vcpu *vcpu;
1989         struct sie_page *sie_page;
1990         int rc = -EINVAL;
1991
1992         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1993                 goto out;
1994
1995         rc = -ENOMEM;
1996
1997         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1998         if (!vcpu)
1999                 goto out;
2000
2001         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2002         if (!sie_page)
2003                 goto out_free_cpu;
2004
2005         vcpu->arch.sie_block = &sie_page->sie_block;
2006         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2007
2008         /* the real guest size will always be smaller than msl */
2009         vcpu->arch.sie_block->mso = 0;
2010         vcpu->arch.sie_block->msl = sclp.hamax;
2011
2012         vcpu->arch.sie_block->icpua = id;
2013         spin_lock_init(&vcpu->arch.local_int.lock);
2014         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2015         vcpu->arch.local_int.wq = &vcpu->wq;
2016         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2017         seqcount_init(&vcpu->arch.cputm_seqcount);
2018
2019         rc = kvm_vcpu_init(vcpu, kvm, id);
2020         if (rc)
2021                 goto out_free_sie_block;
2022         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2023                  vcpu->arch.sie_block);
2024         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2025
2026         return vcpu;
2027 out_free_sie_block:
2028         free_page((unsigned long)(vcpu->arch.sie_block));
2029 out_free_cpu:
2030         kmem_cache_free(kvm_vcpu_cache, vcpu);
2031 out:
2032         return ERR_PTR(rc);
2033 }
2034
/*
 * A VCPU is considered runnable when kvm_s390_vcpu_has_irq() reports an
 * interrupt for it (second argument 0; its meaning is defined by that
 * helper, which is not part of this section).
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        return kvm_s390_vcpu_has_irq(vcpu, 0);
}
2039
/*
 * Set PROG_BLOCK_SIE in the SIE block's prog20 field and kick the VCPU out
 * of SIE, waiting until it has left. The bit stays set until
 * kvm_s390_vcpu_unblock() is called.
 */
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
        atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
        exit_sie(vcpu);
}
2045
/* Clear the PROG_BLOCK_SIE bit set by kvm_s390_vcpu_block(). */
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
        atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}
2050
/*
 * Set PROG_REQUEST in the SIE block's prog20 field and kick the VCPU out of
 * SIE, waiting until it has left. The bit is cleared again by
 * kvm_s390_vcpu_request_handled().
 */
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
        atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
        exit_sie(vcpu);
}
2056
/* Clear the PROG_REQUEST bit set by kvm_s390_vcpu_request(). */
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
        atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
2061
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 *
 * The kick is done by setting CPUSTAT_STOP_INT in cpuflags; the wait is a
 * busy loop on the PROG_IN_SIE bit of prog0c.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
        atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
        while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
                cpu_relax();
}
2072
/*
 * Kick a guest cpu out of SIE to process a request synchronously.
 *
 * The request @req is recorded first, so it is already visible when the
 * VCPU has left SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
        kvm_make_request(req, vcpu);
        kvm_s390_vcpu_request(vcpu);
}
2079
2080 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2081                               unsigned long end)
2082 {
2083         struct kvm *kvm = gmap->private;
2084         struct kvm_vcpu *vcpu;
2085         unsigned long prefix;
2086         int i;
2087
2088         if (gmap_is_shadow(gmap))
2089                 return;
2090         if (start >= 1UL << 31)
2091                 /* We are only interested in prefix pages */
2092                 return;
2093         kvm_for_each_vcpu(i, vcpu, kvm) {
2094                 /* match against both prefix pages */
2095                 prefix = kvm_s390_get_prefix(vcpu);
2096                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2097                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2098                                    start, end);
2099                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2100                 }
2101         }
2102 }
2103
/*
 * Placeholder required by KVM common code. It must never be called on s390,
 * hence the BUG(); the return statement only satisfies the prototype.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        /* kvm common code refers to this, but never calls it */
        BUG();
        return 0;
}
2110
2111 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2112                                            struct kvm_one_reg *reg)
2113 {
2114         int r = -EINVAL;
2115
2116         switch (reg->id) {
2117         case KVM_REG_S390_TODPR:
2118                 r = put_user(vcpu->arch.sie_block->todpr,
2119                              (u32 __user *)reg->addr);
2120                 break;
2121         case KVM_REG_S390_EPOCHDIFF:
2122                 r = put_user(vcpu->arch.sie_block->epoch,
2123                              (u64 __user *)reg->addr);
2124                 break;
2125         case KVM_REG_S390_CPU_TIMER:
2126                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2127                              (u64 __user *)reg->addr);
2128                 break;
2129         case KVM_REG_S390_CLOCK_COMP:
2130                 r = put_user(vcpu->arch.sie_block->ckc,
2131                              (u64 __user *)reg->addr);
2132                 break;
2133         case KVM_REG_S390_PFTOKEN:
2134                 r = put_user(vcpu->arch.pfault_token,
2135                              (u64 __user *)reg->addr);
2136                 break;
2137         case KVM_REG_S390_PFCOMPARE:
2138                 r = put_user(vcpu->arch.pfault_compare,
2139                              (u64 __user *)reg->addr);
2140                 break;
2141         case KVM_REG_S390_PFSELECT:
2142                 r = put_user(vcpu->arch.pfault_select,
2143                              (u64 __user *)reg->addr);
2144                 break;
2145         case KVM_REG_S390_PP:
2146                 r = put_user(vcpu->arch.sie_block->pp,
2147                              (u64 __user *)reg->addr);
2148                 break;
2149         case KVM_REG_S390_GBEA:
2150                 r = put_user(vcpu->arch.sie_block->gbea,
2151                              (u64 __user *)reg->addr);
2152                 break;
2153         default:
2154                 break;
2155         }
2156
2157         return r;
2158 }
2159
2160 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2161                                            struct kvm_one_reg *reg)
2162 {
2163         int r = -EINVAL;
2164         __u64 val;
2165
2166         switch (reg->id) {
2167         case KVM_REG_S390_TODPR:
2168                 r = get_user(vcpu->arch.sie_block->todpr,
2169                              (u32 __user *)reg->addr);
2170                 break;
2171         case KVM_REG_S390_EPOCHDIFF:
2172                 r = get_user(vcpu->arch.sie_block->epoch,
2173                              (u64 __user *)reg->addr);
2174                 break;
2175         case KVM_REG_S390_CPU_TIMER:
2176                 r = get_user(val, (u64 __user *)reg->addr);
2177                 if (!r)
2178                         kvm_s390_set_cpu_timer(vcpu, val);
2179                 break;
2180         case KVM_REG_S390_CLOCK_COMP:
2181                 r = get_user(vcpu->arch.sie_block->ckc,
2182                              (u64 __user *)reg->addr);
2183                 break;
2184         case KVM_REG_S390_PFTOKEN:
2185                 r = get_user(vcpu->arch.pfault_token,
2186                              (u64 __user *)reg->addr);
2187                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2188                         kvm_clear_async_pf_completion_queue(vcpu);
2189                 break;
2190         case KVM_REG_S390_PFCOMPARE:
2191                 r = get_user(vcpu->arch.pfault_compare,
2192                              (u64 __user *)reg->addr);
2193                 break;
2194         case KVM_REG_S390_PFSELECT:
2195                 r = get_user(vcpu->arch.pfault_select,
2196                              (u64 __user *)reg->addr);
2197                 break;
2198         case KVM_REG_S390_PP:
2199                 r = get_user(vcpu->arch.sie_block->pp,
2200                              (u64 __user *)reg->addr);
2201                 break;
2202         case KVM_REG_S390_GBEA:
2203                 r = get_user(vcpu->arch.sie_block->gbea,
2204                              (u64 __user *)reg->addr);
2205                 break;
2206         default:
2207                 break;
2208         }
2209
2210         return r;
2211 }
2212
/*
 * ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0.
 */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        kvm_s390_vcpu_initial_reset(vcpu);
        return 0;
}
2218
/*
 * Set the guest general purpose registers: copy @regs->gprs into the gprs
 * area of the VCPU's kvm_run structure. Always returns 0.
 */
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
        return 0;
}
2224
/*
 * Get the guest general purpose registers: copy the gprs area of the VCPU's
 * kvm_run structure into @regs->gprs. Always returns 0.
 */
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
        return 0;
}
2230
/*
 * Set the guest access and control registers: access registers go into the
 * kvm_run area, control registers into the SIE block's gcr array. The new
 * access registers are then passed to restore_access_regs(). Always
 * returns 0.
 */
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
        restore_access_regs(vcpu->run->s.regs.acrs);
        return 0;
}
2239
/*
 * Get the guest access and control registers: access registers are read
 * from the kvm_run area, control registers from the SIE block's gcr array.
 * Always returns 0.
 */
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
        return 0;
}
2247
/*
 * Set the guest floating point control word and registers through the
 * current thread's FPU state. With MACHINE_HAS_VX the values are converted
 * into the vector register layout, otherwise they are copied as is.
 *
 * Returns 0 on success, -EINVAL if test_fp_ctl() rejects @fpu->fpc.
 */
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        /* make sure the new values will be lazily loaded */
        save_fpu_regs();
        if (test_fp_ctl(fpu->fpc))
                return -EINVAL;
        current->thread.fpu.fpc = fpu->fpc;
        if (MACHINE_HAS_VX)
                convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
        else
                memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
        return 0;
}
2261
/*
 * Get the guest floating point control word and registers from the current
 * thread's FPU state. With MACHINE_HAS_VX the registers are converted from
 * the vector register layout, otherwise they are copied as is. Always
 * returns 0.
 */
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        /* make sure we have the latest values */
        save_fpu_regs();
        if (MACHINE_HAS_VX)
                convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
        else
                memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
        fpu->fpc = current->thread.fpu.fpc;
        return 0;
}
2273
2274 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2275 {
2276         int rc = 0;
2277
2278         if (!is_vcpu_stopped(vcpu))
2279                 rc = -EBUSY;
2280         else {
2281                 vcpu->run->psw_mask = psw.mask;
2282                 vcpu->run->psw_addr = psw.addr;
2283         }
2284         return rc;
2285 }
2286
/*
 * Address translation ioctl. Not supported here: both arguments are ignored
 * and -EINVAL is always returned.
 */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                  struct kvm_translation *tr)
{
        return -EINVAL; /* not implemented yet */
}
2292
2293 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2294                               KVM_GUESTDBG_USE_HW_BP | \
2295                               KVM_GUESTDBG_ENABLE)
2296
2297 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2298                                         struct kvm_guest_debug *dbg)
2299 {
2300         int rc = 0;
2301
2302         vcpu->guest_debug = 0;
2303         kvm_s390_clear_bp_data(vcpu);
2304
2305         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2306                 return -EINVAL;
2307         if (!sclp.has_gpere)
2308                 return -EINVAL;
2309
2310         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2311                 vcpu->guest_debug = dbg->control;
2312                 /* enforce guest PER */
2313                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2314
2315                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2316                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2317         } else {
2318                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2319                 vcpu->arch.guestdbg.last_bp = 0;
2320         }
2321
2322         if (rc) {
2323                 vcpu->guest_debug = 0;
2324                 kvm_s390_clear_bp_data(vcpu);
2325                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2326         }
2327
2328         return rc;
2329 }
2330
2331 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2332                                     struct kvm_mp_state *mp_state)
2333 {
2334         /* CHECK_STOP and LOAD are not supported yet */
2335         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2336                                        KVM_MP_STATE_OPERATING;
2337 }
2338
2339 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2340                                     struct kvm_mp_state *mp_state)
2341 {
2342         int rc = 0;
2343
2344         /* user space knows about this interface - let it control the state */
2345         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2346
2347         switch (mp_state->mp_state) {
2348         case KVM_MP_STATE_STOPPED:
2349                 kvm_s390_vcpu_stop(vcpu);
2350                 break;
2351         case KVM_MP_STATE_OPERATING:
2352                 kvm_s390_vcpu_start(vcpu);
2353                 break;
2354         case KVM_MP_STATE_LOAD:
2355         case KVM_MP_STATE_CHECK_STOP:
2356                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2357         default:
2358                 rc = -ENXIO;
2359         }
2360
2361         return rc;
2362 }
2363
/* Return true if CPUSTAT_IBS is currently set in the VCPU's cpuflags. */
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
        return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}
2368
/*
 * Process the requests pending for this VCPU.
 *
 * Each pass first clears PROG_REQUEST and returns 0 if nothing is pending.
 * Otherwise one request is handled and the loop restarts, so requests raised
 * in the meantime are picked up as well.
 *
 * Returns 0 when all requests were handled, or the error from
 * gmap_mprotect_notify(); in that case the MMU reload request is re-raised
 * so that it is retried later.
 */
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
        kvm_s390_vcpu_request_handled(vcpu);
        if (!vcpu->requests)
                return 0;
        /*
         * We use MMU_RELOAD just to re-arm the ipte notifier for the
         * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
         * This ensures that the ipte instruction for this request has
         * already finished. We might race against a second unmapper that
         * wants to set the blocking bit. Lets just retry the request loop.
         */
        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
                int rc;
                rc = gmap_mprotect_notify(vcpu->arch.gmap,
                                          kvm_s390_get_prefix(vcpu),
                                          PAGE_SIZE * 2, PROT_WRITE);
                if (rc) {
                        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
                        return rc;
                }
                goto retry;
        }

        if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
                vcpu->arch.sie_block->ihcpu = 0xffff;
                goto retry;
        }

        if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
                /* only trace and set the flag on an actual state change */
                if (!ibs_enabled(vcpu)) {
                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
                        atomic_or(CPUSTAT_IBS,
                                        &vcpu->arch.sie_block->cpuflags);
                }
                goto retry;
        }

        if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
                /* only trace and clear the flag on an actual state change */
                if (ibs_enabled(vcpu)) {
                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
                        atomic_andnot(CPUSTAT_IBS,
                                          &vcpu->arch.sie_block->cpuflags);
                }
                goto retry;
        }

        if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
                goto retry;
        }

        /* nothing to do, just clear the request */
        clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

        return 0;
}
2427
/*
 * Set the guest TOD clock to @tod.
 *
 * The VM-wide epoch is computed as the difference between @tod and the
 * current host TOD clock and then copied into every VCPU's SIE block while
 * all VCPUs are blocked. Runs under kvm->lock with preemption disabled.
 */
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
        struct kvm_vcpu *vcpu;
        int i;

        mutex_lock(&kvm->lock);
        preempt_disable();
        kvm->arch.epoch = tod - get_tod_clock();
        kvm_s390_vcpu_block_all(kvm);
        kvm_for_each_vcpu(i, vcpu, kvm)
                vcpu->arch.sie_block->epoch = kvm->arch.epoch;
        kvm_s390_vcpu_unblock_all(kvm);
        preempt_enable();
        mutex_unlock(&kvm->lock);
}
2443
2444 /**
2445  * kvm_arch_fault_in_page - fault-in guest page if necessary
2446  * @vcpu: The corresponding virtual cpu
2447  * @gpa: Guest physical address
2448  * @writable: Whether the page should be writable or not
2449  *
2450  * Make sure that a guest page has been faulted-in on the host.
2451  *
2452  * Return: Zero on success, negative error code otherwise.
2453  */
2454 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2455 {
2456         return gmap_fault(vcpu->arch.gmap, gpa,
2457                           writable ? FAULT_FLAG_WRITE : 0);
2458 }
2459
2460 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2461                                       unsigned long token)
2462 {
2463         struct kvm_s390_interrupt inti;
2464         struct kvm_s390_irq irq;
2465
2466         if (start_token) {
2467                 irq.u.ext.ext_params2 = token;
2468                 irq.type = KVM_S390_INT_PFAULT_INIT;
2469                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2470         } else {
2471                 inti.type = KVM_S390_INT_PFAULT_DONE;
2472                 inti.parm64 = token;
2473                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2474         }
2475 }
2476
2477 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2478                                      struct kvm_async_pf *work)
2479 {
2480         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2481         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2482 }
2483
2484 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2485                                  struct kvm_async_pf *work)
2486 {
2487         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2488         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2489 }
2490
/*
 * Async-pf "page ready" hook. Intentionally empty: on s390 the page is
 * always injected directly, so there is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work)
{
}
2496
/*
 * Always reports true. s390 injects the page directly, but the answer must
 * still be "yes" so that check_async_completion performs its cleanup.
 */
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
        return true;
}
2505
2506 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2507 {
2508         hva_t hva;
2509         struct kvm_arch_async_pf arch;
2510         int rc;
2511
2512         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2513                 return 0;
2514         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2515             vcpu->arch.pfault_compare)
2516                 return 0;
2517         if (psw_extint_disabled(vcpu))
2518                 return 0;
2519         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2520                 return 0;
2521         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2522                 return 0;
2523         if (!vcpu->arch.gmap->pfault_enabled)
2524                 return 0;
2525
2526         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2527         hva += current->thread.gmap_addr & ~PAGE_MASK;
2528         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2529                 return 0;
2530
2531         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2532         return rc;
2533 }
2534
2535 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2536 {
2537         int rc, cpuflags;
2538
2539         /*
2540          * On s390 notifications for arriving pages will be delivered directly
2541          * to the guest but the house keeping for completed pfaults is
2542          * handled outside the worker.
2543          */
2544         kvm_check_async_pf_completion(vcpu);
2545
2546         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2547         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2548
2549         if (need_resched())
2550                 schedule();
2551
2552         if (test_cpu_flag(CIF_MCCK_PENDING))
2553                 s390_handle_mcck();
2554
2555         if (!kvm_is_ucontrol(vcpu->kvm)) {
2556                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2557                 if (rc)
2558                         return rc;
2559         }
2560
2561         rc = kvm_s390_handle_requests(vcpu);
2562         if (rc)
2563                 return rc;
2564
2565         if (guestdbg_enabled(vcpu)) {
2566                 kvm_s390_backup_guest_per_regs(vcpu);
2567                 kvm_s390_patch_guest_per_regs(vcpu);
2568         }
2569
2570         vcpu->arch.sie_block->icptcode = 0;
2571         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2572         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2573         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2574
2575         return 0;
2576 }
2577
2578 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2579 {
2580         struct kvm_s390_pgm_info pgm_info = {
2581                 .code = PGM_ADDRESSING,
2582         };
2583         u8 opcode, ilen;
2584         int rc;
2585
2586         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2587         trace_kvm_s390_sie_fault(vcpu);
2588
2589         /*
2590          * We want to inject an addressing exception, which is defined as a
2591          * suppressing or terminating exception. However, since we came here
2592          * by a DAT access exception, the PSW still points to the faulting
2593          * instruction since DAT exceptions are nullifying. So we've got
2594          * to look up the current opcode to get the length of the instruction
2595          * to be able to forward the PSW.
2596          */
2597         rc = read_guest_instr(vcpu, &opcode, 1);
2598         ilen = insn_length(opcode);
2599         if (rc < 0) {
2600                 return rc;
2601         } else if (rc) {
2602                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2603                  * Forward by arbitrary ilc, injection will take care of
2604                  * nullification if necessary.
2605                  */
2606                 pgm_info = vcpu->arch.pgm;
2607                 ilen = 4;
2608         }
2609         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2610         kvm_s390_forward_psw(vcpu, ilen);
2611         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2612 }
2613
2614 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2615 {
2616         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2617                    vcpu->arch.sie_block->icptcode);
2618         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2619
2620         if (guestdbg_enabled(vcpu))
2621                 kvm_s390_restore_guest_per_regs(vcpu);
2622
2623         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2624         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2625
2626         if (vcpu->arch.sie_block->icptcode > 0) {
2627                 int rc = kvm_handle_sie_intercept(vcpu);
2628
2629                 if (rc != -EOPNOTSUPP)
2630                         return rc;
2631                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2632                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2633                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2634                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2635                 return -EREMOTE;
2636         } else if (exit_reason != -EFAULT) {
2637                 vcpu->stat.exit_null++;
2638                 return 0;
2639         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2640                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2641                 vcpu->run->s390_ucontrol.trans_exc_code =
2642                                                 current->thread.gmap_addr;
2643                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2644                 return -EREMOTE;
2645         } else if (current->thread.gmap_pfault) {
2646                 trace_kvm_s390_major_guest_pfault(vcpu);
2647                 current->thread.gmap_pfault = 0;
2648                 if (kvm_arch_setup_async_pf(vcpu))
2649                         return 0;
2650                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2651         }
2652         return vcpu_post_run_fault_in_sie(vcpu);
2653 }
2654
2655 static int __vcpu_run(struct kvm_vcpu *vcpu)
2656 {
2657         int rc, exit_reason;
2658
2659         /*
2660          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2661          * ning the guest), so that memslots (and other stuff) are protected
2662          */
2663         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2664
2665         do {
2666                 rc = vcpu_pre_run(vcpu);
2667                 if (rc)
2668                         break;
2669
2670                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2671                 /*
2672                  * As PF_VCPU will be used in fault handler, between
2673                  * guest_enter and guest_exit should be no uaccess.
2674                  */
2675                 local_irq_disable();
2676                 guest_enter_irqoff();
2677                 __disable_cpu_timer_accounting(vcpu);
2678                 local_irq_enable();
2679                 exit_reason = sie64a(vcpu->arch.sie_block,
2680                                      vcpu->run->s.regs.gprs);
2681                 local_irq_disable();
2682                 __enable_cpu_timer_accounting(vcpu);
2683                 guest_exit_irqoff();
2684                 local_irq_enable();
2685                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2686
2687                 rc = vcpu_post_run(vcpu, exit_reason);
2688         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2689
2690         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2691         return rc;
2692 }
2693
2694 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2695 {
2696         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2697         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2698         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2699                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2700         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2701                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2702                 /* some control register changes require a tlb flush */
2703                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2704         }
2705         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2706                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2707                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2708                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2709                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2710                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2711         }
2712         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2713                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2714                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2715                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2716                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2717                         kvm_clear_async_pf_completion_queue(vcpu);
2718         }
2719         /*
2720          * If userspace sets the riccb (e.g. after migration) to a valid state,
2721          * we should enable RI here instead of doing the lazy enablement.
2722          */
2723         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2724             test_kvm_facility(vcpu->kvm, 64)) {
2725                 struct runtime_instr_cb *riccb =
2726                         (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2727
2728                 if (riccb->valid)
2729                         vcpu->arch.sie_block->ecb3 |= 0x01;
2730         }
2731
2732         kvm_run->kvm_dirty_regs = 0;
2733 }
2734
2735 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2736 {
2737         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2738         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2739         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2740         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2741         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2742         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2743         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2744         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2745         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2746         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2747         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2748         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2749 }
2750
2751 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2752 {
2753         int rc;
2754         sigset_t sigsaved;
2755
2756         if (guestdbg_exit_pending(vcpu)) {
2757                 kvm_s390_prepare_debug_exit(vcpu);
2758                 return 0;
2759         }
2760
2761         if (vcpu->sigset_active)
2762                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2763
2764         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2765                 kvm_s390_vcpu_start(vcpu);
2766         } else if (is_vcpu_stopped(vcpu)) {
2767                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2768                                    vcpu->vcpu_id);
2769                 return -EINVAL;
2770         }
2771
2772         sync_regs(vcpu, kvm_run);
2773         enable_cpu_timer_accounting(vcpu);
2774
2775         might_fault();
2776         rc = __vcpu_run(vcpu);
2777
2778         if (signal_pending(current) && !rc) {
2779                 kvm_run->exit_reason = KVM_EXIT_INTR;
2780                 rc = -EINTR;
2781         }
2782
2783         if (guestdbg_exit_pending(vcpu) && !rc)  {
2784                 kvm_s390_prepare_debug_exit(vcpu);
2785                 rc = 0;
2786         }
2787
2788         if (rc == -EREMOTE) {
2789                 /* userspace support is needed, kvm_run has been prepared */
2790                 rc = 0;
2791         }
2792
2793         disable_cpu_timer_accounting(vcpu);
2794         store_regs(vcpu, kvm_run);
2795
2796         if (vcpu->sigset_active)
2797                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2798
2799         vcpu->stat.exit_userspace++;
2800         return rc;
2801 }
2802
2803 /*
2804  * store status at address
2805  * we use have two special cases:
2806  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2807  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2808  */
2809 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2810 {
2811         unsigned char archmode = 1;
2812         freg_t fprs[NUM_FPRS];
2813         unsigned int px;
2814         u64 clkcomp, cputm;
2815         int rc;
2816
2817         px = kvm_s390_get_prefix(vcpu);
2818         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2819                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2820                         return -EFAULT;
2821                 gpa = 0;
2822         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2823                 if (write_guest_real(vcpu, 163, &archmode, 1))
2824                         return -EFAULT;
2825                 gpa = px;
2826         } else
2827                 gpa -= __LC_FPREGS_SAVE_AREA;
2828
2829         /* manually convert vector registers if necessary */
2830         if (MACHINE_HAS_VX) {
2831                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2832                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2833                                      fprs, 128);
2834         } else {
2835                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2836                                      vcpu->run->s.regs.fprs, 128);
2837         }
2838         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2839                               vcpu->run->s.regs.gprs, 128);
2840         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2841                               &vcpu->arch.sie_block->gpsw, 16);
2842         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2843                               &px, 4);
2844         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2845                               &vcpu->run->s.regs.fpc, 4);
2846         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2847                               &vcpu->arch.sie_block->todpr, 4);
2848         cputm = kvm_s390_get_cpu_timer(vcpu);
2849         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2850                               &cputm, 8);
2851         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2852         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2853                               &clkcomp, 8);
2854         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2855                               &vcpu->run->s.regs.acrs, 64);
2856         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2857                               &vcpu->arch.sie_block->gcr, 128);
2858         return rc ? -EFAULT : 0;
2859 }
2860
2861 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2862 {
2863         /*
2864          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2865          * copying in vcpu load/put. Lets update our copies before we save
2866          * it into the save area
2867          */
2868         save_fpu_regs();
2869         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2870         save_access_regs(vcpu->run->s.regs.acrs);
2871
2872         return kvm_s390_store_status_unloaded(vcpu, addr);
2873 }
2874
2875 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2876 {
2877         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2878         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2879 }
2880
2881 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2882 {
2883         unsigned int i;
2884         struct kvm_vcpu *vcpu;
2885
2886         kvm_for_each_vcpu(i, vcpu, kvm) {
2887                 __disable_ibs_on_vcpu(vcpu);
2888         }
2889 }
2890
2891 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2892 {
2893         if (!sclp.has_ibs)
2894                 return;
2895         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2896         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2897 }
2898
2899 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2900 {
2901         int i, online_vcpus, started_vcpus = 0;
2902
2903         if (!is_vcpu_stopped(vcpu))
2904                 return;
2905
2906         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2907         /* Only one cpu at a time may enter/leave the STOPPED state. */
2908         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2909         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2910
2911         for (i = 0; i < online_vcpus; i++) {
2912                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2913                         started_vcpus++;
2914         }
2915
2916         if (started_vcpus == 0) {
2917                 /* we're the only active VCPU -> speed it up */
2918                 __enable_ibs_on_vcpu(vcpu);
2919         } else if (started_vcpus == 1) {
2920                 /*
2921                  * As we are starting a second VCPU, we have to disable
2922                  * the IBS facility on all VCPUs to remove potentially
2923                  * oustanding ENABLE requests.
2924                  */
2925                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2926         }
2927
2928         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2929         /*
2930          * Another VCPU might have used IBS while we were offline.
2931          * Let's play safe and flush the VCPU at startup.
2932          */
2933         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2934         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2935         return;
2936 }
2937
2938 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2939 {
2940         int i, online_vcpus, started_vcpus = 0;
2941         struct kvm_vcpu *started_vcpu = NULL;
2942
2943         if (is_vcpu_stopped(vcpu))
2944                 return;
2945
2946         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2947         /* Only one cpu at a time may enter/leave the STOPPED state. */
2948         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2949         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2950
2951         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2952         kvm_s390_clear_stop_irq(vcpu);
2953
2954         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2955         __disable_ibs_on_vcpu(vcpu);
2956
2957         for (i = 0; i < online_vcpus; i++) {
2958                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2959                         started_vcpus++;
2960                         started_vcpu = vcpu->kvm->vcpus[i];
2961                 }
2962         }
2963
2964         if (started_vcpus == 1) {
2965                 /*
2966                  * As we only have one VCPU left, we want to enable the
2967                  * IBS facility for that VCPU to speed it up.
2968                  */
2969                 __enable_ibs_on_vcpu(started_vcpu);
2970         }
2971
2972         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2973         return;
2974 }
2975
2976 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2977                                      struct kvm_enable_cap *cap)
2978 {
2979         int r;
2980
2981         if (cap->flags)
2982                 return -EINVAL;
2983
2984         switch (cap->cap) {
2985         case KVM_CAP_S390_CSS_SUPPORT:
2986                 if (!vcpu->kvm->arch.css_support) {
2987                         vcpu->kvm->arch.css_support = 1;
2988                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2989                         trace_kvm_s390_enable_css(vcpu->kvm);
2990                 }
2991                 r = 0;
2992                 break;
2993         default:
2994                 r = -EINVAL;
2995                 break;
2996         }
2997         return r;
2998 }
2999
3000 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3001                                   struct kvm_s390_mem_op *mop)
3002 {
3003         void __user *uaddr = (void __user *)mop->buf;
3004         void *tmpbuf = NULL;
3005         int r, srcu_idx;
3006         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3007                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3008
3009         if (mop->flags & ~supported_flags)
3010                 return -EINVAL;
3011
3012         if (mop->size > MEM_OP_MAX_SIZE)
3013                 return -E2BIG;
3014
3015         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3016                 tmpbuf = vmalloc(mop->size);
3017                 if (!tmpbuf)
3018                         return -ENOMEM;
3019         }
3020
3021         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3022
3023         switch (mop->op) {
3024         case KVM_S390_MEMOP_LOGICAL_READ:
3025                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3026                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3027                                             mop->size, GACC_FETCH);
3028                         break;
3029                 }
3030                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3031                 if (r == 0) {
3032                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3033                                 r = -EFAULT;
3034                 }
3035                 break;
3036         case KVM_S390_MEMOP_LOGICAL_WRITE:
3037                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3038                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3039                                             mop->size, GACC_STORE);
3040                         break;
3041                 }
3042                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3043                         r = -EFAULT;
3044                         break;
3045                 }
3046                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3047                 break;
3048         default:
3049                 r = -EINVAL;
3050         }
3051
3052         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3053
3054         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3055                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3056
3057         vfree(tmpbuf);
3058         return r;
3059 }
3060
3061 long kvm_arch_vcpu_ioctl(struct file *filp,
3062                          unsigned int ioctl, unsigned long arg)
3063 {
3064         struct kvm_vcpu *vcpu = filp->private_data;
3065         void __user *argp = (void __user *)arg;
3066         int idx;
3067         long r;
3068
3069         switch (ioctl) {
3070         case KVM_S390_IRQ: {
3071                 struct kvm_s390_irq s390irq;
3072
3073                 r = -EFAULT;
3074                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3075                         break;
3076                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3077                 break;
3078         }
3079         case KVM_S390_INTERRUPT: {
3080                 struct kvm_s390_interrupt s390int;
3081                 struct kvm_s390_irq s390irq;
3082
3083                 r = -EFAULT;
3084                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3085                         break;
3086                 if (s390int_to_s390irq(&s390int, &s390irq))
3087                         return -EINVAL;
3088                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3089                 break;
3090         }
3091         case KVM_S390_STORE_STATUS:
3092                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3093                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3094                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3095                 break;
3096         case KVM_S390_SET_INITIAL_PSW: {
3097                 psw_t psw;
3098
3099                 r = -EFAULT;
3100                 if (copy_from_user(&psw, argp, sizeof(psw)))
3101                         break;
3102                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3103                 break;
3104         }
3105         case KVM_S390_INITIAL_RESET:
3106                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3107                 break;
3108         case KVM_SET_ONE_REG:
3109         case KVM_GET_ONE_REG: {
3110                 struct kvm_one_reg reg;
3111                 r = -EFAULT;
3112                 if (copy_from_user(&reg, argp, sizeof(reg)))
3113                         break;
3114                 if (ioctl == KVM_SET_ONE_REG)
3115                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3116                 else
3117                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3118                 break;
3119         }
3120 #ifdef CONFIG_KVM_S390_UCONTROL
3121         case KVM_S390_UCAS_MAP: {
3122                 struct kvm_s390_ucas_mapping ucasmap;
3123
3124                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3125                         r = -EFAULT;
3126                         break;
3127                 }
3128
3129                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3130                         r = -EINVAL;
3131                         break;
3132                 }
3133
3134                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3135                                      ucasmap.vcpu_addr, ucasmap.length);
3136                 break;
3137         }
3138         case KVM_S390_UCAS_UNMAP: {
3139                 struct kvm_s390_ucas_mapping ucasmap;
3140
3141                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3142                         r = -EFAULT;
3143                         break;
3144                 }
3145
3146                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3147                         r = -EINVAL;
3148                         break;
3149                 }
3150
3151                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3152                         ucasmap.length);
3153                 break;
3154         }
3155 #endif
3156         case KVM_S390_VCPU_FAULT: {
3157                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3158                 break;
3159         }
3160         case KVM_ENABLE_CAP:
3161         {
3162                 struct kvm_enable_cap cap;
3163                 r = -EFAULT;
3164                 if (copy_from_user(&cap, argp, sizeof(cap)))
3165                         break;
3166                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3167                 break;
3168         }
3169         case KVM_S390_MEM_OP: {
3170                 struct kvm_s390_mem_op mem_op;
3171
3172                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3173                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3174                 else
3175                         r = -EFAULT;
3176                 break;
3177         }
3178         case KVM_S390_SET_IRQ_STATE: {
3179                 struct kvm_s390_irq_state irq_state;
3180
3181                 r = -EFAULT;
3182                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3183                         break;
3184                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3185                     irq_state.len == 0 ||
3186                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3187                         r = -EINVAL;
3188                         break;
3189                 }
3190                 r = kvm_s390_set_irq_state(vcpu,
3191                                            (void __user *) irq_state.buf,
3192                                            irq_state.len);
3193                 break;
3194         }
3195         case KVM_S390_GET_IRQ_STATE: {
3196                 struct kvm_s390_irq_state irq_state;
3197
3198                 r = -EFAULT;
3199                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3200                         break;
3201                 if (irq_state.len == 0) {
3202                         r = -EINVAL;
3203                         break;
3204                 }
3205                 r = kvm_s390_get_irq_state(vcpu,
3206                                            (__u8 __user *)  irq_state.buf,
3207                                            irq_state.len);
3208                 break;
3209         }
3210         default:
3211                 r = -ENOTTY;
3212         }
3213         return r;
3214 }
3215
3216 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3217 {
3218 #ifdef CONFIG_KVM_S390_UCONTROL
3219         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3220                  && (kvm_is_ucontrol(vcpu->kvm))) {
3221                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3222                 get_page(vmf->page);
3223                 return 0;
3224         }
3225 #endif
3226         return VM_FAULT_SIGBUS;
3227 }
3228
/* Memslot creation hook: nothing to do here, always returns 0. */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                            unsigned long npages)
{
        return 0;
}
3234
3235 /* Section: memory related */
3236 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3237                                    struct kvm_memory_slot *memslot,
3238                                    const struct kvm_userspace_memory_region *mem,
3239                                    enum kvm_mr_change change)
3240 {
3241         /* A few sanity checks. We can have memory slots which have to be
3242            located/ended at a segment boundary (1MB). The memory in userland is
3243            ok to be fragmented into various different vmas. It is okay to mmap()
3244            and munmap() stuff in this slot after doing this call at any time */
3245
3246         if (mem->userspace_addr & 0xffffful)
3247                 return -EINVAL;
3248
3249         if (mem->memory_size & 0xffffful)
3250                 return -EINVAL;
3251
3252         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3253                 return -EINVAL;
3254
3255         return 0;
3256 }
3257
3258 void kvm_arch_commit_memory_region(struct kvm *kvm,
3259                                 const struct kvm_userspace_memory_region *mem,
3260                                 const struct kvm_memory_slot *old,
3261                                 const struct kvm_memory_slot *new,
3262                                 enum kvm_mr_change change)
3263 {
3264         int rc;
3265
3266         /* If the basics of the memslot do not change, we do not want
3267          * to update the gmap. Every update causes several unnecessary
3268          * segment translation exceptions. This is usually handled just
3269          * fine by the normal fault handler + gmap, but it will also
3270          * cause faults on the prefix page of running guest CPUs.
3271          */
3272         if (old->userspace_addr == mem->userspace_addr &&
3273             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3274             old->npages * PAGE_SIZE == mem->memory_size)
3275                 return;
3276
3277         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3278                 mem->guest_phys_addr, mem->memory_size);
3279         if (rc)
3280                 pr_warn("failed to commit memory region\n");
3281         return;
3282 }
3283
3284 static inline unsigned long nonhyp_mask(int i)
3285 {
3286         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3287
3288         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3289 }
3290
3291 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3292 {
3293         vcpu->valid_wakeup = false;
3294 }
3295
3296 static int __init kvm_s390_init(void)
3297 {
3298         int i;
3299
3300         if (!sclp.has_sief2) {
3301                 pr_info("SIE not available\n");
3302                 return -ENODEV;
3303         }
3304
3305         for (i = 0; i < 16; i++)
3306                 kvm_s390_fac_list_mask[i] |=
3307                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3308
3309         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3310 }
3311
3312 static void __exit kvm_s390_exit(void)
3313 {
3314         kvm_exit();
3315 }
3316
/* Hook kvm_s390_init()/kvm_s390_exit() up as the module's entry points. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");