Commit | Line | Data |
---|---|---|
95ca2cb5 JF |
1 | /* |
2 | * store hypervisor information instruction emulation functions. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License (version 2 only) | |
6 | * as published by the Free Software Foundation. | |
7 | * | |
8 | * Copyright IBM Corp. 2016 | |
9 | * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> | |
10 | */ | |
11 | #include <linux/kvm_host.h> | |
12 | #include <linux/errno.h> | |
13 | #include <linux/pagemap.h> | |
14 | #include <linux/vmalloc.h> | |
7d0a5e62 | 15 | #include <linux/ratelimit.h> |
95ca2cb5 JF |
16 | |
17 | #include <asm/kvm_host.h> | |
18 | #include <asm/asm-offsets.h> | |
19 | #include <asm/sclp.h> | |
20 | #include <asm/diag.h> | |
21 | #include <asm/sysinfo.h> | |
22 | #include <asm/ebcdic.h> | |
23 | ||
24 | #include "kvm-s390.h" | |
25 | #include "gaccess.h" | |
26 | #include "trace.h" | |
27 | ||
/* Weight value that diag204 reports for a dedicated (non-shared) CPU. */
#define DED_WEIGHT 0xffff

/*
 * EBCDIC encodings of the CPU type names "CP" and "IFL". The names are
 * left-justified and padded with spaces (0x40), so the first 0x40 byte
 * marks the end of the string.
 */
#define CP  0xc3d7404040404040UL
#define IFL 0xc9c6d34040404040UL
35 | ||
/* Flag bits for hdr_sctn.infhflg1. */
enum hdr_flags {
	HDR_NOT_LPAR   = 0x10,
	HDR_STACK_INCM = 0x20,
	HDR_STSI_UNAV  = 0x40,
	/* Set by fill_diag() when global performance data is not available. */
	HDR_PERF_UNAV  = 0x80,
};
42 | ||
/* Validity bits for mac_sctn.infmval1. */
enum mac_validity {
	MAC_NAME_VLD = 0x20,	/* set together with MAC_ID_VLD by fill_stsi_mac() */
	MAC_ID_VLD   = 0x40,
	MAC_CNT_VLD  = 0x80,	/* CPU counters were filled by fill_diag_mac() */
};
48 | ||
/* Flag bits for par_sctn.infpflg1. */
enum par_flag {
	/* Set when the partition header of the calling LPAR has a non-zero mtid. */
	PAR_MT_EN = 0x80,
};
52 | ||
/* Validity bits for par_sctn.infpval1. */
enum par_validity {
	PAR_GRP_VLD  = 0x08,	/* group name and group caps are filled */
	PAR_ID_VLD   = 0x10,	/* partition number and name are filled */
	PAR_ABS_VLD  = 0x20,	/* absolute caps (infpabcp/infpabif) are filled */
	PAR_WGHT_VLD = 0x40,	/* weight based caps (infpwbcp/infpwbif) are filled */
	PAR_PCNT_VLD = 0x80,	/* shared/dedicated CPU counts are filled */
};
60 | ||
61 | struct hdr_sctn { | |
62 | u8 infhflg1; | |
63 | u8 infhflg2; /* reserved */ | |
64 | u8 infhval1; /* reserved */ | |
65 | u8 infhval2; /* reserved */ | |
66 | u8 reserved[3]; | |
67 | u8 infhygct; | |
68 | u16 infhtotl; | |
69 | u16 infhdln; | |
70 | u16 infmoff; | |
71 | u16 infmlen; | |
72 | u16 infpoff; | |
73 | u16 infplen; | |
74 | u16 infhoff1; | |
75 | u16 infhlen1; | |
76 | u16 infgoff1; | |
77 | u16 infglen1; | |
78 | u16 infhoff2; | |
79 | u16 infhlen2; | |
80 | u16 infgoff2; | |
81 | u16 infglen2; | |
82 | u16 infhoff3; | |
83 | u16 infhlen3; | |
84 | u16 infgoff3; | |
85 | u16 infglen3; | |
86 | u8 reserved2[4]; | |
87 | } __packed; | |
88 | ||
89 | struct mac_sctn { | |
90 | u8 infmflg1; /* reserved */ | |
91 | u8 infmflg2; /* reserved */ | |
92 | u8 infmval1; | |
93 | u8 infmval2; /* reserved */ | |
94 | u16 infmscps; | |
95 | u16 infmdcps; | |
96 | u16 infmsifl; | |
97 | u16 infmdifl; | |
98 | char infmname[8]; | |
99 | char infmtype[4]; | |
100 | char infmmanu[16]; | |
101 | char infmseq[16]; | |
102 | char infmpman[4]; | |
103 | u8 reserved[4]; | |
104 | } __packed; | |
105 | ||
106 | struct par_sctn { | |
107 | u8 infpflg1; | |
108 | u8 infpflg2; /* reserved */ | |
109 | u8 infpval1; | |
110 | u8 infpval2; /* reserved */ | |
111 | u16 infppnum; | |
112 | u16 infpscps; | |
113 | u16 infpdcps; | |
114 | u16 infpsifl; | |
115 | u16 infpdifl; | |
116 | u16 reserved; | |
117 | char infppnam[8]; | |
118 | u32 infpwbcp; | |
119 | u32 infpabcp; | |
120 | u32 infpwbif; | |
121 | u32 infpabif; | |
122 | char infplgnm[8]; | |
123 | u32 infplgcp; | |
124 | u32 infplgif; | |
125 | } __packed; | |
126 | ||
127 | struct sthyi_sctns { | |
128 | struct hdr_sctn hdr; | |
129 | struct mac_sctn mac; | |
130 | struct par_sctn par; | |
131 | } __packed; | |
132 | ||
133 | struct cpu_inf { | |
134 | u64 lpar_cap; | |
135 | u64 lpar_grp_cap; | |
136 | u64 lpar_weight; | |
137 | u64 all_weight; | |
138 | int cpu_num_ded; | |
139 | int cpu_num_shd; | |
140 | }; | |
141 | ||
142 | struct lpar_cpu_inf { | |
143 | struct cpu_inf cp; | |
144 | struct cpu_inf ifl; | |
145 | }; | |
146 | ||
147 | static inline u64 cpu_id(u8 ctidx, void *diag224_buf) | |
148 | { | |
149 | return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); | |
150 | } | |
151 | ||
/*
 * Scales the cpu capping from the lpar range to the one expected in
 * sthyi data.
 *
 * diag204 reports a cap in hundredths of processor units.
 * z/VM's range for one core is 0 - 0x10000.
 *
 * The multiplication is done in 64 bit: a 32 bit product would wrap
 * around for caps of 0x10000 hundredths and above and yield a bogus
 * (too small) result.
 */
static u32 scale_cap(u32 in)
{
	return ((u64)in * 0x10000) / 100;
}
163 | ||
164 | static void fill_hdr(struct sthyi_sctns *sctns) | |
165 | { | |
166 | sctns->hdr.infhdln = sizeof(sctns->hdr); | |
167 | sctns->hdr.infmoff = sizeof(sctns->hdr); | |
168 | sctns->hdr.infmlen = sizeof(sctns->mac); | |
169 | sctns->hdr.infplen = sizeof(sctns->par); | |
170 | sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; | |
171 | sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; | |
172 | } | |
173 | ||
174 | static void fill_stsi_mac(struct sthyi_sctns *sctns, | |
175 | struct sysinfo_1_1_1 *sysinfo) | |
176 | { | |
177 | if (stsi(sysinfo, 1, 1, 1)) | |
178 | return; | |
179 | ||
180 | sclp_ocf_cpc_name_copy(sctns->mac.infmname); | |
181 | ||
182 | memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); | |
183 | memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); | |
184 | memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); | |
185 | memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); | |
186 | ||
187 | sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD; | |
188 | } | |
189 | ||
190 | static void fill_stsi_par(struct sthyi_sctns *sctns, | |
191 | struct sysinfo_2_2_2 *sysinfo) | |
192 | { | |
193 | if (stsi(sysinfo, 2, 2, 2)) | |
194 | return; | |
195 | ||
196 | sctns->par.infppnum = sysinfo->lpar_number; | |
197 | memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); | |
198 | ||
199 | sctns->par.infpval1 |= PAR_ID_VLD; | |
200 | } | |
201 | ||
202 | static void fill_stsi(struct sthyi_sctns *sctns) | |
203 | { | |
204 | void *sysinfo; | |
205 | ||
206 | /* Errors are handled through the validity bits in the response. */ | |
207 | sysinfo = (void *)__get_free_page(GFP_KERNEL); | |
208 | if (!sysinfo) | |
209 | return; | |
210 | ||
211 | fill_stsi_mac(sctns, sysinfo); | |
212 | fill_stsi_par(sctns, sysinfo); | |
213 | ||
214 | free_pages((unsigned long)sysinfo, 0); | |
215 | } | |
216 | ||
217 | static void fill_diag_mac(struct sthyi_sctns *sctns, | |
218 | struct diag204_x_phys_block *block, | |
219 | void *diag224_buf) | |
220 | { | |
221 | int i; | |
222 | ||
223 | for (i = 0; i < block->hdr.cpus; i++) { | |
224 | switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { | |
225 | case CP: | |
226 | if (block->cpus[i].weight == DED_WEIGHT) | |
227 | sctns->mac.infmdcps++; | |
228 | else | |
229 | sctns->mac.infmscps++; | |
230 | break; | |
231 | case IFL: | |
232 | if (block->cpus[i].weight == DED_WEIGHT) | |
233 | sctns->mac.infmdifl++; | |
234 | else | |
235 | sctns->mac.infmsifl++; | |
236 | break; | |
237 | } | |
238 | } | |
239 | sctns->mac.infmval1 |= MAC_CNT_VLD; | |
240 | } | |
241 | ||
242 | /* Returns a pointer to the the next partition block. */ | |
243 | static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, | |
244 | bool this_lpar, | |
245 | void *diag224_buf, | |
246 | struct diag204_x_part_block *block) | |
247 | { | |
248 | int i, capped = 0, weight_cp = 0, weight_ifl = 0; | |
249 | struct cpu_inf *cpu_inf; | |
250 | ||
251 | for (i = 0; i < block->hdr.rcpus; i++) { | |
252 | if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) | |
253 | continue; | |
254 | ||
255 | switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { | |
256 | case CP: | |
257 | cpu_inf = &part_inf->cp; | |
258 | if (block->cpus[i].cur_weight < DED_WEIGHT) | |
259 | weight_cp |= block->cpus[i].cur_weight; | |
260 | break; | |
261 | case IFL: | |
262 | cpu_inf = &part_inf->ifl; | |
263 | if (block->cpus[i].cur_weight < DED_WEIGHT) | |
264 | weight_ifl |= block->cpus[i].cur_weight; | |
265 | break; | |
266 | default: | |
267 | continue; | |
268 | } | |
269 | ||
270 | if (!this_lpar) | |
271 | continue; | |
272 | ||
273 | capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; | |
274 | cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; | |
275 | cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; | |
276 | ||
277 | if (block->cpus[i].weight == DED_WEIGHT) | |
278 | cpu_inf->cpu_num_ded += 1; | |
279 | else | |
280 | cpu_inf->cpu_num_shd += 1; | |
281 | } | |
282 | ||
283 | if (this_lpar && capped) { | |
284 | part_inf->cp.lpar_weight = weight_cp; | |
285 | part_inf->ifl.lpar_weight = weight_ifl; | |
286 | } | |
287 | part_inf->cp.all_weight += weight_cp; | |
288 | part_inf->ifl.all_weight += weight_ifl; | |
289 | return (struct diag204_x_part_block *)&block->cpus[i]; | |
290 | } | |
291 | ||
292 | static void fill_diag(struct sthyi_sctns *sctns) | |
293 | { | |
294 | int i, r, pages; | |
295 | bool this_lpar; | |
296 | void *diag204_buf; | |
297 | void *diag224_buf = NULL; | |
298 | struct diag204_x_info_blk_hdr *ti_hdr; | |
299 | struct diag204_x_part_block *part_block; | |
300 | struct diag204_x_phys_block *phys_block; | |
301 | struct lpar_cpu_inf lpar_inf = {}; | |
302 | ||
303 | /* Errors are handled through the validity bits in the response. */ | |
304 | pages = diag204((unsigned long)DIAG204_SUBC_RSI | | |
305 | (unsigned long)DIAG204_INFO_EXT, 0, NULL); | |
306 | if (pages <= 0) | |
307 | return; | |
308 | ||
309 | diag204_buf = vmalloc(PAGE_SIZE * pages); | |
310 | if (!diag204_buf) | |
311 | return; | |
312 | ||
313 | r = diag204((unsigned long)DIAG204_SUBC_STIB7 | | |
314 | (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf); | |
315 | if (r < 0) | |
316 | goto out; | |
317 | ||
318 | diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA); | |
319 | if (!diag224_buf || diag224(diag224_buf)) | |
320 | goto out; | |
321 | ||
322 | ti_hdr = diag204_buf; | |
323 | part_block = diag204_buf + sizeof(*ti_hdr); | |
324 | ||
325 | for (i = 0; i < ti_hdr->npar; i++) { | |
326 | /* | |
327 | * For the calling lpar we also need to get the cpu | |
328 | * caps and weights. The time information block header | |
329 | * specifies the offset to the partition block of the | |
330 | * caller lpar, so we know when we process its data. | |
331 | */ | |
332 | this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; | |
333 | part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf, | |
334 | part_block); | |
335 | } | |
336 | ||
337 | phys_block = (struct diag204_x_phys_block *)part_block; | |
338 | part_block = diag204_buf + ti_hdr->this_part; | |
339 | if (part_block->hdr.mtid) | |
340 | sctns->par.infpflg1 = PAR_MT_EN; | |
341 | ||
342 | sctns->par.infpval1 |= PAR_GRP_VLD; | |
343 | sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap); | |
344 | sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap); | |
345 | memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, | |
346 | sizeof(sctns->par.infplgnm)); | |
347 | ||
348 | sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; | |
349 | sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; | |
350 | sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; | |
351 | sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; | |
352 | sctns->par.infpval1 |= PAR_PCNT_VLD; | |
353 | ||
354 | sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap); | |
355 | sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap); | |
356 | sctns->par.infpval1 |= PAR_ABS_VLD; | |
357 | ||
358 | /* | |
359 | * Everything below needs global performance data to be | |
360 | * meaningful. | |
361 | */ | |
362 | if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { | |
363 | sctns->hdr.infhflg1 |= HDR_PERF_UNAV; | |
364 | goto out; | |
365 | } | |
366 | ||
367 | fill_diag_mac(sctns, phys_block, diag224_buf); | |
368 | ||
369 | if (lpar_inf.cp.lpar_weight) { | |
370 | sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * | |
371 | lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; | |
372 | } | |
373 | ||
374 | if (lpar_inf.ifl.lpar_weight) { | |
375 | sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * | |
376 | lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; | |
377 | } | |
378 | sctns->par.infpval1 |= PAR_WGHT_VLD; | |
379 | ||
380 | out: | |
381 | kfree(diag224_buf); | |
382 | vfree(diag204_buf); | |
383 | } | |
384 | ||
385 | static int sthyi(u64 vaddr) | |
386 | { | |
387 | register u64 code asm("0") = 0; | |
388 | register u64 addr asm("2") = vaddr; | |
389 | int cc; | |
390 | ||
391 | asm volatile( | |
392 | ".insn rre,0xB2560000,%[code],%[addr]\n" | |
393 | "ipm %[cc]\n" | |
394 | "srl %[cc],28\n" | |
395 | : [cc] "=d" (cc) | |
396 | : [code] "d" (code), [addr] "a" (addr) | |
397 | : "memory", "cc"); | |
398 | return cc; | |
399 | } | |
400 | ||
401 | int handle_sthyi(struct kvm_vcpu *vcpu) | |
402 | { | |
403 | int reg1, reg2, r = 0; | |
404 | u64 code, addr, cc = 0; | |
405 | struct sthyi_sctns *sctns = NULL; | |
406 | ||
7d0a5e62 JF |
407 | /* |
408 | * STHYI requires extensive locking in the higher hypervisors | |
409 | * and is very computational/memory expensive. Therefore we | |
410 | * ratelimit the executions per VM. | |
411 | */ | |
412 | if (!__ratelimit(&vcpu->kvm->arch.sthyi_limit)) { | |
413 | kvm_s390_retry_instr(vcpu); | |
414 | return 0; | |
415 | } | |
416 | ||
95ca2cb5 JF |
417 | kvm_s390_get_regs_rre(vcpu, ®1, ®2); |
418 | code = vcpu->run->s.regs.gprs[reg1]; | |
419 | addr = vcpu->run->s.regs.gprs[reg2]; | |
420 | ||
421 | vcpu->stat.instruction_sthyi++; | |
422 | VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr); | |
423 | trace_kvm_s390_handle_sthyi(vcpu, code, addr); | |
424 | ||
425 | if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK) | |
426 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | |
427 | ||
428 | if (code & 0xffff) { | |
429 | cc = 3; | |
430 | goto out; | |
431 | } | |
432 | ||
433 | /* | |
434 | * If the page has not yet been faulted in, we want to do that | |
435 | * now and not after all the expensive calculations. | |
436 | */ | |
437 | r = write_guest(vcpu, addr, reg2, &cc, 1); | |
438 | if (r) | |
439 | return kvm_s390_inject_prog_cond(vcpu, r); | |
440 | ||
441 | sctns = (void *)get_zeroed_page(GFP_KERNEL); | |
442 | if (!sctns) | |
443 | return -ENOMEM; | |
444 | ||
445 | /* | |
446 | * If we are a guest, we don't want to emulate an emulated | |
447 | * instruction. We ask the hypervisor to provide the data. | |
448 | */ | |
449 | if (test_facility(74)) { | |
450 | cc = sthyi((u64)sctns); | |
451 | goto out; | |
452 | } | |
453 | ||
454 | fill_hdr(sctns); | |
455 | fill_stsi(sctns); | |
456 | fill_diag(sctns); | |
457 | ||
458 | out: | |
459 | if (!cc) { | |
460 | r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); | |
461 | if (r) { | |
462 | free_page((unsigned long)sctns); | |
463 | return kvm_s390_inject_prog_cond(vcpu, r); | |
464 | } | |
465 | } | |
466 | ||
467 | free_page((unsigned long)sctns); | |
468 | vcpu->run->s.regs.gprs[reg2 + 1] = cc ? 4 : 0; | |
469 | kvm_s390_set_psw_cc(vcpu, cc); | |
470 | return r; | |
471 | } |