uprobes/core: Make order of function parameters consistent across functions
/*
 * User-space Probes (UProbes) for x86
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2008-2011
 * Authors:
 *	Srikar Dronamraju
 *	Jim Keniston
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <linux/uprobes.h>

#include <linux/kdebug.h>
#include <asm/insn.h>

/* Post-execution fixups. */

/* No fixup needed */
#define UPROBE_FIX_NONE		0x0
/* Adjust IP back to vicinity of actual insn */
#define UPROBE_FIX_IP		0x1
/* Adjust the return address of a call insn */
#define UPROBE_FIX_CALL		0x2

#define UPROBE_FIX_RIP_AX	0x8000
#define UPROBE_FIX_RIP_CX	0x4000

/* Adaptations for mhiramat x86 decoder v14. */
#define OPCODE1(insn)		((insn)->opcode.bytes[0])
#define OPCODE2(insn)		((insn)->opcode.bytes[1])
#define OPCODE3(insn)		((insn)->opcode.bytes[2])
#define MODRM_REG(insn)		X86_MODRM_REG(insn->modrm.value)

#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
	 << (row % 32))
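
/*
 * Note (illustrative, not from the original source): each W() row packs
 * 16 opcode flags into bits 0-15, and the (row % 32) shift folds two
 * rows into one 32-bit word.  E.g. in the tables below,
 * W(0x00, ...) | W(0x10, ...) forms the word covering opcodes
 * 0x00-0x1f, so bit n of the bitmap is 1 iff opcode n may be probed.
 */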

/*
 * Good-instruction tables for 32-bit apps.  This is non-const and volatile
 * to keep gcc from statically optimizing it out, as variable_test_bit makes
 * some versions of gcc think only *(unsigned long*) is used.
 */
static volatile u32 good_insns_32[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
	W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */
	W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
	W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};
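
/*
 * Example lookup (illustrative): int3 (opcode 0xcc) lands in row 0xc0
 * above, where columns c-f are 0, so test_bit(0xcc, ...) returns 0 and
 * the instruction is rejected as unprobeable.
 */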

/* Using this for both 64-bit and 32-bit apps */
static volatile u32 good_2byte_insns[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
	W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
	W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
	W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};

#ifdef CONFIG_X86_64
/* Good-instruction tables for 64-bit apps */
static volatile u32 good_insns_64[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
	W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
	W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
	W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};
#endif
#undef W

/*
 * opcodes we'll probably never support:
 *
 *  6c-6d, e4-e5, ec-ed - in
 *  6e-6f, e6-e7, ee-ef - out
 *  cc, cd - int3, int
 *  cf - iret
 *  d6 - illegal instruction
 *  f1 - int1/icebp
 *  f4 - hlt
 *  fa, fb - cli, sti
 *  0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
 *
 * invalid opcodes in 64-bit mode:
 *
 *  06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
 *  63 - we support this opcode in x86_64 but not in i386.
 *
 * opcodes we may need to refine support for:
 *
 *  0f - 2-byte instructions: For many of these instructions, the validity
 *  depends on the prefix and/or the reg field.  On such instructions, we
 *  just consider the opcode combination valid if it corresponds to any
 *  valid instruction.
 *
 *  8f - Group 1 - only reg = 0 is OK
 *  c6-c7 - Group 11 - only reg = 0 is OK
 *  d9-df - fpu insns with some illegal encodings
 *  f2, f3 - repnz, repz prefixes.  These are also the first byte for
 *  certain floating-point instructions, such as addsd.
 *
 *  fe - Group 4 - only reg = 0 or 1 is OK
 *  ff - Group 5 - only reg = 0-6 is OK
 *
 * others -- Do we need to support these?
 *
 *  0f - (floating-point?) prefetch instructions
 *  07, 17, 1f - pop es, pop ss, pop ds
 *  26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
 *	but 64 and 65 (fs: and gs:) seem to be used, so we support them
 *  67 - addr16 prefix
 *  ce - into
 *  f0 - lock prefix
 */

/*
 * TODO:
 * - Where necessary, examine the modrm byte and allow only valid instructions
 *   in the different Groups and fpu instructions.
 */

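/*
 * Reject explicit segment overrides and the lock prefix.  For
 * illustration (not in the original source): "lock cmpxchg %ecx,(%rdx)"
 * (f0 0f b1 0a) is refused here even though the cmpxchg opcode itself
 * (0f b1) is marked good in good_2byte_insns.
 */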
static bool is_prefix_bad(struct insn *insn)
{
	int i;

	for (i = 0; i < insn->prefixes.nbytes; i++) {
		switch (insn->prefixes.bytes[i]) {
		case 0x26:	/* INAT_PFX_ES   */
		case 0x2E:	/* INAT_PFX_CS   */
		case 0x36:	/* INAT_PFX_SS   */
		case 0x3E:	/* INAT_PFX_DS   */
		case 0xF0:	/* INAT_PFX_LOCK */
			return true;
		}
	}
	return false;
}

static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
{
	insn_init(insn, auprobe->insn, false);

	/* Skip good instruction prefixes; reject "bad" ones. */
	insn_get_opcode(insn);
	if (is_prefix_bad(insn))
		return -ENOTSUPP;

	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
		return 0;

	if (insn->opcode.nbytes == 2) {
		if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
			return 0;
	}

	return -ENOTSUPP;
}

/*
 * Figure out which fixups post_xol() will need to perform, and annotate
 * arch_uprobe->fixups accordingly.  To start with, arch_uprobe->fixups
 * is either zero or it reflects rip-related fixups.
 */
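/*
 * For illustration (not in the original source): a probed
 * "call *%rax" (ff d0, reg field 2) sets only UPROBE_FIX_CALL --
 * the return address pushed while single-stepping in the XOL slot
 * must be adjusted, while the register-indirect target already
 * leaves the resulting ip correct.
 */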
static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
{
	bool fix_ip = true, fix_call = false;	/* defaults */
	int reg;

	insn_get_opcode(insn);	/* should be a nop */

	switch (OPCODE1(insn)) {
	case 0xc3:		/* ret/lret */
	case 0xcb:
	case 0xc2:
	case 0xca:
		/* ip is correct */
		fix_ip = false;
		break;
	case 0xe8:		/* call relative - Fix return addr */
		fix_call = true;
		break;
	case 0x9a:		/* call absolute - Fix return addr, not ip */
		fix_call = true;
		fix_ip = false;
		break;
	case 0xff:
		insn_get_modrm(insn);
		reg = MODRM_REG(insn);
		if (reg == 2 || reg == 3) {
			/* call or lcall, indirect */
			/* Fix return addr; ip is correct. */
			fix_call = true;
			fix_ip = false;
		} else if (reg == 4 || reg == 5) {
			/* jmp or ljmp, indirect */
			/* ip is correct. */
			fix_ip = false;
		}
		break;
	case 0xea:		/* jmp absolute -- ip is correct */
		fix_ip = false;
		break;
	default:
		break;
	}
	if (fix_ip)
		auprobe->fixups |= UPROBE_FIX_IP;
	if (fix_call)
		auprobe->fixups |= UPROBE_FIX_CALL;
}

#ifdef CONFIG_X86_64
/*
 * If arch_uprobe->insn doesn't use rip-relative addressing, return
 * immediately.  Otherwise, rewrite the instruction so that it accesses
 * its memory operand indirectly through a scratch register.  Set
 * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
 * accordingly.  (The contents of the scratch register will be saved
 * before we single-step the modified instruction, and restored
 * afterward.)
 *
 * We do this because a rip-relative instruction can access only a
 * relatively small area (+/- 2 GB from the instruction), and the XOL
 * area typically lies beyond that area.  At least for instructions
 * that store to memory, we can't execute the original instruction
 * and "fix things up" later, because the misdirected store could be
 * disastrous.
 *
 * Some useful facts about rip-relative instructions:
 *
 * - There's always a modrm byte.
 * - There's never a SIB byte.
 * - The displacement is always 4 bytes.
 */
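/*
 * Illustration (hypothetical encoding, not from the original source):
 * a probed
 *	8b 1d 78 56 34 12	mov 0x12345678(%rip),%ebx
 * has reg field 3, so the copy in auprobe->insn is rewritten to
 *	8b 18			mov (%rax),%ebx
 * with UPROBE_FIX_RIP_AX set; rip_rela_target_address records
 * insn length + displacement, so the real target can later be
 * recomputed relative to the probed address.
 */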
static void
handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
	u8 *cursor;
	u8 reg;

	if (mm->context.ia32_compat)
		return;

	auprobe->rip_rela_target_address = 0x0;
	if (!insn_rip_relative(insn))
		return;

	/*
	 * insn_rip_relative() would have decoded rex_prefix, modrm.
	 * Clear REX.b bit (extension of MODRM.rm field):
	 * we want to encode rax/rcx, not r8/r9.
	 */
	if (insn->rex_prefix.nbytes) {
		cursor = auprobe->insn + insn_offset_rex_prefix(insn);
		*cursor &= 0xfe;	/* Clearing REX.B bit */
	}

	/*
	 * Point cursor at the modrm byte.  The next 4 bytes are the
	 * displacement.  Beyond the displacement, for some instructions,
	 * is the immediate operand.
	 */
	cursor = auprobe->insn + insn_offset_modrm(insn);
	insn_get_length(insn);

	/*
	 * Convert from rip-relative addressing to indirect addressing
	 * via a scratch register.  Change the r/m field from 0x5 (%rip)
	 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
	 */
	reg = MODRM_REG(insn);
	if (reg == 0) {
		/*
		 * The register operand (if any) is either the A register
		 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
		 * REX prefix) %r8.  In any case, we know the C register
		 * is NOT the register operand, so we use %rcx (register
		 * #1) for the scratch register.
		 */
		auprobe->fixups = UPROBE_FIX_RIP_CX;
		/* Change modrm from 00 000 101 to 00 000 001. */
		*cursor = 0x1;
	} else {
		/* Use %rax (register #0) for the scratch register. */
		auprobe->fixups = UPROBE_FIX_RIP_AX;
		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
		*cursor = (reg << 3);
	}

	/* Target address = address of next instruction + (signed) offset */
	auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;

	/* Displacement field is gone; slide immediate field (if any) over. */
	if (insn->immediate.nbytes) {
		cursor++;
		memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
	}
	return;
}

static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
{
	insn_init(insn, auprobe->insn, true);

	/* Skip good instruction prefixes; reject "bad" ones. */
	insn_get_opcode(insn);
	if (is_prefix_bad(insn))
		return -ENOTSUPP;

	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
		return 0;

	if (insn->opcode.nbytes == 2) {
		if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
			return 0;
	}
	return -ENOTSUPP;
}

static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
	if (mm->context.ia32_compat)
		return validate_insn_32bits(auprobe, insn);
	return validate_insn_64bits(auprobe, insn);
}
#else /* 32-bit: */
static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
	/* No RIP-relative addressing on 32-bit */
}

static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
	return validate_insn_32bits(auprobe, insn);
}
#endif /* CONFIG_X86_64 */

/**
 * arch_uprobes_analyze_insn - instruction analysis including validity and fixups.
 * @auprobe: the probepoint information.
 * @mm: the probed address space.
 * Return 0 on success or a -ve number on error.
 */
int arch_uprobes_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
{
	int ret;
	struct insn insn;

	auprobe->fixups = 0;
	ret = validate_insn_bits(auprobe, mm, &insn);
	if (ret != 0)
		return ret;

	handle_riprel_insn(auprobe, mm, &insn);
	prepare_fixups(auprobe, &insn);

	return 0;
}