[deliverable/linux.git] / arch / x86 / kernel / uprobes.c

/*
 * User-space Probes (UProbes) for x86
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2008-2011
 * Authors:
 *	Srikar Dronamraju
 *	Jim Keniston
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <linux/uprobes.h>

#include <linux/kdebug.h>
#include <asm/insn.h>

/*
 * Post-execution fixups.
 *
 * Bit flags accumulated in arch_uprobe->fixups by the analysis code in this
 * file (prepare_fixups() and, on 64-bit, handle_riprel_insn()).
 */

/* No fixup needed */
#define UPROBE_FIX_NONE	0x0
/* Adjust IP back to vicinity of actual insn */
#define UPROBE_FIX_IP		0x1
/* Adjust the return address of a call insn */
#define UPROBE_FIX_CALL	0x2

/*
 * The instruction was rip-relative and has been rewritten to address its
 * memory operand through a scratch register: %rax (RIP_AX) or %rcx (RIP_CX).
 */
#define UPROBE_FIX_RIP_AX	0x8000
#define UPROBE_FIX_RIP_CX	0x4000

/*
 * Adaptations for mhiramat x86 decoder v14.
 *
 * OPCODEn(insn) yields the n-th decoded opcode byte and MODRM_REG(insn) the
 * reg field of the decoded ModRM byte.  @insn is a struct insn pointer; it
 * is parenthesized in every expansion so that pointer expressions such as
 * "p + 1" are accepted as well as plain identifiers.
 */
#define OPCODE1(insn)		((insn)->opcode.bytes[0])
#define OPCODE2(insn)		((insn)->opcode.bytes[1])
#define OPCODE3(insn)		((insn)->opcode.bytes[2])
#define MODRM_REG(insn)		X86_MODRM_REG((insn)->modrm.value)

/*
 * W() packs one 16-opcode row of a good-instruction table into a bit mask.
 *
 * @row is the opcode value of the row's first column (0x00, 0x10, ...) and
 * b0..bf are literal 0/1 flags for the 16 opcodes of that row.  The 16 flags
 * are shifted to bit position (row % 32), so two consecutive rows OR'ed
 * together fill one 32-bit table word.  The flags are token-pasted with "UL"
 * and therefore must be plain integer literals.  @row is parenthesized so
 * that an expression argument is reduced modulo 32 as a whole.
 */
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
	 << ((row) % 32))

/*
 * Good-instruction table for 32-bit apps: a 256-bit bitmap indexed by the
 * first opcode byte, where a set bit marks an opcode that
 * validate_insn_32bits() accepts.  Each u32 holds two 16-opcode rows.
 *
 * This is non-const and volatile to keep gcc from statically optimizing it
 * out, as variable_test_bit makes some versions of gcc think that only
 * *(unsigned long*) is used.
 */
static volatile u32 good_insns_32[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
	W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */
	W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
	W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};

/*
 * Good-instruction table for 2-byte opcodes, used for both 64-bit and
 * 32-bit apps.  It is a 256-bit bitmap indexed by the second opcode byte
 * and is consulted by the validate_insn_*bits() helpers only when the
 * decoded opcode is two bytes long.
 */
static volatile u32 good_2byte_insns[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
	W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
	W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
	W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};

#ifdef CONFIG_X86_64
/*
 * Good-instruction table for 64-bit apps: a 256-bit bitmap indexed by the
 * first opcode byte, where a set bit marks an opcode that
 * validate_insn_64bits() accepts.  Only built when CONFIG_X86_64 is set.
 */
static volatile u32 good_insns_64[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
	W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
	W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
	W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};
#endif
/* The row-packing helper is only needed for the tables above. */
#undef W

/*
 * opcodes we'll probably never support:
 *
 *  6c-6d, e4-e5, ec-ed - in
 *  6e-6f, e6-e7, ee-ef - out
 *  cc, cd - int3, int
 *  cf - iret
 *  d6 - illegal instruction
 *  f1 - int1/icebp
 *  f4 - hlt
 *  fa, fb - cli, sti
 *  0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
 *
 * invalid opcodes in 64-bit mode:
 *
 *  06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
 *  63 - we support this opcode in x86_64 but not in i386.
 *
 * opcodes we may need to refine support for:
 *
 *  0f - 2-byte instructions: For many of these instructions, the validity
 *  depends on the prefix and/or the reg field.  On such instructions, we
 *  just consider the opcode combination valid if it corresponds to any
 *  valid instruction.
 *
 *  8f - Group 1 - only reg = 0 is OK
 *  c6-c7 - Group 11 - only reg = 0 is OK
 *  d9-df - fpu insns with some illegal encodings
 *  f2, f3 - repnz, repz prefixes.  These are also the first byte for
 *  certain floating-point instructions, such as addsd.
 *
 *  fe - Group 4 - only reg = 0 or 1 is OK
 *  ff - Group 5 - only reg = 0-6 is OK
 *
 * others -- Do we need to support these?
 *
 *  0f - (floating-point?) prefetch instructions
 *  07, 17, 1f - pop es, pop ss, pop ds
 *  26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
 *	but 64 and 65 (fs: and gs:) seem to be used, so we support them
 *  67 - addr16 prefix
 *  ce - into
 *  f0 - lock prefix
 */

/*
 * TODO:
 * - Where necessary, examine the modrm byte and allow only valid instructions
 * in the different Groups and fpu instructions.
 */

/*
 * is_prefix_bad - check whether the instruction carries an unsupported prefix.
 * @insn: instruction whose prefixes have already been decoded.
 *
 * Returns true if any recorded prefix byte is an es:/cs:/ss:/ds: segment
 * override or the lock prefix, false otherwise.
 *
 * The scan is capped at the capacity of insn->prefixes.bytes[] rather than
 * trusting insn->prefixes.nbytes alone.
 * NOTE(review): per the x86 decoder, nbytes counts every prefix byte
 * (repeats included) while bytes[] stores only distinct prefixes, so nbytes
 * may exceed the array size -- confirm against asm/insn.h.
 */
static bool is_prefix_bad(struct insn *insn)
{
	int nr_slots = sizeof(insn->prefixes.bytes) / sizeof(insn->prefixes.bytes[0]);
	int nr = insn->prefixes.nbytes;
	int i;

	/* Never index past the end of the fixed-size prefix array. */
	if (nr > nr_slots)
		nr = nr_slots;

	for (i = 0; i < nr; i++) {
		switch (insn->prefixes.bytes[i]) {
		case 0x26:	/* INAT_PFX_ES   */
		case 0x2E:	/* INAT_PFX_CS   */
		case 0x36:	/* INAT_PFX_SS   */
		case 0x3E:	/* INAT_PFX_DS   */
		case 0xF0:	/* INAT_PFX_LOCK */
			return true;
		default:
			break;
		}
	}
	return false;
}

static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
{
	insn_init(insn, auprobe->insn, false);

	/* Skip good instruction prefixes; reject "bad" ones. */
	insn_get_opcode(insn);
	if (is_prefix_bad(insn))
		return -ENOTSUPP;

	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
		return 0;

	if (insn->opcode.nbytes == 2) {
		if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
			return 0;
	}

	return -ENOTSUPP;
}

/*
 * prepare_fixups - record which post-execution fixups the instruction needs.
 * @auprobe: probe whose ->fixups flags are updated.
 * @insn: decoded instruction.
 *
 * The flags are OR'ed into arch_uprobe->fixups, which on entry is either
 * zero or already reflects rip-related fixups.  By default the IP needs
 * adjusting and the return address does not; control-transfer instructions
 * override those defaults below.
 */
static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
{
	bool adjust_ip = true;
	bool adjust_call = false;

	insn_get_opcode(insn);	/* should be a nop */

	switch (OPCODE1(insn)) {
	case 0xc2:		/* ret/lret: ip is correct */
	case 0xc3:
	case 0xca:
	case 0xcb:
	case 0xea:		/* jmp absolute: ip is correct */
		adjust_ip = false;
		break;
	case 0xe8:		/* call relative: fix return addr (and ip) */
		adjust_call = true;
		break;
	case 0x9a:		/* call absolute: fix return addr, not ip */
		adjust_call = true;
		adjust_ip = false;
		break;
	case 0xff:
		/* Group 5: the modrm reg field selects the operation. */
		insn_get_modrm(insn);
		switch (MODRM_REG(insn)) {
		case 2:		/* call or lcall, indirect */
		case 3:
			/* Fix return addr; ip is correct. */
			adjust_call = true;
			adjust_ip = false;
			break;
		case 4:		/* jmp or ljmp, indirect */
		case 5:
			/* ip is correct. */
			adjust_ip = false;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	if (adjust_ip)
		auprobe->fixups |= UPROBE_FIX_IP;
	if (adjust_call)
		auprobe->fixups |= UPROBE_FIX_CALL;
}

#ifdef CONFIG_X86_64
/*
 * handle_riprel_insn - rewrite a rip-relative instruction for out-of-line use.
 * @auprobe: probe whose ->insn copy is patched in place.
 * @mm: probed address space; nothing is done for ia32-compat tasks.
 * @insn: decoder state for auprobe->insn.
 *
 * If arch_uprobe->insn doesn't use rip-relative addressing, only
 * arch_uprobe->rip_rela_target_address is cleared.  Otherwise the
 * instruction is rewritten so that it reaches its memory operand
 * indirectly through a scratch register, and arch_uprobe->fixups and
 * arch_uprobe->rip_rela_target_address are set accordingly.  (The contents
 * of the scratch register will be saved before the modified instruction is
 * single-stepped, and restored afterward.)
 *
 * The rewrite is needed because a rip-relative instruction can reach only
 * +/- 2 GB from itself, and the XOL area typically lies beyond that.  At
 * least for instructions that store to memory, executing the original
 * instruction and "fixing things up" later is not an option, because the
 * misdirected store could be disastrous.
 *
 * Facts about rip-relative instructions relied upon here:
 *
 *  - There's always a modrm byte.
 *  - There's never a SIB byte.
 *  - The displacement is always 4 bytes.
 */
static void
handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
	u8 *rex_byte, *modrm_byte;
	u8 modrm_reg;

	if (mm->context.ia32_compat)
		return;

	auprobe->rip_rela_target_address = 0x0;
	if (!insn_rip_relative(insn))
		return;

	/*
	 * insn_rip_relative() would have decoded rex_prefix and modrm.
	 * Drop REX.B (the extension of MODRM.rm) so that the rewritten r/m
	 * field names rax/rcx, not r8/r9.
	 */
	if (insn->rex_prefix.nbytes) {
		rex_byte = auprobe->insn + insn_offset_rex_prefix(insn);
		*rex_byte &= 0xfe;
	}

	/*
	 * Locate the modrm byte.  It is followed by the 4-byte displacement
	 * and then, for some instructions, by an immediate operand.
	 */
	modrm_byte = auprobe->insn + insn_offset_modrm(insn);
	insn_get_length(insn);

	/*
	 * Switch from rip-relative to register-indirect addressing: the r/m
	 * field changes from 0x5 (%rip) to 0x0 (%rax) or 0x1 (%rcx).
	 */
	modrm_reg = MODRM_REG(insn);
	if (modrm_reg != 0) {
		/*
		 * Scratch register is %rax (register #0):
		 * modrm 00 xxx 101 becomes 00 xxx 000.
		 */
		auprobe->fixups = UPROBE_FIX_RIP_AX;
		*modrm_byte = (modrm_reg << 3);
	} else {
		/*
		 * The register operand (if any) is the A register or, with
		 * the 0x4 bit set in the REX prefix, %r8 -- never the C
		 * register.  So %rcx (register #1) is free to act as the
		 * scratch register: modrm 00 000 101 becomes 00 000 001.
		 */
		auprobe->fixups = UPROBE_FIX_RIP_CX;
		*modrm_byte = 0x1;
	}

	/*
	 * Only instruction length + (signed) displacement is recorded here;
	 * the address of the probed instruction is not added in this
	 * function.
	 */
	auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;

	/* No immediate operand: nothing follows the displacement. */
	if (!insn->immediate.nbytes)
		return;

	/* The displacement field is gone; slide the immediate over it. */
	memmove(modrm_byte + 1, modrm_byte + 1 + insn->displacement.nbytes,
		insn->immediate.nbytes);
}

static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
{
	insn_init(insn, auprobe->insn, true);

	/* Skip good instruction prefixes; reject "bad" ones. */
	insn_get_opcode(insn);
	if (is_prefix_bad(insn))
		return -ENOTSUPP;

	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
		return 0;

	if (insn->opcode.nbytes == 2) {
		if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
			return 0;
	}
	return -ENOTSUPP;
}

/*
 * Pick the validator matching the task's mode: ia32-compat address spaces
 * are checked against the 32-bit rules, everything else against the 64-bit
 * ones.  Returns whatever the selected validator returns.
 */
static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
	return mm->context.ia32_compat ? validate_insn_32bits(auprobe, insn)
				       : validate_insn_64bits(auprobe, insn);
}
#else /* 32-bit: */
/*
 * 32-bit build: intentionally empty counterpart of the CONFIG_X86_64
 * handle_riprel_insn(), so that the caller needs no #ifdef.
 */
static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
	/* No RIP-relative addressing on 32-bit */
}

/*
 * 32-bit build: every instruction is validated against the 32-bit rules;
 * @mm is unused here.  Returns the result of validate_insn_32bits().
 */
static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,  struct insn *insn)
{
	return validate_insn_32bits(auprobe, insn);
}
#endif /* CONFIG_X86_64 */

/**
 * arch_uprobes_analyze_insn - instruction analysis including validity and fixups.
 * @auprobe: the probepoint information.
 * @mm: the probed address space.
 *
 * Resets @auprobe->fixups, validates the instruction held in @auprobe and,
 * if it is acceptable, records the fixups it will need.
 *
 * Return 0 on success or a -ve number on error.
 */
int arch_uprobes_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
{
	struct insn insn;
	int err;

	auprobe->fixups = 0;

	err = validate_insn_bits(auprobe, mm, &insn);
	if (err)
		return err;

	/*
	 * Order matters: handle_riprel_insn() assigns ->fixups outright,
	 * while prepare_fixups() only ORs further flags into it.
	 */
	handle_riprel_insn(auprobe, mm, &insn);
	prepare_fixups(auprobe, &insn);

	return 0;
}
Commit	Line	Data
2b144498	1	/*
7b2d81d4	2	* User-space Probes (UProbes) for x86
2b144498 SD	3	*
	4	* This program is free software; you can redistribute it and/or modify
	5	* it under the terms of the GNU General Public License as published by
	6	* the Free Software Foundation; either version 2 of the License, or
	7	* (at your option) any later version.
	8	*
	9	* This program is distributed in the hope that it will be useful,
	10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	* GNU General Public License for more details.
	13	*
	14	* You should have received a copy of the GNU General Public License
	15	* along with this program; if not, write to the Free Software
	16	* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	17	*
	18	* Copyright (C) IBM Corporation, 2008-2011
	19	* Authors:
	20	* Srikar Dronamraju
	21	* Jim Keniston
	22	*/
2b144498 SD	23	#include <linux/kernel.h>
	24	#include <linux/sched.h>
	25	#include <linux/ptrace.h>
	26	#include <linux/uprobes.h>
	27
	28	#include <linux/kdebug.h>
	29	#include <asm/insn.h>
	30
	31	/* Post-execution fixups. */
	32
	33	/* No fixup needed */
900771a4	34	#define UPROBE_FIX_NONE 0x0
2b144498	35	/* Adjust IP back to vicinity of actual insn */
900771a4	36	#define UPROBE_FIX_IP 0x1
2b144498	37	/* Adjust the return address of a call insn */
900771a4	38	#define UPROBE_FIX_CALL 0x2
2b144498	39
900771a4 SD	40	#define UPROBE_FIX_RIP_AX 0x8000
900771a4 SD	41	#define UPROBE_FIX_RIP_CX 0x4000
2b144498 SD	42
2b144498 SD	43	/* Adaptations for mhiramat x86 decoder v14. */
7b2d81d4 IM	44	#define OPCODE1(insn) ((insn)->opcode.bytes[0])
	45	#define OPCODE2(insn) ((insn)->opcode.bytes[1])
	46	#define OPCODE3(insn) ((insn)->opcode.bytes[2])
	47	#define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value)
2b144498 SD	48
	49	#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
	50	(((b0##UL << 0x0)\|(b1##UL << 0x1)\|(b2##UL << 0x2)\|(b3##UL << 0x3) \| \
	51	(b4##UL << 0x4)\|(b5##UL << 0x5)\|(b6##UL << 0x6)\|(b7##UL << 0x7) \| \
	52	(b8##UL << 0x8)\|(b9##UL << 0x9)\|(ba##UL << 0xa)\|(bb##UL << 0xb) \| \
	53	(bc##UL << 0xc)\|(bd##UL << 0xd)\|(be##UL << 0xe)\|(bf##UL << 0xf)) \
	54	<< (row % 32))
	55
04a3d984 SD	56	/*
	57	* Good-instruction tables for 32-bit apps. This is non-const and volatile
	58	* to keep gcc from statically optimizing it out, as variable_test_bit makes
	59	* some versions of gcc to think only (unsigned long) is used.
	60	*/
	61	static volatile u32 good_insns_32[256 / 32] = {
2b144498 SD	62	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
	63	/* ---------------------------------------------- */
	64	W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) \| /* 00 */
	65	W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
	66	W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) \| /* 20 */
	67	W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
	68	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 40 */
	69	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	70	W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) \| /* 60 */
	71	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
	72	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 80 */
	73	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	74	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* a0 */
	75	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	76	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) \| /* c0 */
	77	W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	78	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) \| /* e0 */
	79	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
	80	/* ---------------------------------------------- */
	81	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
	82	};
	83
	84	/* Using this for both 64-bit and 32-bit apps */
04a3d984	85	static volatile u32 good_2byte_insns[256 / 32] = {
2b144498 SD	86	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
	87	/* ---------------------------------------------- */
	88	W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) \| /* 00 */
	89	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
	90	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 20 */
	91	W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
	92	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 40 */
	93	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	94	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 60 */
	95	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
	96	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 80 */
	97	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	98	W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) \| /* a0 */
	99	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	100	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* c0 */
	101	W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	102	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* e0 */
	103	W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
	104	/* ---------------------------------------------- */
	105	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
	106	};
	107
04a3d984 SD	108	#ifdef CONFIG_X86_64
	109	/* Good-instruction tables for 64-bit apps */
	110	static volatile u32 good_insns_64[256 / 32] = {
	111	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
	112	/* ---------------------------------------------- */
	113	W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) \| /* 00 */
	114	W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
	115	W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) \| /* 20 */
	116	W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
	117	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) \| /* 40 */
	118	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	119	W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) \| /* 60 */
	120	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
	121	W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 80 */
	122	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	123	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* a0 */
	124	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	125	W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) \| /* c0 */
	126	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	127	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) \| /* e0 */
	128	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
	129	/* ---------------------------------------------- */
	130	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
	131	};
	132	#endif
2b144498 SD	133	#undef W
	134
	135	/*
	136	* opcodes we'll probably never support:
7b2d81d4 IM	137	*
	138	* 6c-6d, e4-e5, ec-ed - in
	139	* 6e-6f, e6-e7, ee-ef - out
	140	* cc, cd - int3, int
	141	* cf - iret
	142	* d6 - illegal instruction
	143	* f1 - int1/icebp
	144	* f4 - hlt
	145	* fa, fb - cli, sti
	146	* 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
2b144498 SD	147	*
2b144498 SD	148	* invalid opcodes in 64-bit mode:
2b144498	149	*
7b2d81d4 IM	150	* 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
7b2d81d4 IM	151	* 63 - we support this opcode in x86_64 but not in i386.
2b144498 SD	152	*
2b144498 SD	153	* opcodes we may need to refine support for:
7b2d81d4 IM	154	*
	155	* 0f - 2-byte instructions: For many of these instructions, the validity
	156	* depends on the prefix and/or the reg field. On such instructions, we
	157	* just consider the opcode combination valid if it corresponds to any
	158	* valid instruction.
	159	*
	160	* 8f - Group 1 - only reg = 0 is OK
	161	* c6-c7 - Group 11 - only reg = 0 is OK
	162	* d9-df - fpu insns with some illegal encodings
	163	* f2, f3 - repnz, repz prefixes. These are also the first byte for
	164	* certain floating-point instructions, such as addsd.
	165	*
	166	* fe - Group 4 - only reg = 0 or 1 is OK
	167	* ff - Group 5 - only reg = 0-6 is OK
2b144498 SD	168	*
2b144498 SD	169	* others -- Do we need to support these?
7b2d81d4 IM	170	*
	171	* 0f - (floating-point?) prefetch instructions
	172	* 07, 17, 1f - pop es, pop ss, pop ds
	173	* 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
2b144498	174	* but 64 and 65 (fs: and gs:) seem to be used, so we support them
7b2d81d4 IM	175	* 67 - addr16 prefix
	176	* ce - into
	177	* f0 - lock prefix
2b144498 SD	178	*/
	179
	180	/*
	181	* TODO:
	182	* - Where necessary, examine the modrm byte and allow only valid instructions
	183	* in the different Groups and fpu instructions.
	184	*/
	185
	186	static bool is_prefix_bad(struct insn *insn)
	187	{
	188	int i;
	189
	190	for (i = 0; i < insn->prefixes.nbytes; i++) {
	191	switch (insn->prefixes.bytes[i]) {
7b2d81d4 IM	192	case 0x26: /* INAT_PFX_ES */
	193	case 0x2E: /* INAT_PFX_CS */
	194	case 0x36: /* INAT_PFX_DS */
	195	case 0x3E: /* INAT_PFX_SS */
	196	case 0xF0: /* INAT_PFX_LOCK */
2b144498 SD	197	return true;
	198	}
	199	}
	200	return false;
	201	}
	202
3ff54efd	203	static int validate_insn_32bits(struct arch_uprobe auprobe, struct insn insn)
2b144498	204	{
3ff54efd	205	insn_init(insn, auprobe->insn, false);
2b144498 SD	206
	207	/* Skip good instruction prefixes; reject "bad" ones. */
	208	insn_get_opcode(insn);
	209	if (is_prefix_bad(insn))
	210	return -ENOTSUPP;
7b2d81d4	211
2b144498 SD	212	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
2b144498 SD	213	return 0;
7b2d81d4	214
2b144498 SD	215	if (insn->opcode.nbytes == 2) {
	216	if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
	217	return 0;
	218	}
7b2d81d4	219
2b144498 SD	220	return -ENOTSUPP;
	221	}
	222
	223	/*
	224	* Figure out which fixups post_xol() will need to perform, and annotate
3ff54efd SD	225	* arch_uprobe->fixups accordingly. To start with,
3ff54efd SD	226	* arch_uprobe->fixups is either zero or it reflects rip-related
2b144498 SD	227	* fixups.
2b144498 SD	228	*/
3ff54efd	229	static void prepare_fixups(struct arch_uprobe auprobe, struct insn insn)
2b144498 SD	230	{
	231	bool fix_ip = true, fix_call = false; /* defaults */
	232	int reg;
	233
	234	insn_get_opcode(insn); /* should be a nop */
	235
	236	switch (OPCODE1(insn)) {
	237	case 0xc3: /* ret/lret */
	238	case 0xcb:
	239	case 0xc2:
	240	case 0xca:
	241	/* ip is correct */
	242	fix_ip = false;
	243	break;
	244	case 0xe8: /* call relative - Fix return addr */
	245	fix_call = true;
	246	break;
	247	case 0x9a: /* call absolute - Fix return addr, not ip */
	248	fix_call = true;
	249	fix_ip = false;
	250	break;
	251	case 0xff:
	252	insn_get_modrm(insn);
	253	reg = MODRM_REG(insn);
	254	if (reg == 2 \|\| reg == 3) {
	255	/* call or lcall, indirect */
	256	/* Fix return addr; ip is correct. */
	257	fix_call = true;
	258	fix_ip = false;
	259	} else if (reg == 4 \|\| reg == 5) {
	260	/* jmp or ljmp, indirect */
	261	/* ip is correct. */
	262	fix_ip = false;
	263	}
	264	break;
	265	case 0xea: /* jmp absolute -- ip is correct */
	266	fix_ip = false;
	267	break;
	268	default:
	269	break;
	270	}
	271	if (fix_ip)
900771a4	272	auprobe->fixups \|= UPROBE_FIX_IP;
2b144498	273	if (fix_call)
900771a4	274	auprobe->fixups \|= UPROBE_FIX_CALL;
2b144498 SD	275	}
	276
	277	#ifdef CONFIG_X86_64
	278	/*
3ff54efd	279	* If arch_uprobe->insn doesn't use rip-relative addressing, return
2b144498 SD	280	* immediately. Otherwise, rewrite the instruction so that it accesses
2b144498 SD	281	* its memory operand indirectly through a scratch register. Set
3ff54efd	282	* arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
2b144498 SD	283	* accordingly. (The contents of the scratch register will be saved
	284	* before we single-step the modified instruction, and restored
	285	* afterward.)
	286	*
	287	* We do this because a rip-relative instruction can access only a
	288	* relatively small area (+/- 2 GB from the instruction), and the XOL
	289	* area typically lies beyond that area. At least for instructions
	290	* that store to memory, we can't execute the original instruction
	291	* and "fix things up" later, because the misdirected store could be
	292	* disastrous.
	293	*
	294	* Some useful facts about rip-relative instructions:
7b2d81d4 IM	295	*
	296	* - There's always a modrm byte.
	297	* - There's never a SIB byte.
	298	* - The displacement is always 4 bytes.
2b144498	299	*/
e3343e6a SD	300	static void
e3343e6a SD	301	handle_riprel_insn(struct arch_uprobe auprobe, struct mm_struct mm, struct insn *insn)
2b144498 SD	302	{
	303	u8 *cursor;
	304	u8 reg;
	305
	306	if (mm->context.ia32_compat)
	307	return;
	308
3ff54efd	309	auprobe->rip_rela_target_address = 0x0;
2b144498 SD	310	if (!insn_rip_relative(insn))
	311	return;
	312
	313	/*
	314	* insn_rip_relative() would have decoded rex_prefix, modrm.
	315	* Clear REX.b bit (extension of MODRM.rm field):
	316	* we want to encode rax/rcx, not r8/r9.
	317	*/
	318	if (insn->rex_prefix.nbytes) {
3ff54efd	319	cursor = auprobe->insn + insn_offset_rex_prefix(insn);
2b144498 SD	320	cursor &= 0xfe; / Clearing REX.B bit */
	321	}
	322
	323	/*
	324	* Point cursor at the modrm byte. The next 4 bytes are the
	325	* displacement. Beyond the displacement, for some instructions,
	326	* is the immediate operand.
	327	*/
3ff54efd	328	cursor = auprobe->insn + insn_offset_modrm(insn);
2b144498 SD	329	insn_get_length(insn);
	330
	331	/*
	332	* Convert from rip-relative addressing to indirect addressing
	333	* via a scratch register. Change the r/m field from 0x5 (%rip)
	334	* to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
	335	*/
	336	reg = MODRM_REG(insn);
	337	if (reg == 0) {
	338	/*
	339	* The register operand (if any) is either the A register
	340	* (%rax, %eax, etc.) or (if the 0x4 bit is set in the
	341	* REX prefix) %r8. In any case, we know the C register
	342	* is NOT the register operand, so we use %rcx (register
	343	* #1) for the scratch register.
	344	*/
900771a4	345	auprobe->fixups = UPROBE_FIX_RIP_CX;
2b144498 SD	346	/* Change modrm from 00 000 101 to 00 000 001. */
	347	*cursor = 0x1;
	348	} else {
	349	/* Use %rax (register #0) for the scratch register. */
900771a4	350	auprobe->fixups = UPROBE_FIX_RIP_AX;
2b144498 SD	351	/* Change modrm from 00 xxx 101 to 00 xxx 000 */
	352	*cursor = (reg << 3);
	353	}
	354
	355	/* Target address = address of next instruction + (signed) offset */
3ff54efd	356	auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
7b2d81d4	357
2b144498 SD	358	/* Displacement field is gone; slide immediate field (if any) over. */
	359	if (insn->immediate.nbytes) {
	360	cursor++;
7b2d81d4	361	memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
2b144498 SD	362	}
	363	return;
	364	}
	365
3ff54efd	366	static int validate_insn_64bits(struct arch_uprobe auprobe, struct insn insn)
2b144498	367	{
3ff54efd	368	insn_init(insn, auprobe->insn, true);
2b144498 SD	369
	370	/* Skip good instruction prefixes; reject "bad" ones. */
	371	insn_get_opcode(insn);
	372	if (is_prefix_bad(insn))
	373	return -ENOTSUPP;
7b2d81d4	374
2b144498 SD	375	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
2b144498 SD	376	return 0;
7b2d81d4	377
2b144498 SD	378	if (insn->opcode.nbytes == 2) {
	379	if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
	380	return 0;
	381	}
	382	return -ENOTSUPP;
	383	}
	384
e3343e6a	385	static int validate_insn_bits(struct arch_uprobe auprobe, struct mm_struct mm, struct insn *insn)
2b144498 SD	386	{
2b144498 SD	387	if (mm->context.ia32_compat)
3ff54efd SD	388	return validate_insn_32bits(auprobe, insn);
3ff54efd SD	389	return validate_insn_64bits(auprobe, insn);
2b144498	390	}
7b2d81d4	391	#else /* 32-bit: */
e3343e6a	392	static void handle_riprel_insn(struct arch_uprobe auprobe, struct mm_struct mm, struct insn *insn)
2b144498	393	{
7b2d81d4	394	/* No RIP-relative addressing on 32-bit */
2b144498 SD	395	}
2b144498 SD	396
e3343e6a	397	static int validate_insn_bits(struct arch_uprobe auprobe, struct mm_struct mm, struct insn *insn)
2b144498	398	{
3ff54efd	399	return validate_insn_32bits(auprobe, insn);
2b144498 SD	400	}
	401	#endif /* CONFIG_X86_64 */
	402
	403	/**
7b2d81d4	404	* arch_uprobes_analyze_insn - instruction analysis including validity and fixups.
2b144498	405	* @mm: the probed address space.
3ff54efd	406	* @arch_uprobe: the probepoint information.
2b144498 SD	407	* Return 0 on success or a -ve number on error.
2b144498 SD	408	*/
e3343e6a	409	int arch_uprobes_analyze_insn(struct arch_uprobe auprobe, struct mm_struct mm)
2b144498 SD	410	{
	411	int ret;
	412	struct insn insn;
	413
3ff54efd	414	auprobe->fixups = 0;
e3343e6a	415	ret = validate_insn_bits(auprobe, mm, &insn);
2b144498 SD	416	if (ret != 0)
2b144498 SD	417	return ret;
7b2d81d4	418
e3343e6a	419	handle_riprel_insn(auprobe, mm, &insn);
3ff54efd	420	prepare_fixups(auprobe, &insn);
7b2d81d4	421
2b144498 SD	422	return 0;
2b144498 SD	423	}