"lock addl $0, (%{re}sp)". */
static int avoid_fence = 0;
-/* Type of the previous instruction. */
+/* 1 if lfence should be inserted after every load. */
+static int lfence_after_load = 0;
+
+/* Controls whether lfence should be inserted before an indirect near
+   branch, and for which operand forms (register operand, memory
+   operand, or both); set from -mlfence-before-indirect-branch=.
+   lfence_branch_none (0) disables the insertion.  */
+static enum lfence_before_indirect_branch_kind
+ {
+ lfence_branch_none = 0,
+ lfence_branch_register,
+ lfence_branch_memory,
+ lfence_branch_all
+ }
+lfence_before_indirect_branch;
+
+/* Controls whether lfence should be inserted before ret, and which
+   dummy instruction (not/or/shl on the value at the stack pointer)
+   is emitted ahead of the lfence; set from -mlfence-before-ret=.
+   lfence_before_ret_none (0) disables the insertion.  */
+static enum lfence_before_ret_kind
+ {
+ lfence_before_ret_none = 0,
+ lfence_before_ret_not,
+ lfence_before_ret_or,
+ lfence_before_ret_shl
+ }
+lfence_before_ret;
+
+/* Type of the previous instruction: e.g. whether it was a data
+   directive such as .byte, or a prefix.  */
static struct
{
segT seg;
CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
{ STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
CPU_ENQCMD_FLAGS, 0 },
+ { STRING_COMMA_LEN (".serialize"), PROCESSOR_UNKNOWN,
+ CPU_SERIALIZE_FLAGS, 0 },
{ STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
CPU_RDPRU_FLAGS, 0 },
{ STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
CPU_MCOMMIT_FLAGS, 0 },
{ STRING_COMMA_LEN (".sev_es"), PROCESSOR_UNKNOWN,
CPU_SEV_ES_FLAGS, 0 },
+ { STRING_COMMA_LEN (".tsxldtrk"), PROCESSOR_UNKNOWN,
+ CPU_TSXLDTRK_FLAGS, 0 },
};
static const noarch_entry cpu_noarch[] =
{ STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
  /* Fixed: this entry previously used CPU_ANY_SHSTK_FLAGS, which would
     disable SHSTK instead of AVX512_VP2INTERSECT when the user writes
     ".arch .noavx512_vp2intersect".  */
{ STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_AVX512_VP2INTERSECT_FLAGS },
{ STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
+ { STRING_COMMA_LEN ("noserialize"), CPU_ANY_SERIALIZE_FLAGS },
+ { STRING_COMMA_LEN ("notsxldtrk"), CPU_ANY_TSXLDTRK_FLAGS },
};
#ifdef I386COFF
here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
down-conversion vpmov*. */
|| ((t->operand_types[wanted].bitfield.class == RegSIMD
- && !t->opcode_modifier.broadcast
- && (t->operand_types[wanted].bitfield.byte
- || t->operand_types[wanted].bitfield.word
- || t->operand_types[wanted].bitfield.dword
- || t->operand_types[wanted].bitfield.qword))
+ && t->operand_types[wanted].bitfield.byte
+ + t->operand_types[wanted].bitfield.word
+ + t->operand_types[wanted].bitfield.dword
+ + t->operand_types[wanted].bitfield.qword
+ > !!t->opcode_modifier.broadcast)
? (i.types[given].bitfield.xmmword
|| i.types[given].bitfield.ymmword
|| i.types[given].bitfield.zmmword)
}
}
+/* Return non-zero if the instruction currently being assembled (the
+   global `i') may load from memory.  Used by insert_lfence_after ()
+   to decide where -mlfence-after-load=yes must emit an lfence.
+   The checks are keyed off i.tm.base_opcode/extension_opcode; the
+   specific opcode values below mirror the legacy x86 opcode map.  */
+
+static int
+load_insn_p (void)
+{
+ unsigned int dest;
+ int any_vex_p = is_any_vex_encoding (&i.tm);
+ /* Fold the even (byte) variant of an opcode pair into its odd
+ (word/dword) variant so both can be matched with one compare. */
+ unsigned int base_opcode = i.tm.base_opcode | 1;
+
+ if (!any_vex_p)
+ {
+ /* Anysize insns: lea, invlpg, clflush, prefetchnta, prefetcht0,
+ prefetcht1, prefetcht2, prefetchtw, bndmk, bndcl, bndcu, bndcn,
+ bndstx, bndldx, prefetchwt1, clflushopt, clwb, cldemote. */
+ if (i.tm.opcode_modifier.anysize)
+ return 0;
+
+ /* pop, popf, popa. */
+ if (strcmp (i.tm.name, "pop") == 0
+ || i.tm.base_opcode == 0x9d
+ || i.tm.base_opcode == 0x61)
+ return 1;
+
+ /* movs, cmps, lods, scas. */
+ if ((i.tm.base_opcode | 0xb) == 0xaf)
+ return 1;
+
+ /* outs, xlatb. */
+ if (base_opcode == 0x6f
+ || i.tm.base_opcode == 0xd7)
+ return 1;
+ /* NB: For AMD-specific insns with implicit memory operands,
+ they're intentionally not covered. */
+ }
+
+ /* No memory operand. */
+ if (!i.mem_operands)
+ return 0;
+
+ if (any_vex_p)
+ {
+ /* vldmxcsr. */
+ if (i.tm.base_opcode == 0xae
+ && i.tm.opcode_modifier.vex
+ && i.tm.opcode_modifier.vexopcode == VEX0F
+ && i.tm.extension_opcode == 2)
+ return 1;
+ }
+ else
+ {
+ /* test, not, neg, mul, imul, div, idiv. */
+ if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
+ && i.tm.extension_opcode != 1)
+ return 1;
+
+ /* inc, dec. */
+ if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
+ return 1;
+
+ /* add, or, adc, sbb, and, sub, xor, cmp. */
+ if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
+ return 1;
+
+ /* bt, bts, btr, btc. */
+ if (i.tm.base_opcode == 0xfba
+ && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
+ return 1;
+
+ /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
+ if ((base_opcode == 0xc1
+ || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
+ && i.tm.extension_opcode != 6)
+ return 1;
+
+ /* cmpxchg8b, cmpxchg16b, xrstors. */
+ if (i.tm.base_opcode == 0xfc7
+ && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3))
+ return 1;
+
+ /* fxrstor, ldmxcsr, xrstor. */
+ if (i.tm.base_opcode == 0xfae
+ && (i.tm.extension_opcode == 1
+ || i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 5))
+ return 1;
+
+ /* lgdt, lidt, lmsw. */
+ if (i.tm.base_opcode == 0xf01
+ && (i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 3
+ || i.tm.extension_opcode == 6))
+ return 1;
+
+ /* vmptrld */
+ if (i.tm.base_opcode == 0xfc7
+ && i.tm.extension_opcode == 6)
+ return 1;
+
+ /* Check for x87 instructions. The D8-DF escape range may load or
+ store; only the pure-store forms are skipped below. */
+ if (i.tm.base_opcode >= 0xd8 && i.tm.base_opcode <= 0xdf)
+ {
+ /* Skip fst, fstp, fstenv, fstcw. */
+ if (i.tm.base_opcode == 0xd9
+ && (i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 3
+ || i.tm.extension_opcode == 6
+ || i.tm.extension_opcode == 7))
+ return 0;
+
+ /* Skip fisttp, fist, fistp, fstp. */
+ if (i.tm.base_opcode == 0xdb
+ && (i.tm.extension_opcode == 1
+ || i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 3
+ || i.tm.extension_opcode == 7))
+ return 0;
+
+ /* Skip fisttp, fst, fstp, fsave, fstsw. */
+ if (i.tm.base_opcode == 0xdd
+ && (i.tm.extension_opcode == 1
+ || i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 3
+ || i.tm.extension_opcode == 6
+ || i.tm.extension_opcode == 7))
+ return 0;
+
+ /* Skip fisttp, fist, fistp, fbstp, fistp. */
+ if (i.tm.base_opcode == 0xdf
+ && (i.tm.extension_opcode == 1
+ || i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 3
+ || i.tm.extension_opcode == 6
+ || i.tm.extension_opcode == 7))
+ return 0;
+
+ return 1;
+ }
+ }
+
+ dest = i.operands - 1;
+
+ /* Check fake imm8 operand and 3 source operands. */
+ if ((i.tm.opcode_modifier.immext
+ || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
+ && i.types[dest].bitfield.imm8)
+ dest--;
+
+ /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg, xadd */
+ if (!any_vex_p
+ && (base_opcode == 0x1
+ || base_opcode == 0x9
+ || base_opcode == 0x11
+ || base_opcode == 0x19
+ || base_opcode == 0x21
+ || base_opcode == 0x29
+ || base_opcode == 0x31
+ || base_opcode == 0x39
+ || (i.tm.base_opcode >= 0x84 && i.tm.base_opcode <= 0x87)
+ || base_opcode == 0xfc1))
+ return 1;
+
+ /* Check for load instruction. */
+ return (i.types[dest].bitfield.class != ClassNone
+ || i.types[dest].bitfield.instance == Accum);
+}
+
+/* If -mlfence-after-load=yes, emit an lfence (bytes 0x0f 0xae 0xe8)
+   immediately after the instruction just output, whenever
+   load_insn_p () classifies that instruction as a load.  */
+
+static void
+insert_lfence_after (void)
+{
+ if (lfence_after_load && load_insn_p ())
+ {
+ /* There are also two REP string instructions that require
+ special treatment. Specifically, the compare string (CMPS)
+ and scan string (SCAS) instructions set EFLAGS in a manner
+ that depends on the data being compared/scanned. When used
+ with a REP prefix, the number of iterations may therefore
+ vary depending on this data. If the data is a program secret
+ chosen by the adversary using an LVI method,
+ then this data-dependent behavior may leak some aspect
+ of the secret. */
+ if (((i.tm.base_opcode | 0x1) == 0xa7
+ || (i.tm.base_opcode | 0x1) == 0xaf)
+ && i.prefix[REP_PREFIX])
+ {
+ as_warn (_("`%s` changes flags which would affect control flow behavior"),
+ i.tm.name);
+ }
+ char *p = frag_more (3);
+ *p++ = 0xf;
+ *p++ = 0xae;
+ *p = 0xe8;
+ }
+}
+
+/* Emit an lfence (bytes 0x0f 0xae 0xe8) before the instruction about
+   to be output, as requested by -mlfence-before-indirect-branch= and
+   -mlfence-before-ret=.  For ret, a dummy not/or/shl operating on the
+   value at the stack pointer is emitted ahead of the lfence.  */
+
+static void
+insert_lfence_before (void)
+{
+ char *p;
+
+ if (is_any_vex_encoding (&i.tm))
+ return;
+
+ /* 0xff /2 is an indirect near call, 0xff /4 an indirect near jmp. */
+ if (i.tm.base_opcode == 0xff
+ && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
+ {
+ /* Insert lfence before indirect branch if needed. */
+
+ if (lfence_before_indirect_branch == lfence_branch_none)
+ return;
+
+ if (i.operands != 1)
+ abort ();
+
+ if (i.reg_operands == 1)
+ {
+ /* Indirect branch via register. Don't insert lfence with
+ -mlfence-after-load=yes. */
+ if (lfence_after_load
+ || lfence_before_indirect_branch == lfence_branch_memory)
+ return;
+ }
+ else if (i.mem_operands == 1
+ && lfence_before_indirect_branch != lfence_branch_register)
+ {
+ as_warn (_("indirect `%s` with memory operand should be avoided"),
+ i.tm.name);
+ return;
+ }
+ else
+ return;
+
+ if (last_insn.kind != last_insn_other
+ && last_insn.seg == now_seg)
+ {
+ as_warn_where (last_insn.file, last_insn.line,
+ _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
+ last_insn.name, i.tm.name);
+ return;
+ }
+
+ p = frag_more (3);
+ *p++ = 0xf;
+ *p++ = 0xae;
+ *p = 0xe8;
+ return;
+ }
+
+ /* Output or/not/shl and lfence before ret/lret/iret.
+ 0xc2/0xc3 are near ret, 0xca/0xcb far ret (lret), 0xcf iret. */
+ if (lfence_before_ret != lfence_before_ret_none
+ && (i.tm.base_opcode == 0xc2
+ || i.tm.base_opcode == 0xc3
+ || i.tm.base_opcode == 0xca
+ || i.tm.base_opcode == 0xcb
+ || i.tm.base_opcode == 0xcf))
+ {
+ if (last_insn.kind != last_insn_other
+ && last_insn.seg == now_seg)
+ {
+ as_warn_where (last_insn.file, last_insn.line,
+ _("`%s` skips -mlfence-before-ret on `%s`"),
+ last_insn.name, i.tm.name);
+ return;
+ }
+
+ /* lret or iret. */
+ bfd_boolean lret = (i.tm.base_opcode | 0x5) == 0xcf;
+ bfd_boolean has_rexw = i.prefix[REX_PREFIX] & REX_W;
+ char prefix = 0x0;
+ /* Default operand size for far return is 32 bits,
+ 64 bits for near return. */
+ /* A near ret ignores an operand size override under CPU64. */
+ if ((!lret && flag_code == CODE_64BIT) || has_rexw)
+ prefix = 0x48;
+ else if (i.prefix[DATA_PREFIX])
+ prefix = 0x66;
+
+ if (lfence_before_ret == lfence_before_ret_not)
+ {
+ /* not: 0xf71424, may add prefix
+ for operand size override or 64-bit code. */
+ p = frag_more ((prefix ? 2 : 0) + 6 + 3);
+ if (prefix)
+ *p++ = prefix;
+ *p++ = 0xf7;
+ *p++ = 0x14;
+ *p++ = 0x24;
+ if (prefix)
+ *p++ = prefix;
+ *p++ = 0xf7;
+ *p++ = 0x14;
+ *p++ = 0x24;
+ }
+ else
+ {
+ p = frag_more ((prefix ? 1 : 0) + 4 + 3);
+ if (prefix)
+ *p++ = prefix;
+ if (lfence_before_ret == lfence_before_ret_or)
+ {
+ /* or: 0x830c2400, may add prefix
+ for operand size override or 64-bit code. */
+ *p++ = 0x83;
+ *p++ = 0x0c;
+ }
+ else
+ {
+ /* shl: 0xc1242400, may add prefix
+ for operand size override or 64-bit code. */
+ *p++ = 0xc1;
+ *p++ = 0x24;
+ }
+
+ *p++ = 0x24;
+ *p++ = 0x0;
+ }
+
+ *p++ = 0xf;
+ *p++ = 0xae;
+ *p = 0xe8;
+ }
+}
+
/* This is the guts of the machine-dependent assembler. LINE points to a
machine dependent instruction. This function is supposed to emit
the frags/bytes it assembles to. */
if (i.rex != 0)
add_prefix (REX_OPCODE | i.rex);
+ insert_lfence_before ();
+
/* We are ready to output the insn. */
output_insn ();
+ insert_lfence_after ();
+
last_insn.seg = now_seg;
if (i.tm.opcode_modifier.isprefix)
}
overlap = operand_type_and (type, t->operand_types[op]);
+ if (t->operand_types[op].bitfield.class == RegSIMD
+ && t->operand_types[op].bitfield.byte
+ + t->operand_types[op].bitfield.word
+ + t->operand_types[op].bitfield.dword
+ + t->operand_types[op].bitfield.qword > 1)
+ {
+ overlap.bitfield.xmmword = 0;
+ overlap.bitfield.ymmword = 0;
+ overlap.bitfield.zmmword = 0;
+ }
if (operand_type_all_zero (&overlap))
goto bad_broadcast;
for (j = 0; j < MAX_OPERANDS; j++)
operand_types[j] = t->operand_types[j];
- /* In general, don't allow 64-bit operands in 32-bit mode. */
- if (i.suffix == QWORD_MNEM_SUFFIX
- && flag_code != CODE_64BIT
+ /* In general, don't allow
+ - 64-bit operands outside of 64-bit mode,
+ - 32-bit operands on pre-386. */
+ j = i.imm_operands + (t->operands > i.imm_operands + 1);
+ if (((i.suffix == QWORD_MNEM_SUFFIX
+ && flag_code != CODE_64BIT
+ && (t->base_opcode != 0x0fc7
+ || t->extension_opcode != 1 /* cmpxchg8b */))
+ || (i.suffix == LONG_MNEM_SUFFIX
+ && !cpu_arch_flags.bitfield.cpui386))
&& (intel_syntax
? (t->opcode_modifier.mnemonicsize != IGNORESIZE
- && !t->opcode_modifier.broadcast
&& !intel_float_operand (t->name))
: intel_float_operand (t->name) != 2)
- && ((operand_types[0].bitfield.class != RegMMX
- && operand_types[0].bitfield.class != RegSIMD)
- || (operand_types[t->operands > 1].bitfield.class != RegMMX
- && operand_types[t->operands > 1].bitfield.class != RegSIMD))
- && (t->base_opcode != 0x0fc7
- || t->extension_opcode != 1 /* cmpxchg8b */))
- continue;
-
- /* In general, don't allow 32-bit operands on pre-386. */
- else if (i.suffix == LONG_MNEM_SUFFIX
- && !cpu_arch_flags.bitfield.cpui386
- && (intel_syntax
- ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
- && !intel_float_operand (t->name))
- : intel_float_operand (t->name) != 2)
- && ((operand_types[0].bitfield.class != RegMMX
- && operand_types[0].bitfield.class != RegSIMD)
- || (operand_types[t->operands > 1].bitfield.class != RegMMX
- && operand_types[t->operands > 1].bitfield.class
- != RegSIMD)))
+ && (t->operands == i.imm_operands
+ || (operand_types[i.imm_operands].bitfield.class != RegMMX
+ && operand_types[i.imm_operands].bitfield.class != RegSIMD
+ && operand_types[i.imm_operands].bitfield.class != RegMask)
+ || (operand_types[j].bitfield.class != RegMMX
+ && operand_types[j].bitfield.class != RegSIMD
+ && operand_types[j].bitfield.class != RegMask))
+ && !t->opcode_modifier.vecsib)
continue;
/* Do not verify operands when there are none. */
- else
- {
- if (!t->operands)
- /* We've found a match; break out of loop. */
- break;
- }
+ if (!t->operands)
+ /* We've found a match; break out of loop. */
+ break;
if (!t->opcode_modifier.jump
|| t->opcode_modifier.jump == JUMP_ABSOLUTE)
if (i.suffix == QWORD_MNEM_SUFFIX
&& flag_code == CODE_64BIT
&& !i.tm.opcode_modifier.norex64
+ && !i.tm.opcode_modifier.vexw
/* Special case for xchg %rax,%rax. It is NOP and doesn't
need rex64. */
&& ! (i.operands == 2
#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
+#define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
+#define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
+#define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
struct option md_longopts[] =
{
{"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
{"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
{"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
+ {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
+ {"mlfence-before-indirect-branch", required_argument, NULL,
+ OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
+ {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
{"mamd64", no_argument, NULL, OPTION_MAMD64},
{"mintel64", no_argument, NULL, OPTION_MINTEL64},
{NULL, no_argument, NULL, 0}
as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
break;
+ case OPTION_MLFENCE_AFTER_LOAD:
+ if (strcasecmp (arg, "yes") == 0)
+ lfence_after_load = 1;
+ else if (strcasecmp (arg, "no") == 0)
+ lfence_after_load = 0;
+ else
+ as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
+ break;
+
+ case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
+ if (strcasecmp (arg, "all") == 0)
+ {
+ lfence_before_indirect_branch = lfence_branch_all;
+ if (lfence_before_ret == lfence_before_ret_none)
+ lfence_before_ret = lfence_before_ret_shl;
+ }
+ else if (strcasecmp (arg, "memory") == 0)
+ lfence_before_indirect_branch = lfence_branch_memory;
+ else if (strcasecmp (arg, "register") == 0)
+ lfence_before_indirect_branch = lfence_branch_register;
+ else if (strcasecmp (arg, "none") == 0)
+ lfence_before_indirect_branch = lfence_branch_none;
+ else
+ as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
+ arg);
+ break;
+
+ case OPTION_MLFENCE_BEFORE_RET:
+ if (strcasecmp (arg, "or") == 0)
+ lfence_before_ret = lfence_before_ret_or;
+ else if (strcasecmp (arg, "not") == 0)
+ lfence_before_ret = lfence_before_ret_not;
+ else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
+ lfence_before_ret = lfence_before_ret_shl;
+ else if (strcasecmp (arg, "none") == 0)
+ lfence_before_ret = lfence_before_ret_none;
+ else
+ as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
+ arg);
+ break;
+
case OPTION_MRELAX_RELOCATIONS:
if (strcasecmp (arg, "yes") == 0)
generate_relax_relocations = 1;
-mbranches-within-32B-boundaries\n\
align branches within 32 byte boundary\n"));
fprintf (stream, _("\
+ -mlfence-after-load=[no|yes] (default: no)\n\
+ generate lfence after load\n"));
+ fprintf (stream, _("\
+ -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
+ generate lfence before indirect near branch\n"));
+ fprintf (stream, _("\
+ -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
+ generate lfence before ret\n"));
+ fprintf (stream, _("\
-mamd64 accept only AMD64 ISA [default]\n"));
fprintf (stream, _("\
-mintel64 accept only Intel64 ISA\n"));
last_insn.kind = last_insn_directive;
last_insn.name = "constant directive";
last_insn.file = as_where (&last_insn.line);
+ if (lfence_before_ret != lfence_before_ret_none)
+ {
+ if (lfence_before_indirect_branch != lfence_branch_none)
+ as_warn (_("constant directive skips -mlfence-before-ret "
+ "and -mlfence-before-indirect-branch"));
+ else
+ as_warn (_("constant directive skips -mlfence-before-ret"));
+ }
+ else if (lfence_before_indirect_branch != lfence_branch_none)
+ as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
}
}