"lock addl $0, (%{re}sp)". */
static int avoid_fence = 0;
-/* Type of the previous instruction. */
+/* 1 if lfence should be inserted after every load.  Set by
+   -mlfence-after-load=yes and cleared by -mlfence-after-load=no. */
+static int lfence_after_load = 0;
+
+/* Non-zero if lfence should be inserted before indirect branch.  Set by
+   -mlfence-before-indirect-branch=[none|register|memory|all].  As used
+   by insert_lfence_before: "register" and "all" put an lfence before an
+   indirect branch through a register (unless lfence_after_load is also
+   set), while "memory" and "all" only produce a warning for an indirect
+   branch through a memory operand. */
+static enum lfence_before_indirect_branch_kind
+ {
+ lfence_branch_none = 0,
+ lfence_branch_register,
+ lfence_branch_memory,
+ lfence_branch_all
+ }
+lfence_before_indirect_branch;
+
+/* Non-zero if lfence should be inserted before ret.  Set by
+   -mlfence-before-ret=[none|not|or].  The value selects what
+   insert_lfence_before emits ahead of the lfence: two "not"
+   instructions for lfence_before_ret_not, one "or" instruction for
+   lfence_before_ret_or. */
+static enum lfence_before_ret_kind
+ {
+ lfence_before_ret_none = 0,
+ lfence_before_ret_not,
+ lfence_before_ret_or
+ }
+lfence_before_ret;
+
+/* Whether the previous instruction was a .byte directive or a prefix. */
static struct
{
segT seg;
CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
{ STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
CPU_ENQCMD_FLAGS, 0 },
+ { STRING_COMMA_LEN (".serialize"), PROCESSOR_UNKNOWN,
+ CPU_SERIALIZE_FLAGS, 0 },
{ STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
CPU_RDPRU_FLAGS, 0 },
{ STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
{ STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
{ STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_SHSTK_FLAGS },
{ STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
+ { STRING_COMMA_LEN ("noserialize"), CPU_ANY_SERIALIZE_FLAGS },
};
#ifdef I386COFF
here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
down-conversion vpmov*. */
|| ((t->operand_types[wanted].bitfield.class == RegSIMD
- && !t->opcode_modifier.broadcast
- && (t->operand_types[wanted].bitfield.byte
- || t->operand_types[wanted].bitfield.word
- || t->operand_types[wanted].bitfield.dword
- || t->operand_types[wanted].bitfield.qword))
+ && t->operand_types[wanted].bitfield.byte
+ + t->operand_types[wanted].bitfield.word
+ + t->operand_types[wanted].bitfield.dword
+ + t->operand_types[wanted].bitfield.qword
+ > !!t->opcode_modifier.broadcast)
? (i.types[given].bitfield.xmmword
|| i.types[given].bitfield.ymmword
|| i.types[given].bitfield.zmmword)
}
}
+/* Return non-zero for load instruction.
+
+   The instruction examined is the one currently held in the global `i'.
+   The checks run in this order:
+
+   1. Non-VEX instructions that read memory through an implicit operand
+      (pop, popf, popa, the string instructions, outs, xlat) are
+      recognised by opcode alone; lea is rejected outright.
+   2. Anything else without an explicit memory operand is not a load.
+   3. Opcodes that read their memory operand even though no register
+      destination exists (read-modify-write ALU forms, bit tests,
+      shifts, state-restore instructions, x87 loads) are listed by
+      opcode and extension opcode.
+   4. Otherwise the instruction is a load when its destination operand
+      (the last operand, skipping a trailing imm8 pseudo operand) has a
+      register class or is the accumulator.  */
+
+static int
+load_insn_p (void)
+{
+  unsigned int dest;
+  int any_vex_p = is_any_vex_encoding (&i.tm);
+  /* Opcode with the low bit forced on, so that both members of an
+     even/odd opcode pair can be matched with a single comparison.  */
+  unsigned int base_opcode = i.tm.base_opcode | 1;
+
+  if (!any_vex_p)
+    {
+      /* lea only computes an address; it never reads memory.  */
+      if (i.tm.base_opcode == 0x8d)
+        return 0;
+
+      /* pop */
+      if ((i.tm.base_opcode & ~7) == 0x58
+          || (i.tm.base_opcode == 0x8f && i.tm.extension_opcode == 0))
+        return 1;
+
+      /* popf, popa.  Like pop they read the stack through an implicit
+         memory operand, so they must be caught before the
+         "no memory operand" test below.  */
+      if (i.tm.base_opcode == 0x9d || i.tm.base_opcode == 0x61)
+        return 1;
+
+      /* movs, cmps, lods, scas. */
+      if ((i.tm.base_opcode | 0xb) == 0xaf)
+        return 1;
+
+      /* outs, xlat.  xlat reads a table byte through an implicit
+         memory operand.  */
+      if (base_opcode == 0x6f || i.tm.base_opcode == 0xd7)
+        return 1;
+    }
+
+  /* No memory operand. */
+  if (!i.mem_operands)
+    return 0;
+
+  if (any_vex_p)
+    {
+      /* vldmxcsr. */
+      if (i.tm.base_opcode == 0xae
+          && i.tm.opcode_modifier.vex
+          && i.tm.opcode_modifier.vexopcode == VEX0F
+          && i.tm.extension_opcode == 2)
+        return 1;
+    }
+  else
+    {
+      /* test, not, neg, mul, imul, div, idiv. */
+      if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
+          && i.tm.extension_opcode != 1)
+        return 1;
+
+      /* inc, dec. */
+      if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
+        return 1;
+
+      /* add, or, adc, sbb, and, sub, xor, cmp. */
+      if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
+        return 1;
+
+      /* bt, bts, btr, btc. */
+      if (i.tm.base_opcode == 0xfba
+          && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
+        return 1;
+
+      /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
+      if ((base_opcode == 0xc1
+           || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
+          && i.tm.extension_opcode != 6)
+        return 1;
+
+      /* cmpxchg8b, cmpxchg16b, xrstors. */
+      if (i.tm.base_opcode == 0xfc7
+          && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3))
+        return 1;
+
+      /* fxrstor, ldmxcsr, xrstor. */
+      if (i.tm.base_opcode == 0xfae
+          && (i.tm.extension_opcode == 1
+              || i.tm.extension_opcode == 2
+              || i.tm.extension_opcode == 5))
+        return 1;
+
+      /* lgdt, lidt, lmsw. */
+      if (i.tm.base_opcode == 0xf01
+          && (i.tm.extension_opcode == 2
+              || i.tm.extension_opcode == 3
+              || i.tm.extension_opcode == 6))
+        return 1;
+
+      /* vmptrld */
+      if (i.tm.base_opcode == 0xfc7
+          && i.tm.extension_opcode == 6)
+        return 1;
+
+      /* Check for x87 instructions.  Every x87 opcode with a memory
+         operand counts as a load except the store forms skipped
+         below.  */
+      if (i.tm.base_opcode >= 0xd8 && i.tm.base_opcode <= 0xdf)
+        {
+          /* Skip fst, fstp, fstenv, fstcw. */
+          if (i.tm.base_opcode == 0xd9
+              && (i.tm.extension_opcode == 2
+                  || i.tm.extension_opcode == 3
+                  || i.tm.extension_opcode == 6
+                  || i.tm.extension_opcode == 7))
+            return 0;
+
+          /* Skip fisttp, fist, fistp, fstp. */
+          if (i.tm.base_opcode == 0xdb
+              && (i.tm.extension_opcode == 1
+                  || i.tm.extension_opcode == 2
+                  || i.tm.extension_opcode == 3
+                  || i.tm.extension_opcode == 7))
+            return 0;
+
+          /* Skip fisttp, fst, fstp, fsave, fstsw. */
+          if (i.tm.base_opcode == 0xdd
+              && (i.tm.extension_opcode == 1
+                  || i.tm.extension_opcode == 2
+                  || i.tm.extension_opcode == 3
+                  || i.tm.extension_opcode == 6
+                  || i.tm.extension_opcode == 7))
+            return 0;
+
+          /* Skip fisttp, fist, fistp, fbstp, fistp. */
+          if (i.tm.base_opcode == 0xdf
+              && (i.tm.extension_opcode == 1
+                  || i.tm.extension_opcode == 2
+                  || i.tm.extension_opcode == 3
+                  || i.tm.extension_opcode == 6
+                  || i.tm.extension_opcode == 7))
+            return 0;
+
+          return 1;
+        }
+    }
+
+  /* The destination is the last operand.  */
+  dest = i.operands - 1;
+
+  /* Check fake imm8 operand and 3 source operands. */
+  if ((i.tm.opcode_modifier.immext
+       || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
+      && i.types[dest].bitfield.imm8)
+    dest--;
+
+  /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg, xadd.  With a
+     memory operand these read memory whichever operand is the
+     destination.  */
+  if (!any_vex_p
+      && (base_opcode == 0x1
+          || base_opcode == 0x9
+          || base_opcode == 0x11
+          || base_opcode == 0x19
+          || base_opcode == 0x21
+          || base_opcode == 0x29
+          || base_opcode == 0x31
+          || base_opcode == 0x39
+          || (i.tm.base_opcode >= 0x84 && i.tm.base_opcode <= 0x87)
+          || base_opcode == 0xfc1))
+    return 1;
+
+  /* Check for load instruction: the destination is a register of some
+     class or the accumulator, so the memory operand is a source.  */
+  return (i.types[dest].bitfield.class != ClassNone
+          || i.types[dest].bitfield.instance == Accum);
+}
+
+/* Append an lfence (bytes 0x0f 0xae 0xe8) to the output when
+   lfence_after_load is set and load_insn_p reports the current
+   instruction as a load.  */
+
+static void
+insert_lfence_after (void)
+{
+  char *fence;
+
+  if (!lfence_after_load || !load_insn_p ())
+    return;
+
+  fence = frag_more (3);
+  fence[0] = 0xf;
+  fence[1] = 0xae;
+  fence[2] = 0xe8;
+}
+
+/* Emit an lfence (bytes 0x0f 0xae 0xe8) ahead of the current instruction
+   when it is an indirect branch through a register, or a ret, and the
+   matching -mlfence-before-indirect-branch / -mlfence-before-ret setting
+   asks for it.  Before a ret the lfence is preceded by one "or" or two
+   "not" instructions, depending on lfence_before_ret.
+
+   Nothing is emitted for VEX-style encodings.  An indirect branch
+   through memory only produces a warning, and so does the case where
+   the previous item in the current section was not an ordinary
+   instruction.  */
+
+static void
+insert_lfence_before (void)
+{
+  char *out;
+
+  if (is_any_vex_encoding (&i.tm))
+    return;
+
+  /* Opcode 0xff with extension 2 or 4: indirect call or jmp.  */
+  if (i.tm.base_opcode == 0xff
+      && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
+    {
+      if (lfence_before_indirect_branch == lfence_branch_none)
+        return;
+
+      if (i.operands != 1)
+        abort ();
+
+      if (i.reg_operands != 1)
+        {
+          /* Not a register operand: at most warn about a memory
+             operand, never emit anything.  */
+          if (i.mem_operands == 1
+              && lfence_before_indirect_branch != lfence_branch_register)
+            as_warn (_("indirect `%s` with memory operand should be avoided"),
+                     i.tm.name);
+          return;
+        }
+
+      /* Indirect branch via register.  Don't insert lfence with
+         -mlfence-after-load=yes, nor when only memory branches were
+         requested.  */
+      if (lfence_after_load
+          || lfence_before_indirect_branch == lfence_branch_memory)
+        return;
+
+      if (last_insn.kind != last_insn_other && last_insn.seg == now_seg)
+        {
+          as_warn_where (last_insn.file, last_insn.line,
+                         _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
+                         last_insn.name, i.tm.name);
+          return;
+        }
+
+      out = frag_more (3);
+      out[0] = 0xf;
+      out[1] = 0xae;
+      out[2] = 0xe8;
+      return;
+    }
+
+  /* From here on only ret is of interest.  */
+  if (lfence_before_ret == lfence_before_ret_none)
+    return;
+
+  switch (i.tm.base_opcode)
+    {
+    case 0xc2:
+    case 0xc3:
+    case 0xca:
+    case 0xcb:
+      break;
+    default:
+      return;
+    }
+
+  if (last_insn.kind != last_insn_other && last_insn.seg == now_seg)
+    {
+      as_warn_where (last_insn.file, last_insn.line,
+                     _("`%s` skips -mlfence-before-ret on `%s`"),
+                     last_insn.name, i.tm.name);
+      return;
+    }
+
+  /* One frag_more call reserves room for the whole sequence: the
+     or/not instruction(s), an extra 0x48 prefix byte per instruction in
+     64-bit mode, and the 3-byte lfence.  */
+  if (lfence_before_ret == lfence_before_ret_or)
+    {
+      /* orl: 0x830c2400. */
+      out = frag_more ((flag_code == CODE_64BIT ? 1 : 0) + 4 + 3);
+      if (flag_code == CODE_64BIT)
+        *out++ = 0x48;
+      *out++ = 0x83;
+      *out++ = 0xc;
+      *out++ = 0x24;
+      *out++ = 0x0;
+    }
+  else
+    {
+      int pass;
+
+      out = frag_more ((flag_code == CODE_64BIT ? 2 : 0) + 6 + 3);
+      /* notl: 0xf71424, emitted twice. */
+      for (pass = 0; pass < 2; pass++)
+        {
+          if (flag_code == CODE_64BIT)
+            *out++ = 0x48;
+          *out++ = 0xf7;
+          *out++ = 0x14;
+          *out++ = 0x24;
+        }
+    }
+
+  *out++ = 0xf;
+  *out++ = 0xae;
+  *out = 0xe8;
+}
+
/* This is the guts of the machine-dependent assembler. LINE points to a
machine dependent instruction. This function is supposed to emit
the frags/bytes it assembles to. */
i.op[0].disps->X_op = O_symbol;
}
- if (i.tm.opcode_modifier.rex64)
- i.rex |= REX_W;
-
/* For 8 bit registers we need an empty rex prefix. Also if the
instruction already has a prefix, we need to convert old
registers to new ones. */
if (i.rex != 0)
add_prefix (REX_OPCODE | i.rex);
+ insert_lfence_before ();
+
/* We are ready to output the insn. */
output_insn ();
+ insert_lfence_after ();
+
last_insn.seg = now_seg;
if (i.tm.opcode_modifier.isprefix)
}
overlap = operand_type_and (type, t->operand_types[op]);
+ if (t->operand_types[op].bitfield.class == RegSIMD
+ && t->operand_types[op].bitfield.byte
+ + t->operand_types[op].bitfield.word
+ + t->operand_types[op].bitfield.dword
+ + t->operand_types[op].bitfield.qword > 1)
+ {
+ overlap.bitfield.xmmword = 0;
+ overlap.bitfield.ymmword = 0;
+ overlap.bitfield.zmmword = 0;
+ }
if (operand_type_all_zero (&overlap))
goto bad_broadcast;
for (j = 0; j < MAX_OPERANDS; j++)
operand_types[j] = t->operand_types[j];
- /* In general, don't allow 64-bit operands in 32-bit mode. */
- if (i.suffix == QWORD_MNEM_SUFFIX
- && flag_code != CODE_64BIT
+ /* In general, don't allow
+ - 64-bit operands outside of 64-bit mode,
+ - 32-bit operands on pre-386. */
+ j = i.imm_operands + (t->operands > i.imm_operands + 1);
+ if (((i.suffix == QWORD_MNEM_SUFFIX
+ && flag_code != CODE_64BIT
+ && (t->base_opcode != 0x0fc7
+ || t->extension_opcode != 1 /* cmpxchg8b */))
+ || (i.suffix == LONG_MNEM_SUFFIX
+ && !cpu_arch_flags.bitfield.cpui386))
&& (intel_syntax
? (t->opcode_modifier.mnemonicsize != IGNORESIZE
- && !t->opcode_modifier.broadcast
&& !intel_float_operand (t->name))
: intel_float_operand (t->name) != 2)
- && ((operand_types[0].bitfield.class != RegMMX
- && operand_types[0].bitfield.class != RegSIMD)
- || (operand_types[t->operands > 1].bitfield.class != RegMMX
- && operand_types[t->operands > 1].bitfield.class != RegSIMD))
- && (t->base_opcode != 0x0fc7
- || t->extension_opcode != 1 /* cmpxchg8b */))
- continue;
-
- /* In general, don't allow 32-bit operands on pre-386. */
- else if (i.suffix == LONG_MNEM_SUFFIX
- && !cpu_arch_flags.bitfield.cpui386
- && (intel_syntax
- ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
- && !intel_float_operand (t->name))
- : intel_float_operand (t->name) != 2)
- && ((operand_types[0].bitfield.class != RegMMX
- && operand_types[0].bitfield.class != RegSIMD)
- || (operand_types[t->operands > 1].bitfield.class != RegMMX
- && operand_types[t->operands > 1].bitfield.class
- != RegSIMD)))
+ && (t->operands == i.imm_operands
+ || (operand_types[i.imm_operands].bitfield.class != RegMMX
+ && operand_types[i.imm_operands].bitfield.class != RegSIMD
+ && operand_types[i.imm_operands].bitfield.class != RegMask)
+ || (operand_types[j].bitfield.class != RegMMX
+ && operand_types[j].bitfield.class != RegSIMD
+ && operand_types[j].bitfield.class != RegMask))
+ && !t->opcode_modifier.vecsib)
continue;
/* Do not verify operands when there are none. */
- else
- {
- if (!t->operands)
- /* We've found a match; break out of loop. */
- break;
- }
+ if (!t->operands)
+ /* We've found a match; break out of loop. */
+ break;
if (!t->opcode_modifier.jump
|| t->opcode_modifier.jump == JUMP_ABSOLUTE)
|| (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
--i.operands;
+ /* crc32 needs REX.W set regardless of suffix / source operand size. */
+ if (i.tm.base_opcode == 0xf20f38f0
+ && i.tm.operand_types[1].bitfield.qword)
+ i.rex |= REX_W;
+
/* If there's no instruction mnemonic suffix we try to invent one
based on GPR operands. */
if (!i.suffix)
if (i.suffix == QWORD_MNEM_SUFFIX
&& flag_code == CODE_64BIT
&& !i.tm.opcode_modifier.norex64
+ && !i.tm.opcode_modifier.vexw
/* Special case for xchg %rax,%rax. It is NOP and doesn't
need rex64. */
&& ! (i.operands == 2
if (i.prefix[ADDR_PREFIX])
addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
+ else if (flag_code == CODE_16BIT
+ && current_templates->start->cpu_flags.bitfield.cpumpx
+ /* Avoid replacing the "16-bit addressing not allowed" diagnostic
+ from md_assemble() by "is not a valid base/index expression"
+ when there is a base and/or index. */
+ && !i.types[this_operand].bitfield.baseindex)
+ {
+ /* MPX insn memory operands with neither base nor index must be forced
+ to use 32-bit addressing in 16-bit mode. */
+ addr_mode = CODE_32BIT;
+ i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
+ ++i.prefixes;
+ gas_assert (!i.types[this_operand].bitfield.disp16);
+ gas_assert (!i.types[this_operand].bitfield.disp32);
+ }
else
{
addr_mode = flag_code;
#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
+#define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
+#define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
+#define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
struct option md_longopts[] =
{
{"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
{"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
{"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
+ {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
+ {"mlfence-before-indirect-branch", required_argument, NULL,
+ OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
+ {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
{"mamd64", no_argument, NULL, OPTION_MAMD64},
{"mintel64", no_argument, NULL, OPTION_MINTEL64},
{NULL, no_argument, NULL, 0}
as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
break;
+ case OPTION_MLFENCE_AFTER_LOAD:
+ if (strcasecmp (arg, "yes") == 0)
+ lfence_after_load = 1;
+ else if (strcasecmp (arg, "no") == 0)
+ lfence_after_load = 0;
+ else
+ as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
+ break;
+
+ case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
+ if (strcasecmp (arg, "all") == 0)
+ lfence_before_indirect_branch = lfence_branch_all;
+ else if (strcasecmp (arg, "memory") == 0)
+ lfence_before_indirect_branch = lfence_branch_memory;
+ else if (strcasecmp (arg, "register") == 0)
+ lfence_before_indirect_branch = lfence_branch_register;
+ else if (strcasecmp (arg, "none") == 0)
+ lfence_before_indirect_branch = lfence_branch_none;
+ else
+ as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
+ arg);
+ break;
+
+ case OPTION_MLFENCE_BEFORE_RET:
+ if (strcasecmp (arg, "or") == 0)
+ lfence_before_ret = lfence_before_ret_or;
+ else if (strcasecmp (arg, "not") == 0)
+ lfence_before_ret = lfence_before_ret_not;
+ else if (strcasecmp (arg, "none") == 0)
+ lfence_before_ret = lfence_before_ret_none;
+ else
+ as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
+ arg);
+ break;
+
case OPTION_MRELAX_RELOCATIONS:
if (strcasecmp (arg, "yes") == 0)
generate_relax_relocations = 1;
-mbranches-within-32B-boundaries\n\
align branches within 32 byte boundary\n"));
fprintf (stream, _("\
+ -mlfence-after-load=[no|yes] (default: no)\n\
+ generate lfence after load\n"));
+ fprintf (stream, _("\
+ -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
+ generate lfence before indirect near branch\n"));
+ fprintf (stream, _("\
+ -mlfence-before-ret=[none|or|not] (default: none)\n\
+ generate lfence before ret\n"));
+ fprintf (stream, _("\
-mamd64 accept only AMD64 ISA [default]\n"));
fprintf (stream, _("\
-mintel64 accept only Intel64 ISA\n"));
last_insn.kind = last_insn_directive;
last_insn.name = "constant directive";
last_insn.file = as_where (&last_insn.line);
+ if (lfence_before_ret != lfence_before_ret_none)
+ {
+ if (lfence_before_indirect_branch != lfence_branch_none)
+ as_warn (_("constant directive skips -mlfence-before-ret "
+ "and -mlfence-before-indirect-branch"));
+ else
+ as_warn (_("constant directive skips -mlfence-before-ret"));
+ }
+ else if (lfence_before_indirect_branch != lfence_branch_none)
+ as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
}
}