/* tc-i386.c -- Assemble code for the Intel 80386
- Copyright (C) 1989-2019 Free Software Foundation, Inc.
+ Copyright (C) 1989-2020 Free Software Foundation, Inc.
This file is part of GAS, the GNU Assembler.
#endif
#endif
-#ifndef REGISTER_WARNINGS
-#define REGISTER_WARNINGS 1
-#endif
-
#ifndef INFER_ADDR_PREFIX
#define INFER_ADDR_PREFIX 1
#endif
static char *parse_operands (char *, const char *);
static void swap_operands (void);
static void swap_2_operands (int, int);
+static enum flag_code i386_addressing_mode (void);
static void optimize_imm (void);
static void optimize_disp (void);
static const insn_template *match_template (char);
unsigned int prefixes;
unsigned char prefix[MAX_PREFIXES];
+ /* Register is in low 3 bits of opcode. */
+ bfd_boolean short_form;
+
/* The operand to a branch insn indicates an absolute branch. */
bfd_boolean jumpabsolute;
enum
{
vex_encoding_default = 0,
- vex_encoding_vex2,
+ vex_encoding_vex,
vex_encoding_vex3,
vex_encoding_evex
} vec_encoding;
0 if att syntax. */
static int intel_syntax = 0;
-/* 1 for Intel64 ISA,
- 0 if AMD64 ISA. */
-static int intel64;
+static enum x86_64_isa
+{
+ amd64 = 1, /* AMD64 ISA. */
+ intel64 /* Intel64 ISA. */
+} isa64;
/* 1 for intel mnemonic,
0 if att mnemonic. */
| align_branch_fused_bit
| align_branch_jmp_bit);
+/* Types of condition jump used by macro-fusion. */
+enum mf_jcc_kind
+ {
+ mf_jcc_jo = 0, /* base opcode 0x70 */
+ mf_jcc_jc, /* base opcode 0x72 */
+ mf_jcc_je, /* base opcode 0x74 */
+ mf_jcc_jna, /* base opcode 0x76 */
+ mf_jcc_js, /* base opcode 0x78 */
+ mf_jcc_jp, /* base opcode 0x7a */
+ mf_jcc_jl, /* base opcode 0x7c */
+ mf_jcc_jle, /* base opcode 0x7e */
+ };
+
+/* Types of compare flag-modifying insntructions used by macro-fusion. */
+enum mf_cmp_kind
+ {
+ mf_cmp_test_and, /* test/cmp */
+ mf_cmp_alu_cmp, /* add/sub/cmp */
+ mf_cmp_incdec /* inc/dec */
+ };
+
/* The maximum padding size for fused jcc. CMP like instruction can
be 9 bytes and jcc can be 6 bytes. Leave room just in case for
prefixes. */
CPU_SSE2_FLAGS, 0 },
{ STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN,
CPU_SSE3_FLAGS, 0 },
+ { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
+ CPU_SSE4A_FLAGS, 0 },
{ STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN,
CPU_SSSE3_FLAGS, 0 },
{ STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN,
CPU_EPT_FLAGS, 0 },
{ STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
CPU_LZCNT_FLAGS, 0 },
+ { STRING_COMMA_LEN (".popcnt"), PROCESSOR_UNKNOWN,
+ CPU_POPCNT_FLAGS, 0 },
{ STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
CPU_HLE_FLAGS, 0 },
{ STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
CPU_RDPRU_FLAGS, 0 },
{ STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
CPU_MCOMMIT_FLAGS, 0 },
+ { STRING_COMMA_LEN (".sev_es"), PROCESSOR_UNKNOWN,
+ CPU_SEV_ES_FLAGS, 0 },
};
static const noarch_entry cpu_noarch[] =
{ STRING_COMMA_LEN ("nosse"), CPU_ANY_SSE_FLAGS },
{ STRING_COMMA_LEN ("nosse2"), CPU_ANY_SSE2_FLAGS },
{ STRING_COMMA_LEN ("nosse3"), CPU_ANY_SSE3_FLAGS },
+ { STRING_COMMA_LEN ("nosse4a"), CPU_ANY_SSE4A_FLAGS },
{ STRING_COMMA_LEN ("nossse3"), CPU_ANY_SSSE3_FLAGS },
{ STRING_COMMA_LEN ("nosse4.1"), CPU_ANY_SSE4_1_FLAGS },
{ STRING_COMMA_LEN ("nosse4.2"), CPU_ANY_SSE4_2_FLAGS },
return x;
}
+static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
+
#define CPU_FLAGS_ARCH_MATCH 0x1
#define CPU_FLAGS_64BIT_MATCH 0x2
if (!t->opcode_modifier.d)
{
-mismatch:
+ mismatch:
if (!match)
i.error = operand_size_mismatch;
return match;
if (given.bitfield.baseindex == overlap.bitfield.baseindex)
return 1;
-mismatch:
+ mismatch:
i.error = operand_type_mismatch;
return 0;
}
/* If given types g0 and g1 are registers they must be of the same type
unless the expected operand type register overlap is null.
- Memory operand size of certain SIMD instructions is also being checked
- here. */
+ Some Intel syntax memory operand size checking also happens here. */
static INLINE int
operand_type_register_match (i386_operand_type g0,
&& g0.bitfield.class != RegSIMD
&& (!operand_type_check (g0, anymem)
|| g0.bitfield.unspecified
- || t0.bitfield.class != RegSIMD))
+ || (t0.bitfield.class != Reg
+ && t0.bitfield.class != RegSIMD)))
return 1;
if (g1.bitfield.class != Reg
&& g1.bitfield.class != RegSIMD
&& (!operand_type_check (g1, anymem)
|| g1.bitfield.unspecified
- || t1.bitfield.class != RegSIMD))
+ || (t1.bitfield.class != Reg
+ && t1.bitfield.class != RegSIMD)))
return 1;
if (g0.bitfield.byte == g1.bitfield.byte
if (fixP->fx_r_type == BFD_RELOC_SIZE32
|| fixP->fx_r_type == BFD_RELOC_SIZE64
|| fixP->fx_r_type == BFD_RELOC_386_GOTOFF
- || fixP->fx_r_type == BFD_RELOC_386_PLT32
|| fixP->fx_r_type == BFD_RELOC_386_GOT32
|| fixP->fx_r_type == BFD_RELOC_386_GOT32X
|| fixP->fx_r_type == BFD_RELOC_386_TLS_GD
|| fixP->fx_r_type == BFD_RELOC_386_TLS_LE
|| fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
|| fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
- || fixP->fx_r_type == BFD_RELOC_X86_64_PLT32
|| fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
|| fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
|| fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
unsigned int j;
if (optimize_for_space
+ && !is_any_vex_encoding (&i.tm)
&& i.reg_operands == 1
&& i.imm_operands == 1
&& !i.types[1].bitfield.byte
&& i.op[0].imms->X_op == O_constant
&& fits_in_imm7 (i.op[0].imms->X_add_number)
- && ((i.tm.base_opcode == 0xa8
- && i.tm.extension_opcode == None)
+ && (i.tm.base_opcode == 0xa8
|| (i.tm.base_opcode == 0xf6
&& i.tm.extension_opcode == 0x0)))
{
i.types[1].bitfield.byte = 1;
/* Ignore the suffix. */
i.suffix = 0;
- if (base_regnum >= 4)
- {
- /* Handle SP, BP, SI, DI and R12-R15 registers. */
- if (i.types[1].bitfield.word)
- j = 16;
- else if (i.types[1].bitfield.dword)
- j = 32;
- else
- j = 48;
- i.op[1].regs -= j;
- }
+ /* Convert to byte registers. */
+ if (i.types[1].bitfield.word)
+ j = 16;
+ else if (i.types[1].bitfield.dword)
+ j = 32;
+ else
+ j = 48;
+ if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
+ j += 8;
+ i.op[1].regs -= j;
}
}
else if (flag_code == CODE_64BIT
+ && !is_any_vex_encoding (&i.tm)
&& ((i.types[1].bitfield.qword
&& i.reg_operands == 1
&& i.imm_operands == 1
&& i.tm.extension_opcode == None
&& fits_in_unsigned_long (i.op[0].imms->X_add_number))
|| (fits_in_imm31 (i.op[0].imms->X_add_number)
- && (((i.tm.base_opcode == 0x24
- || i.tm.base_opcode == 0xa8)
- && i.tm.extension_opcode == None)
+ && ((i.tm.base_opcode == 0x24
+ || i.tm.base_opcode == 0xa8)
|| (i.tm.base_opcode == 0x80
&& i.tm.extension_opcode == 0x4)
|| ((i.tm.base_opcode == 0xf6
|| (i.types[0].bitfield.qword
&& ((i.reg_operands == 2
&& i.op[0].regs == i.op[1].regs
- && ((i.tm.base_opcode == 0x30
- || i.tm.base_opcode == 0x28)
- && i.tm.extension_opcode == None))
+ && (i.tm.base_opcode == 0x30
+ || i.tm.base_opcode == 0x28))
|| (i.reg_operands == 1
&& i.operands == 1
- && i.tm.base_opcode == 0x30
- && i.tm.extension_opcode == None)))))
+ && i.tm.base_opcode == 0x30)))))
{
/* Optimize: -O:
andq $imm31, %r64 -> andl $imm31, %r32
i.tm.base_opcode = 0xb8;
i.tm.extension_opcode = None;
i.tm.opcode_modifier.w = 0;
- i.tm.opcode_modifier.shortform = 1;
i.tm.opcode_modifier.modrm = 0;
}
}
}
else if (optimize > 1
&& !optimize_for_space
+ && !is_any_vex_encoding (&i.tm)
&& i.reg_operands == 2
&& i.op[0].regs == i.op[1].regs
&& ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
/* Now we've parsed the mnemonic into a set of templates, and have the
operands at hand. */
- /* All intel opcodes have reversed operands except for "bound" and
- "enter". We also don't reverse intersegment "jmp" and "call"
- instructions with 2 immediate operands so that the immediate segment
- precedes the offset, as it does when in AT&T mode. */
+ /* All Intel opcodes have reversed operands except for "bound", "enter",
+ "monitor*", "mwait*", "tpause", and "umwait". We also don't reverse
+ intersegment "jmp" and "call" instructions with 2 immediate operands so
+ that the immediate segment precedes the offset, as it does when in AT&T
+ mode. */
if (intel_syntax
&& i.operands > 1
&& (strcmp (mnemonic, "bound") != 0)
&& (strcmp (mnemonic, "invlpga") != 0)
+ && (strncmp (mnemonic, "monitor", 7) != 0)
+ && (strncmp (mnemonic, "mwait", 5) != 0)
+ && (strcmp (mnemonic, "tpause") != 0)
+ && (strcmp (mnemonic, "umwait") != 0)
&& !(operand_type_check (i.types[0], imm)
&& operand_type_check (i.types[1], imm)))
swap_operands ();
: as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
}
- /* Zap movzx and movsx suffix. The suffix has been set from
- "word ptr" or "byte ptr" on the source operand in Intel syntax
- or extracted from mnemonic in AT&T syntax. But we'll use
- the destination register to choose the suffix for encoding. */
- if ((i.tm.base_opcode & ~9) == 0x0fb6)
- {
- /* In Intel syntax, there must be a suffix. In AT&T syntax, if
- there is no suffix, the default will be byte extension. */
- if (i.reg_operands != 2
- && !i.suffix
- && intel_syntax)
- as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
-
- i.suffix = 0;
- }
-
if (i.tm.opcode_modifier.fwait)
if (!add_prefix (FWAIT_OPCODE))
return;
i.op[0].disps->X_op = O_symbol;
}
- if (i.tm.opcode_modifier.rex64)
- i.rex |= REX_W;
-
/* For 8 bit registers we need an empty rex prefix. Also if the
instruction already has a prefix, we need to convert old
registers to new ones. */
if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
&& (i.op[x].regs->reg_flags & RegRex64) == 0)
{
+ gas_assert (!(i.op[x].regs->reg_flags & RegRex));
/* In case it is "hi" register, give up. */
if (i.op[x].regs->reg_num > 3)
as_bad (_("can't encode register '%s%s' in an "
if (i.rex == 0 && i.rex_encoding)
{
/* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
- that uses legacy register. If it is "hi" register, don't add
+ that uses legacy register. If it is "hi" register, don't add
the REX_OPCODE byte. */
int x;
for (x = 0; x < 2; x++)
&& (i.op[x].regs->reg_flags & RegRex64) == 0
&& i.op[x].regs->reg_num > 3)
{
+ gas_assert (!(i.op[x].regs->reg_flags & RegRex));
i.rex_encoding = FALSE;
break;
}
i.dir_encoding = dir_encoding_store;
break;
case 0x4:
- /* {vex2} */
- i.vec_encoding = vex_encoding_vex2;
+ /* {vex} */
+ i.vec_encoding = vex_encoding_vex;
break;
case 0x5:
/* {vex3} */
if (!current_templates)
{
-check_suffix:
+ check_suffix:
if (mnem_p > mnemonic)
{
/* See if we can get a match by trimming off a suffix. */
{
unsigned int op;
i386_cpu_flags cpu;
- static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
/* Templates allowing for ZMMword as well as YMMword and/or XMMword for
any one operand are implicity requiring AVX512VL support if the actual
i386_opcode_modifier suffix_check;
i386_operand_type operand_types [MAX_OPERANDS];
int addr_prefix_disp;
- unsigned int j;
- unsigned int found_cpu_match, size_match;
- unsigned int check_register;
+ unsigned int j, size_match, check_register;
enum i386_error specific_error = 0;
#if MAX_OPERANDS != 5
/* Check processor support. */
i.error = unsupported;
- found_cpu_match = (cpu_flags_match (t)
- == CPU_FLAGS_PERFECT_MATCH);
- if (!found_cpu_match)
+ if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
continue;
/* Check AT&T mnemonic. */
if (intel_mnemonic && t->opcode_modifier.attmnemonic)
continue;
- /* Check AT&T/Intel syntax and Intel64/AMD64 ISA. */
+ /* Check AT&T/Intel syntax. */
i.error = unsupported_syntax;
if ((intel_syntax && t->opcode_modifier.attsyntax)
- || (!intel_syntax && t->opcode_modifier.intelsyntax)
- || (intel64 && t->opcode_modifier.amd64)
- || (!intel64 && t->opcode_modifier.intel64))
+ || (!intel_syntax && t->opcode_modifier.intelsyntax))
continue;
+ /* Check Intel64/AMD64 ISA. */
+ switch (isa64)
+ {
+ default:
+ /* Default: Don't accept Intel64. */
+ if (t->opcode_modifier.isa64 == INTEL64)
+ continue;
+ break;
+ case amd64:
+ /* -mamd64: Don't accept Intel64 and Intel64 only. */
+ if (t->opcode_modifier.isa64 >= INTEL64)
+ continue;
+ break;
+ case intel64:
+ /* -mintel64: Don't accept AMD64. */
+ if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
+ continue;
+ break;
+ }
+
/* Check the suffix. */
i.error = invalid_instruction_suffix;
if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
if (i.suffix == QWORD_MNEM_SUFFIX
&& flag_code != CODE_64BIT
&& (intel_syntax
- ? (!t->opcode_modifier.ignoresize
+ ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
&& !t->opcode_modifier.broadcast
&& !intel_float_operand (t->name))
: intel_float_operand (t->name) != 2)
else if (i.suffix == LONG_MNEM_SUFFIX
&& !cpu_arch_flags.bitfield.cpui386
&& (intel_syntax
- ? (!t->opcode_modifier.ignoresize
+ ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
&& !intel_float_operand (t->name))
: intel_float_operand (t->name) != 2)
&& ((operand_types[0].bitfield.class != RegMMX
break;
}
- /* Address size prefix will turn Disp64/Disp32/Disp16 operand
- into Disp32/Disp16/Disp32 operand. */
- if (i.prefix[ADDR_PREFIX] != 0)
- {
- /* There should be only one Disp operand. */
- switch (flag_code)
- {
- case CODE_16BIT:
- for (j = 0; j < MAX_OPERANDS; j++)
- {
- if (operand_types[j].bitfield.disp16)
- {
- addr_prefix_disp = j;
- operand_types[j].bitfield.disp32 = 1;
- operand_types[j].bitfield.disp16 = 0;
- break;
- }
- }
+ if (!t->opcode_modifier.jump
+ || t->opcode_modifier.jump == JUMP_ABSOLUTE)
+ {
+ /* There should be only one Disp operand. */
+ for (j = 0; j < MAX_OPERANDS; j++)
+ if (operand_type_check (operand_types[j], disp))
break;
- case CODE_32BIT:
- for (j = 0; j < MAX_OPERANDS; j++)
+ if (j < MAX_OPERANDS)
+ {
+ bfd_boolean override = (i.prefix[ADDR_PREFIX] != 0);
+
+ addr_prefix_disp = j;
+
+ /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16
+ operand into Disp32/Disp32/Disp16/Disp32 operand. */
+ switch (flag_code)
{
- if (operand_types[j].bitfield.disp32)
+ case CODE_16BIT:
+ override = !override;
+ /* Fall through. */
+ case CODE_32BIT:
+ if (operand_types[j].bitfield.disp32
+ && operand_types[j].bitfield.disp16)
{
- addr_prefix_disp = j;
- operand_types[j].bitfield.disp32 = 0;
- operand_types[j].bitfield.disp16 = 1;
- break;
+ operand_types[j].bitfield.disp16 = override;
+ operand_types[j].bitfield.disp32 = !override;
}
- }
- break;
- case CODE_64BIT:
- for (j = 0; j < MAX_OPERANDS; j++)
- {
- if (operand_types[j].bitfield.disp64)
+ operand_types[j].bitfield.disp32s = 0;
+ operand_types[j].bitfield.disp64 = 0;
+ break;
+
+ case CODE_64BIT:
+ if (operand_types[j].bitfield.disp32s
+ || operand_types[j].bitfield.disp64)
{
- addr_prefix_disp = j;
- operand_types[j].bitfield.disp64 = 0;
- operand_types[j].bitfield.disp32 = 1;
- break;
+ operand_types[j].bitfield.disp64 &= !override;
+ operand_types[j].bitfield.disp32s &= !override;
+ operand_types[j].bitfield.disp32 = override;
}
+ operand_types[j].bitfield.disp16 = 0;
+ break;
}
- break;
}
- }
+ }
/* Force 0x8b encoding for "mov foo@GOT, %eax". */
if (i.reloc[0] == BFD_RELOC_386_GOT32 && t->base_opcode == 0xa0)
if (!t->opcode_modifier.d)
continue;
-check_reverse:
+ check_reverse:
if (!(size_match & MATCH_REVERSE))
continue;
/* Try reversing direction of operands. */
/* Found either forward/reverse 2, 3 or 4 operand match here:
slip through to break. */
}
- if (!found_cpu_match)
- continue;
/* Check if vector and VEX operands are valid. */
if (check_VecOperands (t) || VEX_check_operands (t))
as_warn (_("indirect %s without `*'"), t->name);
if (t->opcode_modifier.isprefix
- && t->opcode_modifier.ignoresize)
+ && t->opcode_modifier.mnemonicsize == IGNORESIZE)
{
/* Warn them that a data or address size prefix doesn't
affect assembly of the next line of code. */
else if (i.tm.opcode_modifier.size == SIZE64)
i.suffix = QWORD_MNEM_SUFFIX;
else if (i.reg_operands
- && (i.operands > 1 || i.types[0].bitfield.class == Reg))
+ && (i.operands > 1 || i.types[0].bitfield.class == Reg)
+ && !i.tm.opcode_modifier.addrprefixopreg)
{
+ unsigned int numop = i.operands;
+
+ /* movsx/movzx want only their source operand considered here, for the
+ ambiguity checking below. The suffix will be replaced afterwards
+ to represent the destination (register). */
+ if (((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w)
+ || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
+ --i.operands;
+
+ /* crc32 needs REX.W set regardless of suffix / source operand size. */
+ if (i.tm.base_opcode == 0xf20f38f0
+ && i.tm.operand_types[1].bitfield.qword)
+ i.rex |= REX_W;
+
/* If there's no instruction mnemonic suffix we try to invent one
based on GPR operands. */
if (!i.suffix)
Destination register type is more significant than source
register type. crc32 in SSE4.2 prefers source register
type. */
- if (i.tm.base_opcode == 0xf20f38f0
- && i.types[0].bitfield.class == Reg)
- {
- if (i.types[0].bitfield.byte)
- i.suffix = BYTE_MNEM_SUFFIX;
- else if (i.types[0].bitfield.word)
- i.suffix = WORD_MNEM_SUFFIX;
- else if (i.types[0].bitfield.dword)
- i.suffix = LONG_MNEM_SUFFIX;
- else if (i.types[0].bitfield.qword)
- i.suffix = QWORD_MNEM_SUFFIX;
- }
-
- if (!i.suffix)
- {
- int op;
+ unsigned int op = i.tm.base_opcode != 0xf20f38f0 ? i.operands : 1;
- if (i.tm.base_opcode == 0xf20f38f0)
- {
- /* We have to know the operand size for crc32. */
- as_bad (_("ambiguous memory operand size for `%s`"),
- i.tm.name);
- return 0;
- }
+ while (op--)
+ if (i.tm.operand_types[op].bitfield.instance == InstanceNone
+ || i.tm.operand_types[op].bitfield.instance == Accum)
+ {
+ if (i.types[op].bitfield.class != Reg)
+ continue;
+ if (i.types[op].bitfield.byte)
+ i.suffix = BYTE_MNEM_SUFFIX;
+ else if (i.types[op].bitfield.word)
+ i.suffix = WORD_MNEM_SUFFIX;
+ else if (i.types[op].bitfield.dword)
+ i.suffix = LONG_MNEM_SUFFIX;
+ else if (i.types[op].bitfield.qword)
+ i.suffix = QWORD_MNEM_SUFFIX;
+ else
+ continue;
+ break;
+ }
- for (op = i.operands; --op >= 0;)
- if (i.tm.operand_types[op].bitfield.instance == InstanceNone
- || i.tm.operand_types[op].bitfield.instance == Accum)
- {
- if (i.types[op].bitfield.class != Reg)
- continue;
- if (i.types[op].bitfield.byte)
- i.suffix = BYTE_MNEM_SUFFIX;
- else if (i.types[op].bitfield.word)
- i.suffix = WORD_MNEM_SUFFIX;
- else if (i.types[op].bitfield.dword)
- i.suffix = LONG_MNEM_SUFFIX;
- else if (i.types[op].bitfield.qword)
- i.suffix = QWORD_MNEM_SUFFIX;
- else
- continue;
- break;
- }
- }
+ /* As an exception, movsx/movzx silently default to a byte source
+ in AT&T mode. */
+ if ((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w
+ && !i.suffix && !intel_syntax)
+ i.suffix = BYTE_MNEM_SUFFIX;
}
else if (i.suffix == BYTE_MNEM_SUFFIX)
{
if (intel_syntax
- && i.tm.opcode_modifier.ignoresize
+ && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
&& i.tm.opcode_modifier.no_bsuf)
i.suffix = 0;
else if (!check_byte_reg ())
else if (i.suffix == LONG_MNEM_SUFFIX)
{
if (intel_syntax
- && i.tm.opcode_modifier.ignoresize
+ && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
&& i.tm.opcode_modifier.no_lsuf
&& !i.tm.opcode_modifier.todword
&& !i.tm.opcode_modifier.toqword)
else if (i.suffix == QWORD_MNEM_SUFFIX)
{
if (intel_syntax
- && i.tm.opcode_modifier.ignoresize
+ && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
&& i.tm.opcode_modifier.no_qsuf
&& !i.tm.opcode_modifier.todword
&& !i.tm.opcode_modifier.toqword)
else if (i.suffix == WORD_MNEM_SUFFIX)
{
if (intel_syntax
- && i.tm.opcode_modifier.ignoresize
+ && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
&& i.tm.opcode_modifier.no_wsuf)
i.suffix = 0;
else if (!check_word_reg ())
return 0;
}
- else if (intel_syntax && i.tm.opcode_modifier.ignoresize)
+ else if (intel_syntax
+ && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
/* Do nothing if the instruction is going to ignore the prefix. */
;
else
abort ();
+
+ /* Undo the movsx/movzx change done above. */
+ i.operands = numop;
}
- else if (i.tm.opcode_modifier.defaultsize
- && !i.suffix
- /* exclude fldenv/frstor/fsave/fstenv */
- && i.tm.opcode_modifier.no_ssuf
- /* exclude sysret */
- && i.tm.base_opcode != 0x0f07)
+ else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
+ && !i.suffix)
{
i.suffix = stackop_size;
if (stackop_size == LONG_MNEM_SUFFIX)
i.tm.name);
}
}
- else if (intel_syntax
- && !i.suffix
+ else if (!i.suffix
&& (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
|| i.tm.opcode_modifier.jump == JUMP_BYTE
|| i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
}
}
- if (!i.suffix)
- {
- if (!intel_syntax)
+ if (!i.suffix
+ && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
+ /* Also cover lret/retf/iret in 64-bit mode. */
+ || (flag_code == CODE_64BIT
+ && !i.tm.opcode_modifier.no_lsuf
+ && !i.tm.opcode_modifier.no_qsuf))
+ && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
+ /* Accept FLDENV et al without suffix. */
+ && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
+ {
+ unsigned int suffixes, evex = 0;
+
+ suffixes = !i.tm.opcode_modifier.no_bsuf;
+ if (!i.tm.opcode_modifier.no_wsuf)
+ suffixes |= 1 << 1;
+ if (!i.tm.opcode_modifier.no_lsuf)
+ suffixes |= 1 << 2;
+ if (!i.tm.opcode_modifier.no_ldsuf)
+ suffixes |= 1 << 3;
+ if (!i.tm.opcode_modifier.no_ssuf)
+ suffixes |= 1 << 4;
+ if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
+ suffixes |= 1 << 5;
+
+ /* For [XYZ]MMWORD operands inspect operand sizes. While generally
+ also suitable for AT&T syntax mode, it was requested that this be
+ restricted to just Intel syntax. */
+ if (intel_syntax && is_any_vex_encoding (&i.tm) && !i.broadcast)
{
- if (i.tm.opcode_modifier.w)
+ unsigned int op;
+
+ for (op = 0; op < i.tm.operands; ++op)
{
- as_bad (_("no instruction mnemonic suffix given and "
- "no register operands; can't size instruction"));
- return 0;
+ if (is_evex_encoding (&i.tm)
+ && !cpu_arch_flags.bitfield.cpuavx512vl)
+ {
+ if (i.tm.operand_types[op].bitfield.ymmword)
+ i.tm.operand_types[op].bitfield.xmmword = 0;
+ if (i.tm.operand_types[op].bitfield.zmmword)
+ i.tm.operand_types[op].bitfield.ymmword = 0;
+ if (!i.tm.opcode_modifier.evex
+ || i.tm.opcode_modifier.evex == EVEXDYN)
+ i.tm.opcode_modifier.evex = EVEX512;
+ }
+
+ if (i.tm.operand_types[op].bitfield.xmmword
+ + i.tm.operand_types[op].bitfield.ymmword
+ + i.tm.operand_types[op].bitfield.zmmword < 2)
+ continue;
+
+ /* Any properly sized operand disambiguates the insn. */
+ if (i.types[op].bitfield.xmmword
+ || i.types[op].bitfield.ymmword
+ || i.types[op].bitfield.zmmword)
+ {
+ suffixes &= ~(7 << 6);
+ evex = 0;
+ break;
+ }
+
+ if ((i.flags[op] & Operand_Mem)
+ && i.tm.operand_types[op].bitfield.unspecified)
+ {
+ if (i.tm.operand_types[op].bitfield.xmmword)
+ suffixes |= 1 << 6;
+ if (i.tm.operand_types[op].bitfield.ymmword)
+ suffixes |= 1 << 7;
+ if (i.tm.operand_types[op].bitfield.zmmword)
+ suffixes |= 1 << 8;
+ if (is_evex_encoding (&i.tm))
+ evex = EVEX512;
+ }
}
}
- else
- {
- unsigned int suffixes;
- suffixes = !i.tm.opcode_modifier.no_bsuf;
- if (!i.tm.opcode_modifier.no_wsuf)
- suffixes |= 1 << 1;
- if (!i.tm.opcode_modifier.no_lsuf)
- suffixes |= 1 << 2;
- if (!i.tm.opcode_modifier.no_ldsuf)
- suffixes |= 1 << 3;
- if (!i.tm.opcode_modifier.no_ssuf)
- suffixes |= 1 << 4;
- if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
- suffixes |= 1 << 5;
-
- /* There are more than suffix matches. */
- if (i.tm.opcode_modifier.w
- || ((suffixes & (suffixes - 1))
- && !i.tm.opcode_modifier.defaultsize
- && !i.tm.opcode_modifier.ignoresize))
+ /* Are multiple suffixes / operand sizes allowed? */
+ if (suffixes & (suffixes - 1))
+ {
+ if (intel_syntax
+ && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
+ || operand_check == check_error))
{
as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
return 0;
}
+ if (operand_check == check_error)
+ {
+ as_bad (_("no instruction mnemonic suffix given and "
+ "no register operands; can't size `%s'"), i.tm.name);
+ return 0;
+ }
+ if (operand_check == check_warning)
+ as_warn (_("%s; using default for `%s'"),
+ intel_syntax
+ ? _("ambiguous operand size")
+ : _("no instruction mnemonic suffix given and "
+ "no register operands"),
+ i.tm.name);
+
+ if (i.tm.opcode_modifier.floatmf)
+ i.suffix = SHORT_MNEM_SUFFIX;
+ else if ((i.tm.base_opcode | 8) == 0xfbe
+ || (i.tm.base_opcode == 0x63
+ && i.tm.cpu_flags.bitfield.cpu64))
+ /* handled below */;
+ else if (evex)
+ i.tm.opcode_modifier.evex = evex;
+ else if (flag_code == CODE_16BIT)
+ i.suffix = WORD_MNEM_SUFFIX;
+ else if (!i.tm.opcode_modifier.no_lsuf)
+ i.suffix = LONG_MNEM_SUFFIX;
+ else
+ i.suffix = QWORD_MNEM_SUFFIX;
}
}
+ if ((i.tm.base_opcode | 8) == 0xfbe
+ || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
+ {
+ /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
+ In AT&T syntax, if there is no suffix (warned about above), the default
+ will be byte extension. */
+ if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
+ i.tm.base_opcode |= 1;
+
+ /* For further processing, the suffix should represent the destination
+ (register). This is already the case when one was used with
+ mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
+ no suffix to begin with. */
+ if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
+ {
+ if (i.types[1].bitfield.word)
+ i.suffix = WORD_MNEM_SUFFIX;
+ else if (i.types[1].bitfield.qword)
+ i.suffix = QWORD_MNEM_SUFFIX;
+ else
+ i.suffix = LONG_MNEM_SUFFIX;
+
+ i.tm.opcode_modifier.w = 0;
+ }
+ }
+
+ if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
+ i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
+ != (i.tm.operand_types[1].bitfield.class == Reg);
+
/* Change the opcode based on the operand size given by i.suffix. */
switch (i.suffix)
{
/* It's not a byte, select word/dword operation. */
if (i.tm.opcode_modifier.w)
{
- if (i.tm.opcode_modifier.shortform)
+ if (i.short_form)
i.tm.base_opcode |= 8;
else
i.tm.base_opcode |= 1;
/* Now select between word & dword operations via the operand
size prefix, except for instructions that will ignore this
prefix anyway. */
- if (i.reg_operands > 0
- && i.types[0].bitfield.class == Reg
- && i.tm.opcode_modifier.addrprefixopreg
- && (i.tm.operand_types[0].bitfield.instance == Accum
- || i.operands == 1))
- {
- /* The address size override prefix changes the size of the
- first operand. */
- if ((flag_code == CODE_32BIT
- && i.op[0].regs->reg_type.bitfield.word)
- || (flag_code != CODE_32BIT
- && i.op[0].regs->reg_type.bitfield.dword))
- if (!add_prefix (ADDR_PREFIX_OPCODE))
- return 0;
- }
- else if (i.suffix != QWORD_MNEM_SUFFIX
- && !i.tm.opcode_modifier.ignoresize
- && !i.tm.opcode_modifier.floatmf
- && !is_any_vex_encoding (&i.tm)
- && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
- || (flag_code == CODE_64BIT
- && i.tm.opcode_modifier.jump == JUMP_BYTE)))
+ if (i.suffix != QWORD_MNEM_SUFFIX
+ && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
+ && !i.tm.opcode_modifier.floatmf
+ && !is_any_vex_encoding (&i.tm)
+ && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
+ || (flag_code == CODE_64BIT
+ && i.tm.opcode_modifier.jump == JUMP_BYTE)))
{
unsigned int prefix = DATA_PREFIX_OPCODE;
if (i.suffix == QWORD_MNEM_SUFFIX
&& flag_code == CODE_64BIT
&& !i.tm.opcode_modifier.norex64
+ && !i.tm.opcode_modifier.vexw
/* Special case for xchg %rax,%rax. It is NOP and doesn't
need rex64. */
&& ! (i.operands == 2
break;
}
- if (i.reg_operands != 0
- && i.operands > 1
- && i.tm.opcode_modifier.addrprefixopreg
- && i.tm.operand_types[0].bitfield.instance != Accum)
+ if (i.tm.opcode_modifier.addrprefixopreg)
{
- /* Check invalid register operand when the address size override
- prefix changes the size of register operands. */
- unsigned int op;
- enum { need_word, need_dword, need_qword } need;
+ gas_assert (!i.suffix);
+ gas_assert (i.reg_operands);
- if (flag_code == CODE_32BIT)
- need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
+ if (i.tm.operand_types[0].bitfield.instance == Accum
+ || i.operands == 1)
+ {
+ /* The address size override prefix changes the size of the
+ first operand. */
+ if (flag_code == CODE_64BIT
+ && i.op[0].regs->reg_type.bitfield.word)
+ {
+ as_bad (_("16-bit addressing unavailable for `%s'"),
+ i.tm.name);
+ return 0;
+ }
+
+ if ((flag_code == CODE_32BIT
+ ? i.op[0].regs->reg_type.bitfield.word
+ : i.op[0].regs->reg_type.bitfield.dword)
+ && !add_prefix (ADDR_PREFIX_OPCODE))
+ return 0;
+ }
else
{
- if (i.prefix[ADDR_PREFIX])
+ /* Check invalid register operand when the address size override
+ prefix changes the size of register operands. */
+ unsigned int op;
+ enum { need_word, need_dword, need_qword } need;
+
+ if (flag_code == CODE_32BIT)
+ need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
+ else if (i.prefix[ADDR_PREFIX])
need = need_dword;
else
need = flag_code == CODE_64BIT ? need_qword : need_word;
- }
- for (op = 0; op < i.operands; op++)
- if (i.types[op].bitfield.class == Reg
- && ((need == need_word
- && !i.op[op].regs->reg_type.bitfield.word)
- || (need == need_dword
- && !i.op[op].regs->reg_type.bitfield.dword)
- || (need == need_qword
- && !i.op[op].regs->reg_type.bitfield.qword)))
- {
- as_bad (_("invalid register operand size for `%s'"),
- i.tm.name);
- return 0;
- }
+ for (op = 0; op < i.operands; op++)
+ {
+ if (i.types[op].bitfield.class != Reg)
+ continue;
+
+ switch (need)
+ {
+ case need_word:
+ if (i.op[op].regs->reg_type.bitfield.word)
+ continue;
+ break;
+ case need_dword:
+ if (i.op[op].regs->reg_type.bitfield.dword)
+ continue;
+ break;
+ case need_qword:
+ if (i.op[op].regs->reg_type.bitfield.qword)
+ continue;
+ break;
+ }
+
+ as_bad (_("invalid register operand size for `%s'"),
+ i.tm.name);
+ return 0;
+ }
+ }
}
return 1;
&& i.tm.operand_types[op].bitfield.word)
continue;
- /* crc32 doesn't generate this warning. */
- if (i.tm.base_opcode == 0xf20f38f0)
+ /* crc32 only wants its source operand checked here. */
+ if (i.tm.base_opcode == 0xf20f38f0 && op)
continue;
- if ((i.types[op].bitfield.word
- || i.types[op].bitfield.dword
- || i.types[op].bitfield.qword)
- && i.op[op].regs->reg_num < 4
- /* Prohibit these changes in 64bit mode, since the lowering
- would be more complicated. */
- && flag_code != CODE_64BIT)
- {
-#if REGISTER_WARNINGS
- if (!quiet_warnings)
- as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
- register_prefix,
- (i.op[op].regs + (i.types[op].bitfield.word
- ? REGNAM_AL - REGNAM_AX
- : REGNAM_AL - REGNAM_EAX))->reg_name,
- register_prefix,
- i.op[op].regs->reg_name,
- i.suffix);
-#endif
- continue;
- }
/* Any other register is bad. */
if (i.types[op].bitfield.class == Reg
|| i.types[op].bitfield.class == RegMMX
i.suffix);
return 0;
}
- /* Warn if the e prefix on a general reg is missing. */
- else if ((!quiet_warnings || flag_code == CODE_64BIT)
- && i.types[op].bitfield.word
+ /* Error if the e prefix on a general reg is missing. */
+ else if (i.types[op].bitfield.word
&& (i.tm.operand_types[op].bitfield.class == Reg
|| i.tm.operand_types[op].bitfield.instance == Accum)
&& i.tm.operand_types[op].bitfield.dword)
{
- /* Prohibit these changes in the 64bit mode, since the
- lowering is more complicated. */
- if (flag_code == CODE_64BIT)
- {
- as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
- register_prefix, i.op[op].regs->reg_name,
- i.suffix);
- return 0;
- }
-#if REGISTER_WARNINGS
- as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
- register_prefix,
- (i.op[op].regs + REGNAM_EAX - REGNAM_AX)->reg_name,
- register_prefix, i.op[op].regs->reg_name, i.suffix);
-#endif
+ as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
+ register_prefix, i.op[op].regs->reg_name,
+ i.suffix);
+ return 0;
}
/* Warn if the r prefix on a general reg is present. */
else if (i.types[op].bitfield.qword
i.suffix);
return 0;
}
- /* Warn if the e or r prefix on a general reg is present. */
- else if ((!quiet_warnings || flag_code == CODE_64BIT)
- && (i.types[op].bitfield.dword
+ /* Error if the e or r prefix on a general reg is present. */
+ else if ((i.types[op].bitfield.dword
|| i.types[op].bitfield.qword)
&& (i.tm.operand_types[op].bitfield.class == Reg
|| i.tm.operand_types[op].bitfield.instance == Accum)
&& i.tm.operand_types[op].bitfield.word)
{
- /* Prohibit these changes in the 64bit mode, since the
- lowering is more complicated. */
- if (flag_code == CODE_64BIT)
- {
- as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
- register_prefix, i.op[op].regs->reg_name,
- i.suffix);
- return 0;
- }
-#if REGISTER_WARNINGS
- as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
- register_prefix,
- (i.op[op].regs + REGNAM_AX - REGNAM_EAX)->reg_name,
- register_prefix, i.op[op].regs->reg_name, i.suffix);
-#endif
+ as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
+ register_prefix, i.op[op].regs->reg_name,
+ i.suffix);
+ return 0;
}
return 1;
}
}
else
{
-duplicate:
+ duplicate:
i.operands++;
i.reg_operands++;
i.tm.operands++;
on one of their operands, the default segment is ds. */
default_seg = &ds;
}
- else if (i.tm.opcode_modifier.shortform)
+ else if (i.short_form)
{
/* The register or float register operand is in operand
0 or 1. */
}
}
- if (i.tm.base_opcode == 0x8d /* lea */
- && i.seg[0]
- && !quiet_warnings)
- as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
+ if ((i.seg[0] || i.prefix[SEG_PREFIX])
+ && i.tm.base_opcode == 0x8d /* lea */
+ && !is_any_vex_encoding(&i.tm))
+ {
+ if (!quiet_warnings)
+ as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
+ if (optimize)
+ {
+ i.seg[0] = NULL;
+ i.prefix[SEG_PREFIX] = 0;
+ }
+ }
/* If a segment was explicitly specified, and the specified segment
- is not the default, use an opcode prefix to select it. If we
- never figured out what the default segment is, then default_seg
- will be zero at this point, and the specified segment prefix will
- always be used. */
- if ((i.seg[0]) && (i.seg[0] != default_seg))
+ is neither the default nor the one already recorded from a prefix,
+ use an opcode prefix to select it. If we never figured out what
+ the default segment is, then default_seg will be zero at this
+ point, and the specified segment prefix will always be used. */
+ if (i.seg[0]
+ && i.seg[0] != default_seg
+ && i.seg[0]->seg_prefix != i.prefix[SEG_PREFIX])
{
if (!add_prefix (i.seg[0]->seg_prefix))
return 0;
return default_seg;
}
+static unsigned int
+flip_code16 (unsigned int code16)
+{
+ gas_assert (i.tm.operands == 1);
+
+ return !(i.prefix[REX_PREFIX] & REX_W)
+ && (code16 ? i.tm.operand_types[0].bitfield.disp32
+ || i.tm.operand_types[0].bitfield.disp32s
+ : i.tm.operand_types[0].bitfield.disp16)
+ ? CODE16 : 0;
+}
+
static void
output_branch (void)
{
{
prefix = 1;
i.prefixes -= 1;
- code16 ^= CODE16;
+ code16 ^= flip_code16(code16);
}
/* Pentium4 branch hints. */
if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
/* BND prefixed jump. */
if (i.prefix[BND_PREFIX] != 0)
{
- FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]);
- i.prefixes -= 1;
+ prefix++;
+ i.prefixes--;
}
- if (i.prefixes != 0 && !intel_syntax)
- as_warn (_("skipping prefixes on this instruction"));
+ if (i.prefixes != 0)
+ as_warn (_("skipping prefixes on `%s'"), i.tm.name);
/* It's always a symbol; End frag & setup for relax.
Make sure there is enough room in this frag for the largest
if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
|| i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
*p++ = i.prefix[SEG_PREFIX];
+ if (i.prefix[BND_PREFIX] != 0)
+ *p++ = BND_PREFIX_OPCODE;
if (i.prefix[REX_PREFIX] != 0)
*p++ = i.prefix[REX_PREFIX];
*p = i.tm.base_opcode;
{
FRAG_APPEND_1_CHAR (DATA_PREFIX_OPCODE);
i.prefixes -= 1;
- code16 ^= CODE16;
+ code16 ^= flip_code16(code16);
}
size = 4;
size = 2;
}
- if (i.prefix[REX_PREFIX] != 0)
+ /* BND prefixed jump. */
+ if (i.prefix[BND_PREFIX] != 0)
{
- FRAG_APPEND_1_CHAR (i.prefix[REX_PREFIX]);
+ FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]);
i.prefixes -= 1;
}
- /* BND prefixed jump. */
- if (i.prefix[BND_PREFIX] != 0)
+ if (i.prefix[REX_PREFIX] != 0)
{
- FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]);
+ FRAG_APPEND_1_CHAR (i.prefix[REX_PREFIX]);
i.prefixes -= 1;
}
- if (i.prefixes != 0 && !intel_syntax)
- as_warn (_("skipping prefixes on this instruction"));
+ if (i.prefixes != 0)
+ as_warn (_("skipping prefixes on `%s'"), i.tm.name);
p = frag_more (i.tm.opcode_length + size);
switch (i.tm.opcode_length)
i.prefixes -= 1;
code16 ^= CODE16;
}
- if (i.prefix[REX_PREFIX] != 0)
- {
- prefix++;
- i.prefixes -= 1;
- }
+
+ gas_assert (!i.prefix[REX_PREFIX]);
size = 4;
if (code16)
size = 2;
- if (i.prefixes != 0 && !intel_syntax)
- as_warn (_("skipping prefixes on this instruction"));
+ if (i.prefixes != 0)
+ as_warn (_("skipping prefixes on `%s'"), i.tm.name);
/* 1 opcode; 2 segment; offset */
p = frag_more (prefix + 1 + 2 + size);
}
/* Return 1 for test, and, cmp, add, sub, inc and dec which may
- be macro-fused with conditional jumps. */
+ be macro-fused with conditional jumps.
+ NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
+ or is one of the following format:
+
+ cmp m, imm
+ add m, imm
+ sub m, imm
+ test m, imm
+ and m, imm
+ inc m
+ dec m
+
+ it is unfusible. */
static int
-maybe_fused_with_jcc_p (void)
+maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
{
/* No RIP address. */
if (i.base_reg && i.base_reg->reg_num == RegIP)
if (is_any_vex_encoding (&i.tm))
return 0;
- /* and, add, sub with destination register. */
- if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
- || i.tm.base_opcode <= 5
+ /* add, sub without add/sub m, imm. */
+ if (i.tm.base_opcode <= 5
|| (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
|| ((i.tm.base_opcode | 3) == 0x83
- && ((i.tm.extension_opcode | 1) == 0x5
+ && (i.tm.extension_opcode == 0x5
|| i.tm.extension_opcode == 0x0)))
- return (i.types[1].bitfield.class == Reg
- || i.types[1].bitfield.instance == Accum);
+ {
+ *mf_cmp_p = mf_cmp_alu_cmp;
+ return !(i.mem_operands && i.imm_operands);
+ }
+
+ /* and without and m, imm. */
+ if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
+ || ((i.tm.base_opcode | 3) == 0x83
+ && i.tm.extension_opcode == 0x4))
+ {
+ *mf_cmp_p = mf_cmp_test_and;
+ return !(i.mem_operands && i.imm_operands);
+ }
- /* test, cmp with any register. */
+ /* test without test m imm. */
if ((i.tm.base_opcode | 1) == 0x85
|| (i.tm.base_opcode | 1) == 0xa9
|| ((i.tm.base_opcode | 1) == 0xf7
- && i.tm.extension_opcode == 0)
- || (i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
+ && i.tm.extension_opcode == 0))
+ {
+ *mf_cmp_p = mf_cmp_test_and;
+ return !(i.mem_operands && i.imm_operands);
+ }
+
+ /* cmp without cmp m, imm. */
+ if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
|| ((i.tm.base_opcode | 3) == 0x83
&& (i.tm.extension_opcode == 0x7)))
- return (i.types[0].bitfield.class == Reg
- || i.types[0].bitfield.instance == Accum
- || i.types[1].bitfield.class == Reg
- || i.types[1].bitfield.instance == Accum);
+ {
+ *mf_cmp_p = mf_cmp_alu_cmp;
+ return !(i.mem_operands && i.imm_operands);
+ }
- /* inc, dec with any register. */
+ /* inc, dec without inc/dec m. */
if ((i.tm.cpu_flags.bitfield.cpuno64
&& (i.tm.base_opcode | 0xf) == 0x4f)
|| ((i.tm.base_opcode | 1) == 0xff
&& i.tm.extension_opcode <= 0x1))
- return (i.types[0].bitfield.class == Reg
- || i.types[0].bitfield.instance == Accum);
+ {
+ *mf_cmp_p = mf_cmp_incdec;
+ return !i.mem_operands;
+ }
return 0;
}
/* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
static int
-add_fused_jcc_padding_frag_p (void)
+add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
{
/* NB: Don't work with COND_JUMP86 without i386. */
if (!align_branch_power
|| !(align_branch & align_branch_fused_bit))
return 0;
- if (maybe_fused_with_jcc_p ())
+ if (maybe_fused_with_jcc_p (mf_cmp_p))
{
if (last_insn.kind == last_insn_other
|| last_insn.seg != now_seg)
/* Return 1 if a BRANCH_PADDING frag should be generated. */
static int
-add_branch_padding_frag_p (enum align_branch_kind *branch_p)
+add_branch_padding_frag_p (enum align_branch_kind *branch_p,
+ enum mf_jcc_kind *mf_jcc_p)
{
int add_padding;
}
else
{
+ /* Because J<cc> and JN<cc> share same group in macro-fusible table,
+ igore the lowest bit. */
+ *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
*branch_p = align_branch_jcc;
if ((align_branch & align_branch_jcc_bit))
add_padding = 1;
offsetT insn_start_off;
fragS *fragP = NULL;
enum align_branch_kind branch = align_branch_none;
+ /* The initializer is arbitrary just to avoid uninitialized error.
+ it's actually either assigned in add_branch_padding_frag_p
+ or never be used. */
+ enum mf_jcc_kind mf_jcc = mf_jcc_jo;
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
if (IS_ELF && x86_used_note)
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
if (i.has_regmmx
|| i.tm.base_opcode == 0xf77 /* emms */
- || i.tm.base_opcode == 0xf0e /* femms */)
+ || i.tm.base_opcode == 0xf0e /* femms */
+ || i.tm.base_opcode == 0xf2a /* cvtpi2ps */
+ || i.tm.base_opcode == 0x660f2a /* cvtpi2pd */)
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
if (i.has_regxmm)
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
insn_start_frag = frag_now;
insn_start_off = frag_now_fix ();
- if (add_branch_padding_frag_p (&branch))
+ if (add_branch_padding_frag_p (&branch, &mf_jcc))
{
char *p;
/* Branch can be 8 bytes. Leave some room for prefixes. */
ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
NULL, 0, p);
+ fragP->tc_frag_data.mf_type = mf_jcc;
fragP->tc_frag_data.branch_type = branch;
fragP->tc_frag_data.max_bytes = max_branch_padding_size;
}
unsigned char *q;
unsigned int j;
unsigned int prefix;
+ enum mf_cmp_kind mf_cmp;
if (avoid_fence
&& (i.tm.base_opcode == 0xfaee8
if (branch)
/* Skip if this is a branch. */
;
- else if (add_fused_jcc_padding_frag_p ())
+ else if (add_fused_jcc_padding_frag_p (&mf_cmp))
{
/* Make room for padding. */
frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
NULL, 0, p);
+ fragP->tc_frag_data.mf_type = mf_cmp;
fragP->tc_frag_data.branch_type = align_branch_fused;
fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
}
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
/* For x32, add a dummy REX_OPCODE prefix for mov/add with
R_X86_64_GOTTPOFF relocation so that linker can safely
- perform IE->LE optimization. */
+ perform IE->LE optimization. A dummy REX_OPCODE prefix
+ is also needed for lea with R_X86_64_GOTPC32_TLSDESC
+ relocation for GDesc -> IE/LE optimization. */
if (x86_elf_abi == X86_64_X32_ABI
&& i.operands == 2
- && i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
+ && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
+ || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
&& i.prefix[REX_PREFIX] == 0)
add_prefix (REX_OPCODE);
#endif
&& i.rm.regmem == 5))
&& (i.rm.mode == 2
|| (i.rm.mode == 0 && i.rm.regmem == 5))
+ && !is_any_vex_encoding(&i.tm)
&& ((i.operands == 1
&& i.tm.base_opcode == 0xff
&& (i.rm.reg == 2 || i.rm.reg == 4))
|| (i.operands == 2
&& (i.tm.base_opcode == 0x8b
|| i.tm.base_opcode == 0x85
- || (i.tm.base_opcode & 0xc7) == 0x03))))
+ || (i.tm.base_opcode & ~0x38) == 0x03))))
{
if (object_64bit)
{
operand_type_set (&bigdisp, 0);
if (i.jumpabsolute
+ || i.types[this_operand].bitfield.baseindex
|| (current_templates->start->opcode_modifier.jump != JUMP
&& current_templates->start->opcode_modifier.jump != JUMP_DWORD))
{
- bigdisp.bitfield.disp32 = 1;
+ i386_addressing_mode ();
override = (i.prefix[ADDR_PREFIX] != 0);
if (flag_code == CODE_64BIT)
{
bigdisp.bitfield.disp32s = 1;
bigdisp.bitfield.disp64 = 1;
}
+ else
+ bigdisp.bitfield.disp32 = 1;
}
else if ((flag_code == CODE_16BIT) ^ override)
- {
- bigdisp.bitfield.disp32 = 0;
bigdisp.bitfield.disp16 = 1;
- }
+ else
+ bigdisp.bitfield.disp32 = 1;
}
else
{
- /* For PC-relative branches, the width of the displacement
- is dependent upon data size, not address size. */
+ /* For PC-relative branches, the width of the displacement may be
+ dependent upon data size, but is never dependent upon address size.
+ Also make sure to not unintentionally match against a non-PC-relative
+ branch template. */
+ static templates aux_templates;
+ const insn_template *t = current_templates->start;
+ bfd_boolean has_intel64 = FALSE;
+
+ aux_templates.start = t;
+ while (++t < current_templates->end)
+ {
+ if (t->opcode_modifier.jump
+ != current_templates->start->opcode_modifier.jump)
+ break;
+ if ((t->opcode_modifier.isa64 >= INTEL64))
+ has_intel64 = TRUE;
+ }
+ if (t < current_templates->end)
+ {
+ aux_templates.end = t;
+ current_templates = &aux_templates;
+ }
+
override = (i.prefix[DATA_PREFIX] != 0);
if (flag_code == CODE_64BIT)
{
- if (override || i.suffix == WORD_MNEM_SUFFIX)
+ if ((override || i.suffix == WORD_MNEM_SUFFIX)
+ && (!intel64 || !has_intel64))
bigdisp.bitfield.disp16 = 1;
else
- {
- bigdisp.bitfield.disp32 = 1;
- bigdisp.bitfield.disp32s = 1;
- }
+ bigdisp.bitfield.disp32s = 1;
}
else
{
}
#endif
+ if (current_templates->start->opcode_modifier.jump == JUMP_BYTE
+ /* Constants get taken care of by optimize_disp(). */
+ && exp->X_op != O_constant)
+ i.types[this_operand].bitfield.disp8 = 1;
+
/* Check if this is a displacement only operand. */
bigdisp = i.types[this_operand];
bigdisp.bitfield.disp8 = 0;
if (i.prefix[ADDR_PREFIX])
addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
+ else if (flag_code == CODE_16BIT
+ && current_templates->start->cpu_flags.bitfield.cpumpx
+ /* Avoid replacing the "16-bit addressing not allowed" diagnostic
+ from md_assemble() by "is not a valid base/index expression"
+ when there is a base and/or index. */
+ && !i.types[this_operand].bitfield.baseindex)
+ {
+ /* MPX insn memory operands with neither base nor index must be forced
+ to use 32-bit addressing in 16-bit mode. */
+ addr_mode = CODE_32BIT;
+ i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
+ ++i.prefixes;
+ gas_assert (!i.types[this_operand].bitfield.disp16);
+ gas_assert (!i.types[this_operand].bitfield.disp32);
+ }
else
{
addr_mode = flag_code;
else
return 1;
-bad_address:
+ bad_address:
as_bad (_("`%s' is not a valid %s expression"),
operand_string, kind);
return 0;
}
#endif
+/* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
+ Note also work for Skylake and Cascadelake.
+---------------------------------------------------------------------
+| JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
+| ------ | ----------- | ------- | -------- |
+| Jo | N | N | Y |
+| Jno | N | N | Y |
+| Jc/Jb | Y | N | Y |
+| Jae/Jnb | Y | N | Y |
+| Je/Jz | Y | Y | Y |
+| Jne/Jnz | Y | Y | Y |
+| Jna/Jbe | Y | N | Y |
+| Ja/Jnbe | Y | N | Y |
+| Js | N | N | Y |
+| Jns | N | N | Y |
+| Jp/Jpe | N | N | Y |
+| Jnp/Jpo | N | N | Y |
+| Jl/Jnge | Y | Y | Y |
+| Jge/Jnl | Y | Y | Y |
+| Jle/Jng | Y | Y | Y |
+| Jg/Jnle | Y | Y | Y |
+--------------------------------------------------------------------- */
+static int
+i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
+{
+ if (mf_cmp == mf_cmp_alu_cmp)
+ return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
+ || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
+ if (mf_cmp == mf_cmp_incdec)
+ return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
+ || mf_jcc == mf_jcc_jle);
+ if (mf_cmp == mf_cmp_test_and)
+ return 1;
+ return 0;
+}
+
/* Return the next non-empty frag. */
static fragS *
/* Return the next jcc frag after BRANCH_PADDING. */
static fragS *
-i386_next_jcc_frag (fragS *fragP)
+i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
{
- if (!fragP)
+ fragS *branch_fragP;
+ if (!pad_fragP)
return NULL;
- if (fragP->fr_type == rs_machine_dependent
- && (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
+ if (pad_fragP->fr_type == rs_machine_dependent
+ && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
== BRANCH_PADDING))
{
- fragP = i386_next_non_empty_frag (fragP);
- if (fragP->fr_type != rs_machine_dependent)
+ branch_fragP = i386_next_non_empty_frag (pad_fragP);
+ if (branch_fragP->fr_type != rs_machine_dependent)
return NULL;
- if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == COND_JUMP)
- return fragP;
+ if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
+ && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
+ pad_fragP->tc_frag_data.mf_type))
+ return branch_fragP;
}
return NULL;
*/
cmp_fragP = i386_next_non_empty_frag (next_fragP);
pad_fragP = i386_next_non_empty_frag (cmp_fragP);
- branch_fragP = i386_next_jcc_frag (pad_fragP);
+ branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
if (branch_fragP)
{
/* The BRANCH_PADDING frag is merged with the
break;
case OPTION_MAMD64:
- intel64 = 0;
+ isa64 = amd64;
break;
case OPTION_MINTEL64:
- intel64 = 1;
+ isa64 = intel64;
break;
case 'O':