/* tc-i386.c -- Assemble code for the Intel 80386
- Copyright (C) 1989-2018 Free Software Foundation, Inc.
+ Copyright (C) 1989-2019 Free Software Foundation, Inc.
This file is part of GAS, the GNU Assembler.
#include "elf/x86-64.h"
#include "opcodes/i386-init.h"
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#else
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#ifndef INT_MAX
+#define INT_MAX (int) (((unsigned) (-1)) >> 1)
+#endif
+#endif
+
#ifndef REGISTER_WARNINGS
#define REGISTER_WARNINGS 1
#endif
#define END_OF_INSN '\0'
+/* This matches the C -> StaticRounding alias in the opcode table. */
+#define commutative staticrounding
+
/*
'templates' is for grouping together 'template' structures for opcodes
of the same name. This is only used for storing the insns in the grand
vex256
} avxscalar;
+/* Encode VEX WIG instructions with specific vex.w. */
+static enum
+ {
+ vexw0 = 0,
+ vexw1
+ } vexwig;
+
/* Encode scalar EVEX LIG instructions with specific vector length. */
static enum
{
CPU_MOVDIRI_FLAGS, 0 },
{ STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
CPU_MOVDIR64B_FLAGS, 0 },
+ { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
+ CPU_AVX512_BF16_FLAGS, 0 },
+ { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
+ CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
+ { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
+ CPU_ENQCMD_FLAGS, 0 },
};
static const noarch_entry cpu_noarch[] =
{ STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
{ STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
{ STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
+ { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
+ { STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_SHSTK_FLAGS },
+ { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
};
#ifdef I386COFF
/* Place the longer NOP first. */
int last;
int offset;
- const unsigned char *nops = patt[max_single_nop_size - 1];
+ const unsigned char *nops;
+
+ if (max_single_nop_size < 1)
+ {
+ as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
+ max_single_nop_size);
+ return;
+ }
+
+ nops = patt[max_single_nop_size - 1];
/* Use the smaller one if the requsted one isn't available. */
if (nops == NULL)
return x;
}
-static const i386_operand_type acc32 = OPERAND_TYPE_ACC32;
-static const i386_operand_type acc64 = OPERAND_TYPE_ACC64;
static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
}
/* Check reverse. */
- gas_assert (i.operands == 2);
+ gas_assert (i.operands >= 2 && i.operands <= 3);
- for (j = 0; j < 2; j++)
+ for (j = 0; j < i.operands; j++)
{
- if ((t->operand_types[j].bitfield.reg
- || t->operand_types[j].bitfield.acc)
- && !match_operand_size (t, j, !j))
+ unsigned int given = i.operands - j - 1;
+
+ if (t->operand_types[j].bitfield.reg
+ && !match_operand_size (t, j, given))
goto mismatch;
- if ((i.flags[!j] & Operand_Mem) && !match_mem_size (t, j, !j))
+ if (t->operand_types[j].bitfield.regsimd
+ && !match_simd_size (t, j, given))
+ goto mismatch;
+
+ if (t->operand_types[j].bitfield.acc
+ && (!match_operand_size (t, j, given)
+ || !match_simd_size (t, j, given)))
+ goto mismatch;
+
+ if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
goto mismatch;
}
static void ps (symbolS *);
static void
-pi (char *line, i386_insn *x)
+pi (const char *line, i386_insn *x)
{
unsigned int j;
{ OPERAND_TYPE_REG16, "r16" },
{ OPERAND_TYPE_REG32, "r32" },
{ OPERAND_TYPE_REG64, "r64" },
+ { OPERAND_TYPE_ACC8, "acc8" },
+ { OPERAND_TYPE_ACC16, "acc16" },
+ { OPERAND_TYPE_ACC32, "acc32" },
+ { OPERAND_TYPE_ACC64, "acc64" },
{ OPERAND_TYPE_IMM8, "i8" },
{ OPERAND_TYPE_IMM8, "i8s" },
{ OPERAND_TYPE_IMM16, "i16" },
{ OPERAND_TYPE_FLOATACC, "FAcc" },
{ OPERAND_TYPE_SREG2, "SReg2" },
{ OPERAND_TYPE_SREG3, "SReg3" },
- { OPERAND_TYPE_ACC, "Acc" },
{ OPERAND_TYPE_JUMPABSOLUTE, "Jump Absolute" },
{ OPERAND_TYPE_REGMMX, "rMMX" },
{ OPERAND_TYPE_REGXMM, "rXMM" },
for (j = 0; j < ARRAY_SIZE (type_names); j++)
{
a = operand_type_and (t, type_names[j].mask);
- if (!operand_type_all_zero (&a))
+ if (operand_type_equal (&a, &type_names[j].mask))
fprintf (stdout, "%s, ", type_names[j].name);
}
fflush (stdout);
unsigned int register_specifier;
unsigned int implied_prefix;
unsigned int vector_length;
+ unsigned int w;
/* Check register specifier. */
if (i.vex.register_specifier)
else
register_specifier = 0xf;
- /* Use 2-byte VEX prefix by swapping destination and source
- operand. */
- if (i.vec_encoding != vex_encoding_vex3
+ /* Use 2-byte VEX prefix by swapping destination and source operand
+ if there are more than 1 register operand. */
+ if (i.reg_operands > 1
+ && i.vec_encoding != vex_encoding_vex3
&& i.dir_encoding == dir_encoding_default
&& i.operands == i.reg_operands
+ && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
&& i.tm.opcode_modifier.vexopcode == VEX0F
- && i.tm.opcode_modifier.load
+ && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
&& i.rex == REX_B)
{
unsigned int xchg = i.operands - 1;
i.rm.regmem = i.rm.reg;
i.rm.reg = xchg;
- /* Use the next insn. */
- i.tm = t[1];
+ if (i.tm.opcode_modifier.d)
+ i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
+ ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
+ else /* Use the next insn. */
+ i.tm = t[1];
+ }
+
+ /* Use 2-byte VEX prefix by swapping commutative source operands if there
+ are no memory operands and at least 3 register ones. */
+ if (i.reg_operands >= 3
+ && i.vec_encoding != vex_encoding_vex3
+ && i.reg_operands == i.operands - i.imm_operands
+ && i.tm.opcode_modifier.vex
+ && i.tm.opcode_modifier.commutative
+ && (i.tm.opcode_modifier.sse2avx || optimize > 1)
+ && i.rex == REX_B
+ && i.vex.register_specifier
+ && !(i.vex.register_specifier->reg_flags & RegRex))
+ {
+ unsigned int xchg = i.operands - i.reg_operands;
+ union i386_op temp_op;
+ i386_operand_type temp_type;
+
+ gas_assert (i.tm.opcode_modifier.vexopcode == VEX0F);
+ gas_assert (!i.tm.opcode_modifier.sae);
+ gas_assert (operand_type_equal (&i.types[i.operands - 2],
+ &i.types[i.operands - 3]));
+ gas_assert (i.rm.mode == 3);
+
+ temp_type = i.types[xchg];
+ i.types[xchg] = i.types[xchg + 1];
+ i.types[xchg + 1] = temp_type;
+ temp_op = i.op[xchg];
+ i.op[xchg] = i.op[xchg + 1];
+ i.op[xchg + 1] = temp_op;
+
+ i.rex = 0;
+ xchg = i.rm.regmem | 8;
+ i.rm.regmem = ~register_specifier & 0xf;
+ gas_assert (!(i.rm.regmem & 8));
+ i.vex.register_specifier += xchg - i.rm.regmem;
+ register_specifier = ~xchg & 0xf;
}
if (i.tm.opcode_modifier.vex == VEXScalar)
abort ();
}
+ /* Check the REX.W bit and VEXW. */
+ if (i.tm.opcode_modifier.vexw == VEXWIG)
+ w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
+ else if (i.tm.opcode_modifier.vexw)
+ w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
+ else
+ w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
+
/* Use 2-byte VEX prefix if possible. */
- if (i.vec_encoding != vex_encoding_vex3
+ if (w == 0
+ && i.vec_encoding != vex_encoding_vex3
&& i.tm.opcode_modifier.vexopcode == VEX0F
- && i.tm.opcode_modifier.vexw != VEXW1
&& (i.rex & (REX_W | REX_X | REX_B)) == 0)
{
/* 2-byte VEX prefix. */
else
{
/* 3-byte VEX prefix. */
- unsigned int m, w;
+ unsigned int m;
i.vex.length = 3;
of RXB bits from REX. */
i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;
- /* Check the REX.W bit. */
- w = (i.rex & REX_W) ? 1 : 0;
- if (i.tm.opcode_modifier.vexw == VEXW1)
- w = 1;
-
i.vex.bytes[2] = (w << 7
| register_specifier << 3
| vector_length << 2
{
return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
|| t->opcode_modifier.broadcast || t->opcode_modifier.masking
- || t->opcode_modifier.staticrounding || t->opcode_modifier.sae;
+ || t->opcode_modifier.sae;
}
static INLINE bfd_boolean
i.vrex &= ~vrex_used;
gas_assert (i.vrex == 0);
- /* Check the REX.W bit. */
- w = (i.rex & REX_W) ? 1 : 0;
- if (i.tm.opcode_modifier.vexw)
- {
- if (i.tm.opcode_modifier.vexw == VEXW1)
- w = 1;
- }
- /* If w is not set it means we are dealing with WIG instruction. */
- else if (!w)
- {
- if (evexwig == evexw1)
- w = 1;
- }
+ /* Check the REX.W bit and VEXW. */
+ if (i.tm.opcode_modifier.vexw == VEXWIG)
+ w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
+ else if (i.tm.opcode_modifier.vexw)
+ w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
+ else
+ w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
/* Encode the U bit. */
implied_prefix |= 0x4;
static void
optimize_encoding (void)
{
- int j;
+ unsigned int j;
if (optimize_for_space
&& i.reg_operands == 1
&& i.tm.extension_opcode == 0x4)
|| ((i.tm.base_opcode == 0xf6
|| i.tm.base_opcode == 0xc6)
- && i.tm.extension_opcode == 0x0)))))
+ && i.tm.extension_opcode == 0x0)))
+ || (fits_in_imm7 (i.op[0].imms->X_add_number)
+ && i.tm.base_opcode == 0x83
+ && i.tm.extension_opcode == 0x4)))
|| (i.types[0].bitfield.qword
&& ((i.reg_operands == 2
&& i.op[0].regs == i.op[1].regs
{
/* Optimize: -O:
andq $imm31, %r64 -> andl $imm31, %r32
+ andq $imm7, %r64 -> andl $imm7, %r32
testq $imm31, %r64 -> testl $imm31, %r32
xorq %r64, %r64 -> xorl %r32, %r32
subq %r64, %r64 -> subl %r32, %r32
}
}
else if (optimize > 1
- && i.reg_operands == 3
+ && !optimize_for_space
+ && i.reg_operands == 2
+ && i.op[0].regs == i.op[1].regs
+ && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
+ || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
+ && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
+ {
+ /* Optimize: -O2:
+ andb %rN, %rN -> testb %rN, %rN
+ andw %rN, %rN -> testw %rN, %rN
+ andq %rN, %rN -> testq %rN, %rN
+ orb %rN, %rN -> testb %rN, %rN
+ orw %rN, %rN -> testw %rN, %rN
+ orq %rN, %rN -> testq %rN, %rN
+
+ and outside of 64-bit mode
+
+ andl %rN, %rN -> testl %rN, %rN
+ orl %rN, %rN -> testl %rN, %rN
+ */
+ i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
+ }
+ else if (i.reg_operands == 3
&& i.op[0].regs == i.op[1].regs
&& !i.types[2].bitfield.xmmword
&& (i.tm.opcode_modifier.vex
&& !i.rounding
&& is_evex_encoding (&i.tm)
&& (i.vec_encoding != vex_encoding_evex
+ || cpu_arch_isa_flags.bitfield.cpuavx512vl
|| i.tm.cpu_flags.bitfield.cpuavx512vl
|| (i.tm.operand_types[2].bitfield.zmmword
- && i.types[2].bitfield.ymmword)
- || cpu_arch_isa_flags.bitfield.cpuavx512vl)))
+ && i.types[2].bitfield.ymmword))))
&& ((i.tm.base_opcode == 0x55
|| i.tm.base_opcode == 0x6655
|| i.tm.base_opcode == 0x66df
|| i.tm.base_opcode == 0x6647)
&& i.tm.extension_opcode == None))
{
- /* Optimize: -O2:
+ /* Optimize: -O1:
VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
vpsubq and vpsubw:
EVEX VOP %zmmM, %zmmM, %zmmN
-> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
EVEX VOP %ymmM, %ymmM, %ymmN
-> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
VEX VOP %ymmM, %ymmM, %ymmN
-> VEX VOP %xmmM, %xmmM, %xmmN
VOP, one of vpandn and vpxor:
VOP, one of vpandnd and vpandnq:
EVEX VOP %zmmM, %zmmM, %zmmN
-> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
EVEX VOP %ymmM, %ymmM, %ymmN
-> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
VOP, one of vpxord and vpxorq:
EVEX VOP %zmmM, %zmmM, %zmmN
-> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
EVEX VOP %ymmM, %ymmM, %ymmN
-> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
VOP, one of kxord and kxorq:
VEX VOP %kM, %kM, %kN
-> VEX kxorw %kM, %kM, %kN
*/
if (is_evex_encoding (&i.tm))
{
- if (i.vec_encoding == vex_encoding_evex)
- i.tm.opcode_modifier.evex = EVEX128;
- else
+ if (i.vec_encoding != vex_encoding_evex)
{
i.tm.opcode_modifier.vex = VEX128;
i.tm.opcode_modifier.vexw = VEXW0;
i.tm.opcode_modifier.evex = 0;
}
+ else if (optimize > 1)
+ i.tm.opcode_modifier.evex = EVEX128;
+ else
+ return;
}
else if (i.tm.operand_types[0].bitfield.regmask)
{
i.types[j].bitfield.ymmword = 0;
}
}
+ else if (i.vec_encoding != vex_encoding_evex
+ && !i.types[0].bitfield.zmmword
+ && !i.types[1].bitfield.zmmword
+ && !i.mask
+ && !i.broadcast
+ && is_evex_encoding (&i.tm)
+ && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
+ || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
+ || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f
+ || (i.tm.base_opcode & ~4) == 0x66db
+ || (i.tm.base_opcode & ~4) == 0x66eb)
+ && i.tm.extension_opcode == None)
+ {
+ /* Optimize: -O1:
+ VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
+ vmovdqu32 and vmovdqu64:
+ EVEX VOP %xmmM, %xmmN
+ -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
+ EVEX VOP %ymmM, %ymmN
+ -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
+ EVEX VOP %xmmM, mem
+ -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
+ EVEX VOP %ymmM, mem
+ -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
+ EVEX VOP mem, %xmmN
+ -> VEX mvmovdqa|vmovdquem, %xmmN (N < 16)
+ EVEX VOP mem, %ymmN
+ -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
+ VOP, one of vpand, vpandn, vpor, vpxor:
+ EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
+ -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
+ EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
+ -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
+ EVEX VOP{d,q} mem, %xmmM, %xmmN
+ -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
+ EVEX VOP{d,q} mem, %ymmM, %ymmN
+ -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
+ */
+ for (j = 0; j < i.operands; j++)
+ if (operand_type_check (i.types[j], disp)
+ && i.op[j].disps->X_op == O_constant)
+ {
+ /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
+ has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
+ bytes, we choose EVEX Disp8 over VEX Disp32. */
+ int evex_disp8, vex_disp8;
+ unsigned int memshift = i.memshift;
+ offsetT n = i.op[j].disps->X_add_number;
+
+ evex_disp8 = fits_in_disp8 (n);
+ i.memshift = 0;
+ vex_disp8 = fits_in_disp8 (n);
+ if (evex_disp8 != vex_disp8)
+ {
+ i.memshift = memshift;
+ return;
+ }
+
+ i.types[j].bitfield.disp8 = vex_disp8;
+ break;
+ }
+ if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
+ i.tm.base_opcode ^= 0xf36f ^ 0xf26f;
+ i.tm.opcode_modifier.vex
+ = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
+ i.tm.opcode_modifier.vexw = VEXW0;
+ /* VPAND, VPOR, and VPXOR are commutative. */
+ if (i.reg_operands == 3 && i.tm.base_opcode != 0x66df)
+ i.tm.opcode_modifier.commutative = 1;
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.opcode_modifier.masking = 0;
+ i.tm.opcode_modifier.broadcast = 0;
+ i.tm.opcode_modifier.disp8memshift = 0;
+ i.memshift = 0;
+ if (j < i.operands)
+ i.types[j].bitfield.disp8
+ = fits_in_disp8 (i.op[j].disps->X_add_number);
+ }
}
/* This is the guts of the machine-dependent assembler. LINE points to a
if (is_any_vex_encoding (&i.tm))
{
- if (flag_code == CODE_16BIT)
+ if (!cpu_arch_flags.bitfield.cpui286)
{
- as_bad (_("instruction `%s' isn't supported in 16-bit mode."),
+ as_bad (_("instruction `%s' isn't supported outside of protected mode."),
i.tm.name);
return;
}
}
/* If we are in 16-bit mode, do not allow addr16 or data16.
Similarly, in 32-bit mode, do not allow addr32 or data32. */
- if ((current_templates->start->opcode_modifier.size16
- || current_templates->start->opcode_modifier.size32)
+ if ((current_templates->start->opcode_modifier.size == SIZE16
+ || current_templates->start->opcode_modifier.size == SIZE32)
&& flag_code != CODE_64BIT
- && (current_templates->start->opcode_modifier.size32
+ && ((current_templates->start->opcode_modifier.size == SIZE32)
^ (flag_code == CODE_16BIT)))
{
as_bad (_("redundant %s prefix"),
if (!current_templates)
{
check_suffix:
- /* See if we can get a match by trimming off a suffix. */
- switch (mnem_p[-1])
+ if (mnem_p > mnemonic)
{
- case WORD_MNEM_SUFFIX:
- if (intel_syntax && (intel_float_operand (mnemonic) & 2))
- i.suffix = SHORT_MNEM_SUFFIX;
- else
- /* Fall through. */
- case BYTE_MNEM_SUFFIX:
- case QWORD_MNEM_SUFFIX:
- i.suffix = mnem_p[-1];
- mnem_p[-1] = '\0';
- current_templates = (const templates *) hash_find (op_hash,
- mnemonic);
- break;
- case SHORT_MNEM_SUFFIX:
- case LONG_MNEM_SUFFIX:
- if (!intel_syntax)
+ /* See if we can get a match by trimming off a suffix. */
+ switch (mnem_p[-1])
{
- i.suffix = mnem_p[-1];
- mnem_p[-1] = '\0';
- current_templates = (const templates *) hash_find (op_hash,
- mnemonic);
- }
- break;
-
- /* Intel Syntax. */
- case 'd':
- if (intel_syntax)
- {
- if (intel_float_operand (mnemonic) == 1)
+ case WORD_MNEM_SUFFIX:
+ if (intel_syntax && (intel_float_operand (mnemonic) & 2))
i.suffix = SHORT_MNEM_SUFFIX;
else
- i.suffix = LONG_MNEM_SUFFIX;
+ /* Fall through. */
+ case BYTE_MNEM_SUFFIX:
+ case QWORD_MNEM_SUFFIX:
+ i.suffix = mnem_p[-1];
mnem_p[-1] = '\0';
current_templates = (const templates *) hash_find (op_hash,
- mnemonic);
+ mnemonic);
+ break;
+ case SHORT_MNEM_SUFFIX:
+ case LONG_MNEM_SUFFIX:
+ if (!intel_syntax)
+ {
+ i.suffix = mnem_p[-1];
+ mnem_p[-1] = '\0';
+ current_templates = (const templates *) hash_find (op_hash,
+ mnemonic);
+ }
+ break;
+
+ /* Intel Syntax. */
+ case 'd':
+ if (intel_syntax)
+ {
+ if (intel_float_operand (mnemonic) == 1)
+ i.suffix = SHORT_MNEM_SUFFIX;
+ else
+ i.suffix = LONG_MNEM_SUFFIX;
+ mnem_p[-1] = '\0';
+ current_templates = (const templates *) hash_find (op_hash,
+ mnemonic);
+ }
+ break;
}
- break;
}
+
if (!current_templates)
{
as_bad (_("no such instruction: `%s'"), token_start);
/* Check RC/SAE. */
if (i.rounding)
{
- if ((i.rounding->type != saeonly
- && !t->opcode_modifier.staticrounding)
- || (i.rounding->type == saeonly
- && (t->opcode_modifier.staticrounding
- || !t->opcode_modifier.sae)))
+ if (!t->opcode_modifier.sae
+ || (i.rounding->type != saeonly && !t->opcode_modifier.staticrounding))
{
i.error = unsupported_rc_sae;
return 1;
for (t = current_templates->start; t < current_templates->end; t++)
{
addr_prefix_disp = -1;
+ found_reverse_match = 0;
if (i.operands != t->operands)
continue;
zero-extend %eax to %rax. */
if (flag_code == CODE_64BIT
&& t->base_opcode == 0x90
- && operand_type_equal (&i.types [0], &acc32)
- && operand_type_equal (&i.types [1], &acc32))
+ && i.types[0].bitfield.acc && i.types[0].bitfield.dword
+ && i.types[1].bitfield.acc && i.types[1].bitfield.dword)
continue;
/* xrelease mov %eax, <disp> is another special case. It must not
match the accumulator-only encoding of mov. */
&& i.types[0].bitfield.acc
&& operand_type_check (i.types[1], anymem))
continue;
+ /* Fall through. */
+
+ case 3:
if (!(size_match & MATCH_STRAIGHT))
goto check_reverse;
/* Reverse direction of operands if swapping is possible in the first
- the store form is requested, and the template is a load form,
- the non-default (swapped) form is requested. */
overlap1 = operand_type_and (operand_types[0], operand_types[1]);
- if (t->opcode_modifier.d && i.reg_operands == 2
+ if (t->opcode_modifier.d && i.reg_operands == i.operands
&& !operand_type_all_zero (&overlap1))
switch (i.dir_encoding)
{
case dir_encoding_default:
break;
}
- /* Fall through. */
-
- case 3:
/* If we want store form, we skip the current load. */
if ((i.dir_encoding == dir_encoding_store
|| i.dir_encoding == dir_encoding_swap)
if (!(size_match & MATCH_REVERSE))
continue;
/* Try reversing direction of operands. */
- overlap0 = operand_type_and (i.types[0], operand_types[1]);
- overlap1 = operand_type_and (i.types[1], operand_types[0]);
+ overlap0 = operand_type_and (i.types[0], operand_types[i.operands - 1]);
+ overlap1 = operand_type_and (i.types[i.operands - 1], operand_types[0]);
if (!operand_type_match (overlap0, i.types[0])
- || !operand_type_match (overlap1, i.types[1])
+ || !operand_type_match (overlap1, i.types[i.operands - 1])
|| (check_register
&& !operand_type_register_match (i.types[0],
- operand_types[1],
- i.types[1],
+ operand_types[i.operands - 1],
+ i.types[i.operands - 1],
operand_types[0])))
{
/* Does not match either direction. */
found_reverse_match = 0;
else if (operand_types[0].bitfield.tbyte)
found_reverse_match = Opcode_FloatD;
+ else if (operand_types[0].bitfield.xmmword
+ || operand_types[i.operands - 1].bitfield.xmmword
+ || operand_types[0].bitfield.regmmx
+ || operand_types[i.operands - 1].bitfield.regmmx
+ || is_any_vex_encoding(t))
+ found_reverse_match = (t->base_opcode & 0xee) != 0x6e
+ ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
else
found_reverse_match = Opcode_D;
if (t->opcode_modifier.floatr)
slip through to break. */
}
if (!found_cpu_match)
- {
- found_reverse_match = 0;
- continue;
- }
+ continue;
/* Check if vector and VEX operands are valid. */
if (check_VecOperands (t) || VEX_check_operands (t))
i.tm.base_opcode ^= found_reverse_match;
- i.tm.operand_types[0] = operand_types[1];
- i.tm.operand_types[1] = operand_types[0];
+ i.tm.operand_types[0] = operand_types[i.operands - 1];
+ i.tm.operand_types[i.operands - 1] = operand_types[0];
}
return t;
{
/* If matched instruction specifies an explicit instruction mnemonic
suffix, use it. */
- if (i.tm.opcode_modifier.size16)
+ if (i.tm.opcode_modifier.size == SIZE16)
i.suffix = WORD_MNEM_SUFFIX;
- else if (i.tm.opcode_modifier.size32)
+ else if (i.tm.opcode_modifier.size == SIZE32)
i.suffix = LONG_MNEM_SUFFIX;
- else if (i.tm.opcode_modifier.size64)
+ else if (i.tm.opcode_modifier.size == SIZE64)
i.suffix = QWORD_MNEM_SUFFIX;
else if (i.reg_operands)
{
Destination register type is more significant than source
register type. crc32 in SSE4.2 prefers source register
type. */
- if (i.tm.base_opcode == 0xf20f38f1)
+ if (i.tm.base_opcode == 0xf20f38f0 && i.types[0].bitfield.reg)
{
- if (i.types[0].bitfield.reg && i.types[0].bitfield.word)
+ if (i.types[0].bitfield.byte)
+ i.suffix = BYTE_MNEM_SUFFIX;
+ else if (i.types[0].bitfield.word)
i.suffix = WORD_MNEM_SUFFIX;
- else if (i.types[0].bitfield.reg && i.types[0].bitfield.dword)
+ else if (i.types[0].bitfield.dword)
i.suffix = LONG_MNEM_SUFFIX;
- else if (i.types[0].bitfield.reg && i.types[0].bitfield.qword)
+ else if (i.types[0].bitfield.qword)
i.suffix = QWORD_MNEM_SUFFIX;
}
- else if (i.tm.base_opcode == 0xf20f38f0)
- {
- if (i.types[0].bitfield.reg && i.types[0].bitfield.byte)
- i.suffix = BYTE_MNEM_SUFFIX;
- }
if (!i.suffix)
{
int op;
- if (i.tm.base_opcode == 0xf20f38f1
- || i.tm.base_opcode == 0xf20f38f0)
+ if (i.tm.base_opcode == 0xf20f38f0)
{
/* We have to know the operand size for crc32. */
as_bad (_("ambiguous memory operand size for `%s`"),
/* exclude fldenv/frstor/fsave/fstenv */
&& i.tm.opcode_modifier.no_ssuf)
{
- i.suffix = stackop_size;
+ if (stackop_size == LONG_MNEM_SUFFIX
+ && i.tm.base_opcode == 0xcf)
+ {
+ /* stackop_size is set to LONG_MNEM_SUFFIX for the
+ .code16gcc directive to support 16-bit mode with
+ 32-bit address. For IRET without a suffix, generate
+ 16-bit IRET (opcode 0xcf) to return from an interrupt
+ handler. */
+ i.suffix = WORD_MNEM_SUFFIX;
+ as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
+ }
+ else
+ i.suffix = stackop_size;
}
else if (intel_syntax
&& !i.suffix
else if (i.suffix != QWORD_MNEM_SUFFIX
&& !i.tm.opcode_modifier.ignoresize
&& !i.tm.opcode_modifier.floatmf
- && !i.tm.opcode_modifier.vex
- && !i.tm.opcode_modifier.vexopcode
- && !is_evex_encoding (&i.tm)
+ && !is_any_vex_encoding (&i.tm)
&& ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
|| (flag_code == CODE_64BIT
&& i.tm.opcode_modifier.jumpbyte)))
&& ! (i.operands == 2
&& i.tm.base_opcode == 0x90
&& i.tm.extension_opcode == None
- && operand_type_equal (&i.types [0], &acc64)
- && operand_type_equal (&i.types [1], &acc64)))
+ && i.types[0].bitfield.acc && i.types[0].bitfield.qword
+ && i.types[1].bitfield.acc && i.types[1].bitfield.qword))
i.rex |= REX_W;
break;
if (!IS_ELF)
return FALSE;
+#ifdef TE_SOLARIS
+ /* Don't emit PLT32 relocation on Solaris: neither native linker nor
+ krtld support it. */
+ return FALSE;
+#endif
+
/* Since there is no need to prepare for PLT branch on x86-64, we
can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
be used as a marker for 32-bit PC-relative branches. */
if (!IS_ELF || !x86_used_note)
return;
- x86_isa_1_used |= GNU_PROPERTY_X86_UINT32_VALID;
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
/* The .note.gnu.property section layout:
}
#endif
+static unsigned int
+encoding_length (const fragS *start_frag, offsetT start_off,
+ const char *frag_now_ptr)
+{
+ unsigned int len = 0;
+
+ if (start_frag != frag_now)
+ {
+ const fragS *fr = start_frag;
+
+ do {
+ len += fr->fr_fix;
+ fr = fr->fr_next;
+ } while (fr && fr != frag_now);
+ }
+
+ return len - start_off + (frag_now_ptr - frag_now->fr_literal);
+}
+
static void
output_insn (void)
{
x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2;
if (i.tm.cpu_flags.bitfield.cpuavx512_vnni)
x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VNNI;
+ if (i.tm.cpu_flags.bitfield.cpuavx512_bf16)
+ x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BF16;
if (i.tm.cpu_flags.bitfield.cpu8087
|| i.tm.cpu_flags.bitfield.cpu287
Xfence instructions. */
if (i.tm.base_opcode != 0xf18
&& i.tm.base_opcode != 0xf0d
- && i.tm.base_opcode != 0xfae
+ && i.tm.base_opcode != 0xfaef8
&& (i.has_regmmx
|| i.tm.cpu_flags.bitfield.cpummx
|| i.tm.cpu_flags.bitfield.cpua3dnow
unsigned int prefix;
if (avoid_fence
- && i.tm.base_opcode == 0xfae
- && i.operands == 1
- && i.imm_operands == 1
- && (i.op[0].imms->X_add_number == 0xe8
- || i.op[0].imms->X_add_number == 0xf0
- || i.op[0].imms->X_add_number == 0xf8))
+ && (i.tm.base_opcode == 0xfaee8
+ || i.tm.base_opcode == 0xfaef0
+ || i.tm.base_opcode == 0xfaef8))
{
/* Encode lfence, mfence, and sfence as
f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
if (i.tm.base_opcode & 0xff000000)
{
prefix = (i.tm.base_opcode >> 24) & 0xff;
- add_prefix (prefix);
+ if (!i.tm.cpu_flags.bitfield.cpupadlock
+ || prefix != REPE_PREFIX_OPCODE
+ || (i.prefix[REP_PREFIX] != REPE_PREFIX_OPCODE))
+ add_prefix (prefix);
}
break;
case 2:
if ((i.tm.base_opcode & 0xff0000) != 0)
{
prefix = (i.tm.base_opcode >> 16) & 0xff;
- if (!i.tm.cpu_flags.bitfield.cpupadlock
- || prefix != REPE_PREFIX_OPCODE
- || (i.prefix[REP_PREFIX] != REPE_PREFIX_OPCODE))
- add_prefix (prefix);
+ add_prefix (prefix);
}
break;
case 1:
if (i.imm_operands)
output_imm (insn_start_frag, insn_start_off);
+
+ /*
+ * frag_now_fix () returning plain abs_section_offset when we're in the
+ * absolute section, and abs_section_offset not getting updated as data
+ * gets added to the frag breaks the logic below.
+ */
+ if (now_seg != absolute_section)
+ {
+ j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
+ if (j > 15)
+ as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
+ j);
+ }
}
#ifdef DEBUG386
== O_subtract))))
|| reloc_type == BFD_RELOC_32_PCREL))
{
- offsetT add;
-
- if (insn_start_frag == frag_now)
- add = (p - frag_now->fr_literal) - insn_start_off;
- else
- {
- fragS *fr;
-
- add = insn_start_frag->fr_fix - insn_start_off;
- for (fr = insn_start_frag->fr_next;
- fr && fr != frag_now; fr = fr->fr_next)
- add += fr->fr_fix;
- add += p - frag_now->fr_literal;
- }
-
if (!object_64bit)
{
reloc_type = BFD_RELOC_386_GOTPC;
- i.op[n].imms->X_add_number += add;
+ i.op[n].imms->X_add_number +=
+ encoding_length (insn_start_frag, insn_start_off, p);
}
else if (reloc_type == BFD_RELOC_64)
reloc_type = BFD_RELOC_X86_64_GOTPC64;
/* Check for "call/jmp *mem", "mov mem, %reg",
"test %reg, mem" and "binop mem, %reg" where binop
is one of adc, add, and, cmp, or, sbb, sub, xor
- instructions. Always generate R_386_GOT32X for
- "sym*GOT" operand in 32-bit mode. */
- if ((generate_relax_relocations
- || (!object_64bit
- && i.rm.mode == 0
- && i.rm.regmem == 5))
+ instructions without data prefix. Always generate
+ R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
+ if (i.prefix[DATA_PREFIX] == 0
+ && (generate_relax_relocations
+ || (!object_64bit
+ && i.rm.mode == 0
+ && i.rm.regmem == 5))
&& (i.rm.mode == 2
|| (i.rm.mode == 0 && i.rm.regmem == 5))
&& ((i.operands == 1
(i.op[n].imms->X_op_symbol)->X_op)
== O_subtract))))
{
- offsetT add;
-
- if (insn_start_frag == frag_now)
- add = (p - frag_now->fr_literal) - insn_start_off;
- else
- {
- fragS *fr;
-
- add = insn_start_frag->fr_fix - insn_start_off;
- for (fr = insn_start_frag->fr_next;
- fr && fr != frag_now; fr = fr->fr_next)
- add += fr->fr_fix;
- add += p - frag_now->fr_literal;
- }
-
if (!object_64bit)
reloc_type = BFD_RELOC_386_GOTPC;
else if (size == 4)
reloc_type = BFD_RELOC_X86_64_GOTPC32;
else if (size == 8)
reloc_type = BFD_RELOC_X86_64_GOTPC64;
- i.op[n].imms->X_add_number += add;
+ i.op[n].imms->X_add_number +=
+ encoding_length (insn_start_frag, insn_start_off, p);
}
fix_new_exp (frag_now, p - frag_now->fr_literal, size,
i.op[n].imms, 0, reloc_type);
as_bad (_("missing or invalid expression `%s'"), save);
*input_line_pointer = c;
}
+ else if ((got_reloc == BFD_RELOC_386_PLT32
+ || got_reloc == BFD_RELOC_X86_64_PLT32)
+ && exp->X_op != O_symbol)
+ {
+ char c = *input_line_pointer;
+ *input_line_pointer = 0;
+ as_bad (_("invalid PLT expression `%s'"), save);
+ *input_line_pointer = c;
+ }
}
}
else
enum flag_code addr_mode = i386_addressing_mode ();
if (current_templates->start->opcode_modifier.isstring
- && !current_templates->start->opcode_modifier.immext
+ && !current_templates->start->cpu_flags.bitfield.cpupadlock
&& (current_templates->end[-1].opcode_modifier.isstring
|| i.mem_operands))
{
{
case BFD_RELOC_386_PLT32:
case BFD_RELOC_X86_64_PLT32:
- /* Make the jump instruction point to the address of the operand. At
- runtime we merely add the offset to the actual PLT entry. */
- value = -4;
+ /* Make the jump instruction point to the address of the operand.
+ At runtime we merely add the offset to the actual PLT entry.
+ NB: Subtract the offset size only for jump instructions. */
+ if (fixP->fx_pcrel)
+ value = -4;
break;
case BFD_RELOC_386_TLS_GD:
#define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
#define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
#define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
+#define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
struct option md_longopts[] =
{
{"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
{"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
{"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
+ {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
{"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
{"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
{"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
break;
+ case OPTION_MVEXWIG:
+ if (strcmp (arg, "0") == 0)
+ vexwig = evexw0;
+ else if (strcmp (arg, "1") == 0)
+ vexwig = evexw1;
+ else
+ as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
+ break;
+
case OPTION_MADD_BND_PREFIX:
add_bnd_prefix = 1;
break;
{
optimize_for_space = 1;
/* Turn on all encoding optimizations. */
- optimize = -1;
+ optimize = INT_MAX;
}
else
{
encode scalar AVX instructions with specific vector\n\
length\n"));
fprintf (stream, _("\
+ -mvexwig=[0|1] (default: 0)\n\
+ encode VEX instructions with specific VEX.W value\n\
+ for VEX.W bit ignored instructions\n"));
+ fprintf (stream, _("\
-mevexlig=[128|256|512] (default: 128)\n\
encode scalar EVEX instructions with specific vector\n\
length\n"));