#define SHORT_MNEM_SUFFIX 's'
#define LONG_MNEM_SUFFIX 'l'
#define QWORD_MNEM_SUFFIX 'q'
-#define XMMWORD_MNEM_SUFFIX 'x'
-#define YMMWORD_MNEM_SUFFIX 'y'
-#define ZMMWORD_MNEM_SUFFIX 'z'
/* Intel Syntax.  Use a non-ascii letter since it never appears
   in instructions.  */
#define LONG_DOUBLE_MNEM_SUFFIX '\1'
broadcast factor. */
struct Broadcast_Operation
{
- /* Type of broadcast: no broadcast, {1to8}, or {1to16}. */
+ /* Type of broadcast: {1to2}, {1to4}, {1to8}, or {1to16}. */
int type;
/* Index of broadcasted operand. */
number_of_operands_mismatch,
invalid_instruction_suffix,
bad_imm4,
- old_gcc_only,
unsupported_with_intel_mnemonic,
unsupported_syntax,
unsupported,
disp_encoding_32bit
} disp_encoding;
+ /* Prefer the REX byte in encoding. */
+ bfd_boolean rex_encoding;
+
+ /* Disable instruction size optimization. */
+ bfd_boolean no_optimize;
+
/* How to encode vector instructions. */
enum
{
&& !defined (TE_GNU) \
&& !defined (TE_LINUX) \
&& !defined (TE_NACL) \
- && !defined (TE_NETWARE) \
&& !defined (TE_FreeBSD) \
&& !defined (TE_DragonFly) \
&& !defined (TE_NetBSD)))
0 if att mnemonic. */
static int intel_mnemonic = !SYSV386_COMPAT;
-/* 1 if support old (<= 2.8.1) versions of gcc. */
-static int old_gcc = OLDGCC_COMPAT;
-
/* 1 if pseudo registers are permitted. */
static int allow_pseudo_reg = 0;
}
sse_check, operand_check = check_warning;
+/* Optimization:
+ 1. Clear the REX_W bit with register operand if possible.
+ 2. Above plus use 128bit vector instruction to clear the full vector
+ register.
+ */
+static int optimize = 0;
+
+/* Optimization:
+ 1. Clear the REX_W bit with register operand if possible.
+ 2. Above plus use 128bit vector instruction to clear the full vector
+ register.
+ 3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
+ "testb $imm7,%r8".
+ */
+static int optimize_for_space = 0;
+
/* Register prefix used for error message. */
static const char *register_prefix = "%";
CPU_WBNOINVD_FLAGS, 0 },
{ STRING_COMMA_LEN (".pconfig"), PROCESSOR_UNKNOWN,
CPU_PCONFIG_FLAGS, 0 },
+ { STRING_COMMA_LEN (".waitpkg"), PROCESSOR_UNKNOWN,
+ CPU_WAITPKG_FLAGS, 0 },
+ { STRING_COMMA_LEN (".cldemote"), PROCESSOR_UNKNOWN,
+ CPU_CLDEMOTE_FLAGS, 0 },
};
static const noarch_entry cpu_noarch[] =
/* Hash table for register lookup. */
static struct hash_control *reg_hash;
\f
-void
-i386_align_code (fragS *fragP, int count)
-{
/* Various efficient no-op patterns for aligning code labels.
Note: Don't try to assemble the instructions in the comments.
0L and 0w are not legal. */
- static const unsigned char f32_1[] =
- {0x90}; /* nop */
- static const unsigned char f32_2[] =
- {0x66,0x90}; /* xchg %ax,%ax */
- static const unsigned char f32_3[] =
- {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
- static const unsigned char f32_4[] =
- {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
- static const unsigned char f32_5[] =
- {0x90, /* nop */
- 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
- static const unsigned char f32_6[] =
- {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
- static const unsigned char f32_7[] =
- {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
- static const unsigned char f32_8[] =
- {0x90, /* nop */
- 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
- static const unsigned char f32_9[] =
- {0x89,0xf6, /* movl %esi,%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const unsigned char f32_10[] =
- {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const unsigned char f32_11[] =
- {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const unsigned char f32_12[] =
- {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
- 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
- static const unsigned char f32_13[] =
- {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const unsigned char f32_14[] =
- {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const unsigned char f16_3[] =
- {0x8d,0x74,0x00}; /* lea 0(%esi),%esi */
- static const unsigned char f16_4[] =
- {0x8d,0xb4,0x00,0x00}; /* lea 0w(%si),%si */
- static const unsigned char f16_5[] =
- {0x90, /* nop */
- 0x8d,0xb4,0x00,0x00}; /* lea 0w(%si),%si */
- static const unsigned char f16_6[] =
- {0x89,0xf6, /* mov %si,%si */
- 0x8d,0xbd,0x00,0x00}; /* lea 0w(%di),%di */
- static const unsigned char f16_7[] =
- {0x8d,0x74,0x00, /* lea 0(%si),%si */
- 0x8d,0xbd,0x00,0x00}; /* lea 0w(%di),%di */
- static const unsigned char f16_8[] =
- {0x8d,0xb4,0x00,0x00, /* lea 0w(%si),%si */
- 0x8d,0xbd,0x00,0x00}; /* lea 0w(%di),%di */
- static const unsigned char jump_31[] =
- {0xeb,0x1d,0x90,0x90,0x90,0x90,0x90, /* jmp .+31; lotsa nops */
- 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,
- 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,
- 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
- static const unsigned char *const f32_patt[] = {
- f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
- f32_9, f32_10, f32_11, f32_12, f32_13, f32_14
- };
- static const unsigned char *const f16_patt[] = {
- f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8
- };
- /* nopl (%[re]ax) */
- static const unsigned char alt_3[] =
- {0x0f,0x1f,0x00};
- /* nopl 0(%[re]ax) */
- static const unsigned char alt_4[] =
- {0x0f,0x1f,0x40,0x00};
- /* nopl 0(%[re]ax,%[re]ax,1) */
- static const unsigned char alt_5[] =
- {0x0f,0x1f,0x44,0x00,0x00};
- /* nopw 0(%[re]ax,%[re]ax,1) */
- static const unsigned char alt_6[] =
- {0x66,0x0f,0x1f,0x44,0x00,0x00};
- /* nopl 0L(%[re]ax) */
- static const unsigned char alt_7[] =
- {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
- /* nopl 0L(%[re]ax,%[re]ax,1) */
- static const unsigned char alt_8[] =
- {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
- /* nopw 0L(%[re]ax,%[re]ax,1) */
- static const unsigned char alt_9[] =
- {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
- /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
- static const unsigned char alt_10[] =
- {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
- static const unsigned char *const alt_patt[] = {
- f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
- alt_9, alt_10
- };
+static const unsigned char f32_1[] =
+  {0x90};			/* nop			*/
+static const unsigned char f32_2[] =
+  {0x66,0x90};			/* xchg %ax,%ax		*/
+static const unsigned char f32_3[] =
+  {0x8d,0x76,0x00};		/* leal 0(%esi),%esi	*/
+static const unsigned char f32_4[] =
+  {0x8d,0x74,0x26,0x00};	/* leal 0(%esi,1),%esi	*/
+static const unsigned char f32_6[] =
+  {0x8d,0xb6,0x00,0x00,0x00,0x00};	/* leal 0L(%esi),%esi	*/
+static const unsigned char f32_7[] =
+  {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};	/* leal 0L(%esi,1),%esi */
+static const unsigned char f16_3[] =
+  {0x8d,0x74,0x00};		/* lea 0(%si),%si	*/
+static const unsigned char f16_4[] =
+  {0x8d,0xb4,0x00,0x00};	/* lea 0W(%si),%si	*/
+static const unsigned char jump_disp8[] =
+  {0xeb};			/* jmp disp8	       */
+static const unsigned char jump32_disp32[] =
+  {0xe9};			/* jmp disp32	       */
+static const unsigned char jump16_disp32[] =
+  {0x66,0xe9};			/* jmp disp32	       */
+/* 32-bit NOPs patterns, indexed by size minus one.  A NULL entry
+   means no single pattern of that exact size exists (size 5 is
+   synthesized from shorter patterns by i386_output_nops).  */
+static const unsigned char *const f32_patt[] = {
+  f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
+};
+/* 16-bit NOPs patterns, indexed by size minus one.  */
+static const unsigned char *const f16_patt[] = {
+  f32_1, f32_2, f16_3, f16_4
+};
+/* nopl (%[re]ax) */
+static const unsigned char alt_3[] =
+  {0x0f,0x1f,0x00};
+/* nopl 0(%[re]ax) */
+static const unsigned char alt_4[] =
+  {0x0f,0x1f,0x40,0x00};
+/* nopl 0(%[re]ax,%[re]ax,1) */
+static const unsigned char alt_5[] =
+  {0x0f,0x1f,0x44,0x00,0x00};
+/* nopw 0(%[re]ax,%[re]ax,1) */
+static const unsigned char alt_6[] =
+  {0x66,0x0f,0x1f,0x44,0x00,0x00};
+/* nopl 0L(%[re]ax) */
+static const unsigned char alt_7[] =
+  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
+/* nopl 0L(%[re]ax,%[re]ax,1) */
+static const unsigned char alt_8[] =
+  {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+/* nopw 0L(%[re]ax,%[re]ax,1) */
+static const unsigned char alt_9[] =
+  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
+static const unsigned char alt_10[] =
+  {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+/* data16 nopw %cs:0L(%eax,%eax,1) */
+static const unsigned char alt_11[] =
+  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+/* 32-bit and 64-bit NOPs patterns (multi-byte 0F 1F NOPs), indexed by
+   size minus one.  */
+static const unsigned char *const alt_patt[] = {
+  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
+  alt_9, alt_10, alt_11
+};
- /* Only align for at least a positive non-zero boundary. */
- if (count <= 0 || count > MAX_MEM_FOR_RS_ALIGN_CODE)
- return;
+/* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
+   size of a single NOP instruction MAX_SINGLE_NOP_SIZE.  PATT is an
+   array of NOP patterns indexed by size minus one; an entry may be
+   NULL when no pattern of that exact size exists.  */
+
+static void
+i386_output_nops (char *where, const unsigned char *const *patt,
+		  int count, int max_single_nop_size)
+
+{
+  /* Place the longer NOP first.  */
+  int last;
+  int offset;
+  const unsigned char *nops = patt[max_single_nop_size - 1];
+
+  /* Use the smaller one if the requested one isn't available.  */
+  if (nops == NULL)
+    {
+      max_single_nop_size--;
+      nops = patt[max_single_nop_size - 1];
+    }
+
+  /* Emit as many maximum-size NOPs as fit; LAST is the remainder.  */
+  last = count % max_single_nop_size;
+
+  count -= last;
+  for (offset = 0; offset < count; offset += max_single_nop_size)
+    memcpy (where + offset, nops, max_single_nop_size);
+
+  if (last)
+    {
+      nops = patt[last - 1];
+      if (nops == NULL)
+	{
+	  /* Use the smaller one plus one-byte NOP if the needed one
+	     isn't available.  */
+	  last--;
+	  nops = patt[last - 1];
+	  memcpy (where + offset, nops, last);
+	  where[offset + last] = *patt[0];
+	}
+      else
+	memcpy (where + offset, nops, last);
+    }
+}
+
+/* Return non-zero if NUM fits in a 7-bit unsigned immediate,
+   i.e. 0 <= NUM <= 0x7f.  */
+
+static INLINE int
+fits_in_imm7 (offsetT num)
+{
+  return (num & 0x7f) == num;
+}
+
+/* Return non-zero if NUM fits in a 31-bit unsigned immediate,
+   i.e. 0 <= NUM <= 0x7fffffff.  */
+
+static INLINE int
+fits_in_imm31 (offsetT num)
+{
+  return (num & 0x7fffffff) == num;
+}
+
+/* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
+   single NOP instruction LIMIT.  */
+
+void
+i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
+{
+ const unsigned char *const *patt = NULL;
+ int max_single_nop_size;
+ /* Maximum number of NOPs before switching to jump over NOPs. */
+ int max_number_of_nops;
+
+ switch (fragP->fr_type)
+ {
+ case rs_fill_nop:
+ case rs_align_code:
+ break;
+ default:
+ return;
+ }
/* We need to decide which NOP sequence to use for 32bit and
64bit. When -mtune= is used:
if (flag_code == CODE_16BIT)
{
- if (count > 8)
- {
- memcpy (fragP->fr_literal + fragP->fr_fix,
- jump_31, count);
- /* Adjust jump offset. */
- fragP->fr_literal[fragP->fr_fix + 1] = count - 2;
- }
- else
- memcpy (fragP->fr_literal + fragP->fr_fix,
- f16_patt[count - 1], count);
+ patt = f16_patt;
+ max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
+ /* Limit number of NOPs to 2 in 16-bit mode. */
+ max_number_of_nops = 2;
}
else
{
- const unsigned char *const *patt = NULL;
-
if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
{
/* PROCESSOR_UNKNOWN means that all ISAs may be used. */
if (patt == f32_patt)
{
- /* If the padding is less than 15 bytes, we use the normal
- ones. Otherwise, we use a jump instruction and adjust
- its offset. */
- int limit;
+ max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
+ /* Limit number of NOPs to 2 for older processors. */
+ max_number_of_nops = 2;
+ }
+ else
+ {
+ max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
+ /* Limit number of NOPs to 7 for newer processors. */
+ max_number_of_nops = 7;
+ }
+ }
- /* For 64bit, the limit is 3 bytes. */
- if (flag_code == CODE_64BIT
- && fragP->tc_frag_data.isa_flags.bitfield.cpulm)
- limit = 3;
- else
- limit = 15;
- if (count < limit)
- memcpy (fragP->fr_literal + fragP->fr_fix,
- patt[count - 1], count);
- else
- {
- memcpy (fragP->fr_literal + fragP->fr_fix,
- jump_31, count);
- /* Adjust jump offset. */
- fragP->fr_literal[fragP->fr_fix + 1] = count - 2;
- }
+ if (limit == 0)
+ limit = max_single_nop_size;
+
+ if (fragP->fr_type == rs_fill_nop)
+ {
+ /* Output NOPs for .nop directive. */
+ if (limit > max_single_nop_size)
+ {
+ as_bad_where (fragP->fr_file, fragP->fr_line,
+ _("invalid single nop size: %d "
+ "(expect within [0, %d])"),
+ limit, max_single_nop_size);
+ return;
+ }
+ }
+ else
+ fragP->fr_var = count;
+
+ if ((count / max_single_nop_size) > max_number_of_nops)
+ {
+ /* Generate jump over NOPs. */
+ offsetT disp = count - 2;
+ if (fits_in_imm7 (disp))
+ {
+ /* Use "jmp disp8" if possible. */
+ count = disp;
+ where[0] = jump_disp8[0];
+ where[1] = count;
+ where += 2;
}
else
{
- /* Maximum length of an instruction is 10 byte. If the
- padding is greater than 10 bytes and we don't use jump,
- we have to break it into smaller pieces. */
- int padding = count;
- while (padding > 10)
+ unsigned int size_of_jump;
+
+ if (flag_code == CODE_16BIT)
+ {
+ where[0] = jump16_disp32[0];
+ where[1] = jump16_disp32[1];
+ size_of_jump = 2;
+ }
+ else
{
- padding -= 10;
- memcpy (fragP->fr_literal + fragP->fr_fix + padding,
- patt [9], 10);
+ where[0] = jump32_disp32[0];
+ size_of_jump = 1;
+ }
+
+ count -= size_of_jump + 4;
+ if (!fits_in_imm31 (count))
+ {
+ as_bad_where (fragP->fr_file, fragP->fr_line,
+ _("jump over nop padding out of range"));
+ return;
}
- if (padding)
- memcpy (fragP->fr_literal + fragP->fr_fix,
- patt [padding - 1], padding);
+ md_number_to_chars (where + size_of_jump, count, 4);
+ where += size_of_jump + 4;
}
}
- fragP->fr_var = count;
+
+ /* Generate multiple NOPs. */
+ i386_output_nops (where, patt, count, limit);
}
static INLINE int
#define CPU_FLAGS_ARCH_MATCH 0x1
#define CPU_FLAGS_64BIT_MATCH 0x2
-#define CPU_FLAGS_AES_MATCH 0x4
-#define CPU_FLAGS_PCLMUL_MATCH 0x8
-#define CPU_FLAGS_AVX_MATCH 0x10
-#define CPU_FLAGS_32BIT_MATCH \
- (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_AES_MATCH \
- | CPU_FLAGS_PCLMUL_MATCH | CPU_FLAGS_AVX_MATCH)
#define CPU_FLAGS_PERFECT_MATCH \
- (CPU_FLAGS_32BIT_MATCH | CPU_FLAGS_64BIT_MATCH)
+ (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
/* Return CPU flags match bits. */
if (cpu_flags_all_zero (&x))
{
/* This instruction is available on all archs. */
- match |= CPU_FLAGS_32BIT_MATCH;
+ match |= CPU_FLAGS_ARCH_MATCH;
}
else
{
/* This instruction is available only on some archs. */
i386_cpu_flags cpu = cpu_arch_flags;
+ /* AVX512VL is no standalone feature - match it and then strip it. */
+ if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
+ return match;
+ x.bitfield.cpuavx512vl = 0;
+
cpu = cpu_flags_and (x, cpu);
if (!cpu_flags_all_zero (&cpu))
{
if (x.bitfield.cpuavx)
{
- /* We only need to check AES/PCLMUL/SSE2AVX with AVX. */
- if (cpu.bitfield.cpuavx)
- {
- /* Check SSE2AVX. */
- if (!t->opcode_modifier.sse2avx|| sse2avx)
- {
- match |= (CPU_FLAGS_ARCH_MATCH
- | CPU_FLAGS_AVX_MATCH);
- /* Check AES. */
- if (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
- match |= CPU_FLAGS_AES_MATCH;
- /* Check PCLMUL. */
- if (!x.bitfield.cpupclmul
- || cpu.bitfield.cpupclmul)
- match |= CPU_FLAGS_PCLMUL_MATCH;
- }
- }
- else
+ /* We need to check a few extra flags with AVX. */
+ if (cpu.bitfield.cpuavx
+ && (!t->opcode_modifier.sse2avx || sse2avx)
+ && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
+ && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
+ && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
match |= CPU_FLAGS_ARCH_MATCH;
}
- else if (x.bitfield.cpuavx512vl)
+ else if (x.bitfield.cpuavx512f)
{
- /* Match AVX512VL. */
- if (cpu.bitfield.cpuavx512vl)
- {
- /* Need another match. */
- cpu.bitfield.cpuavx512vl = 0;
- if (!cpu_flags_all_zero (&cpu))
- match |= CPU_FLAGS_32BIT_MATCH;
- else
- match |= CPU_FLAGS_ARCH_MATCH;
- }
- else
+ /* We need to check a few extra flags with AVX512F. */
+ if (cpu.bitfield.cpuavx512f
+ && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
+ && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
+ && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
match |= CPU_FLAGS_ARCH_MATCH;
}
else
- match |= CPU_FLAGS_32BIT_MATCH;
+ match |= CPU_FLAGS_ARCH_MATCH;
}
}
return match;
return x;
}
+/* Return X with every bit that is set in Y cleared, i.e. the bitwise
+   AND-NOT of the two operand type masks, applied word by word over
+   the underlying bit array.  */
+
+static INLINE i386_operand_type
+operand_type_and_not (i386_operand_type x, i386_operand_type y)
+{
+  switch (ARRAY_SIZE (x.array))
+    {
+    case 3:
+      x.array [2] &= ~y.array [2];
+      /* Fall through.  */
+    case 2:
+      x.array [1] &= ~y.array [1];
+      /* Fall through.  */
+    case 1:
+      x.array [0] &= ~y.array [0];
+      break;
+    default:
+      abort ();
+    }
+  return x;
+}
+
static INLINE i386_operand_type
operand_type_or (i386_operand_type x, i386_operand_type y)
{
&& !t->operand_types[j].bitfield.fword)
/* For scalar opcode templates to allow register and memory
operands at the same time, some special casing is needed
- here. */
+ here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
+ down-conversion vpmov*. */
|| ((t->operand_types[j].bitfield.regsimd
&& !t->opcode_modifier.broadcast
- && (t->operand_types[j].bitfield.dword
+ && (t->operand_types[j].bitfield.byte
+ || t->operand_types[j].bitfield.word
+ || t->operand_types[j].bitfield.dword
|| t->operand_types[j].bitfield.qword))
? (i.types[j].bitfield.xmmword
|| i.types[j].bitfield.ymmword
if (match)
return match;
- else if (!t->opcode_modifier.d && !t->opcode_modifier.floatd)
+ else if (!t->opcode_modifier.d)
{
mismatch:
i.error = operand_size_mismatch;
cpu_arch_flags = flags;
cpu_arch_isa_flags = flags;
}
+ else
+ cpu_arch_isa_flags
+ = cpu_flags_or (cpu_arch_isa_flags,
+ cpu_arch[j].flags);
(void) restore_line_pointer (e);
demand_empty_rest_of_line ();
return;
}
}
+/* Return TRUE if template T requires an EVEX prefix, either because
+   it is marked EVEX-only or because it uses an EVEX-specific feature
+   (broadcast, masking, static rounding, or SAE).  */
+
+static INLINE bfd_boolean
+is_evex_encoding (const insn_template *t)
+{
+  return t->opcode_modifier.evex
+	 || t->opcode_modifier.broadcast || t->opcode_modifier.masking
+	 || t->opcode_modifier.staticrounding || t->opcode_modifier.sae;
+}
+
/* Build the EVEX prefix. */
static void
/* Encode the vector length. */
unsigned int vec_length;
+ if (!i.tm.opcode_modifier.evex
+ || i.tm.opcode_modifier.evex == EVEXDYN)
+ {
+ unsigned int op;
+
+ vec_length = 0;
+ for (op = 0; op < i.tm.operands; ++op)
+ if (i.tm.operand_types[op].bitfield.xmmword
+ + i.tm.operand_types[op].bitfield.ymmword
+ + i.tm.operand_types[op].bitfield.zmmword > 1)
+ {
+ if (i.types[op].bitfield.zmmword)
+ i.tm.opcode_modifier.evex = EVEX512;
+ else if (i.types[op].bitfield.ymmword)
+ i.tm.opcode_modifier.evex = EVEX256;
+ else if (i.types[op].bitfield.xmmword)
+ i.tm.opcode_modifier.evex = EVEX128;
+ else
+ continue;
+ break;
+ }
+ }
+
switch (i.tm.opcode_modifier.evex)
{
case EVEXLIG: /* LL' is ignored */
gas_assert (i.imm_operands <= 1
&& (i.operands <= 2
|| ((i.tm.opcode_modifier.vex
- || i.tm.opcode_modifier.evex)
+ || i.tm.opcode_modifier.vexopcode
+ || is_evex_encoding (&i.tm))
&& i.operands <= 4)));
exp = &im_expressions[i.imm_operands++];
}
}
+/* Try the shortest encoding by shortening operand size.  Mutates the
+   selected template (i.tm) and operand types (i.types) in place;
+   called only when optimization is enabled and the template permits
+   it.  */
+
+static void
+optimize_encoding (void)
+{
+  int j;
+
+  if (optimize_for_space
+      && i.reg_operands == 1
+      && i.imm_operands == 1
+      && !i.types[1].bitfield.byte
+      && i.op[0].imms->X_op == O_constant
+      && fits_in_imm7 (i.op[0].imms->X_add_number)
+      && ((i.tm.base_opcode == 0xa8
+	   && i.tm.extension_opcode == None)
+	  || (i.tm.base_opcode == 0xf6
+	      && i.tm.extension_opcode == 0x0)))
+    {
+      /* Optimize: -Os:
+	   test $imm7, %r64/%r32/%r16  ->  test $imm7, %r8
+       */
+      unsigned int base_regnum = i.op[1].regs->reg_num;
+      /* Outside 64-bit mode only AL/CL/DL/BL have byte forms.  */
+      if (flag_code == CODE_64BIT || base_regnum < 4)
+	{
+	  i.types[1].bitfield.byte = 1;
+	  /* Ignore the suffix.  */
+	  i.suffix = 0;
+	  if (base_regnum >= 4
+	      && !(i.op[1].regs->reg_flags & RegRex))
+	    {
+	      /* Handle SP, BP, SI and DI registers: step back in the
+		 register table to the corresponding byte register
+		 (SPL/BPL/SIL/DIL).  */
+	      if (i.types[1].bitfield.word)
+		j = 16;
+	      else if (i.types[1].bitfield.dword)
+		j = 32;
+	      else
+		j = 48;
+	      i.op[1].regs -= j;
+	    }
+	}
+    }
+  else if (flag_code == CODE_64BIT
+	   && ((i.types[1].bitfield.qword
+		&& i.reg_operands == 1
+		&& i.imm_operands == 1
+		&& i.op[0].imms->X_op == O_constant
+		&& ((i.tm.base_opcode == 0xb0
+		     && i.tm.extension_opcode == None
+		     && fits_in_unsigned_long (i.op[0].imms->X_add_number))
+		    || (fits_in_imm31 (i.op[0].imms->X_add_number)
+			&& (((i.tm.base_opcode == 0x24
+			      || i.tm.base_opcode == 0xa8)
+			     && i.tm.extension_opcode == None)
+			    || (i.tm.base_opcode == 0x80
+				&& i.tm.extension_opcode == 0x4)
+			    || ((i.tm.base_opcode == 0xf6
+				 || i.tm.base_opcode == 0xc6)
+				&& i.tm.extension_opcode == 0x0)))))
+	       || (i.types[0].bitfield.qword
+		   && ((i.reg_operands == 2
+			&& i.op[0].regs == i.op[1].regs
+			&& ((i.tm.base_opcode == 0x30
+			     || i.tm.base_opcode == 0x28)
+			    && i.tm.extension_opcode == None))
+		       || (i.reg_operands == 1
+			   && i.operands == 1
+			   && i.tm.base_opcode == 0x30
+			   && i.tm.extension_opcode == None)))))
+    {
+      /* Optimize: -O:
+	   andq $imm31, %r64   -> andl $imm31, %r32
+	   testq $imm31, %r64  -> testl $imm31, %r32
+	   xorq %r64, %r64     -> xorl %r32, %r32
+	   subq %r64, %r64     -> subl %r32, %r32
+	   movq $imm31, %r64   -> movl $imm31, %r32
+	   movq $imm32, %r64   -> movl $imm32, %r32
+       */
+      /* Drop REX.W: writing the 32-bit register zero-extends, so the
+	 result is unchanged.  */
+      i.tm.opcode_modifier.norex64 = 1;
+      if (i.tm.base_opcode == 0xb0 || i.tm.base_opcode == 0xc6)
+	{
+	  /* Handle
+	       movq $imm31, %r64   -> movl $imm31, %r32
+	       movq $imm32, %r64   -> movl $imm32, %r32
+	   */
+	  i.tm.operand_types[0].bitfield.imm32 = 1;
+	  i.tm.operand_types[0].bitfield.imm32s = 0;
+	  i.tm.operand_types[0].bitfield.imm64 = 0;
+	  i.types[0].bitfield.imm32 = 1;
+	  i.types[0].bitfield.imm32s = 0;
+	  i.types[0].bitfield.imm64 = 0;
+	  i.types[1].bitfield.dword = 1;
+	  i.types[1].bitfield.qword = 0;
+	  if (i.tm.base_opcode == 0xc6)
+	    {
+	      /* Handle
+		   movq $imm31, %r64   -> movl $imm31, %r32
+	       */
+	      i.tm.base_opcode = 0xb0;
+	      i.tm.extension_opcode = None;
+	      i.tm.opcode_modifier.shortform = 1;
+	      i.tm.opcode_modifier.modrm = 0;
+	    }
+	}
+    }
+  else if (optimize > 1
+	   && i.reg_operands == 3
+	   && i.op[0].regs == i.op[1].regs
+	   && !i.types[2].bitfield.xmmword
+	   && (i.tm.opcode_modifier.vex
+	       || (!i.mask
+		   && !i.rounding
+		   && is_evex_encoding (&i.tm)
+		   && (i.vec_encoding != vex_encoding_evex
+		       || i.tm.cpu_flags.bitfield.cpuavx512vl
+		       || cpu_arch_isa_flags.bitfield.cpuavx512vl)))
+	   && ((i.tm.base_opcode == 0x55
+		|| i.tm.base_opcode == 0x6655
+		|| i.tm.base_opcode == 0x66df
+		|| i.tm.base_opcode == 0x57
+		|| i.tm.base_opcode == 0x6657
+		|| i.tm.base_opcode == 0x66ef
+		|| i.tm.base_opcode == 0x66f8
+		|| i.tm.base_opcode == 0x66f9
+		|| i.tm.base_opcode == 0x66fa
+		|| i.tm.base_opcode == 0x66fb)
+	       && i.tm.extension_opcode == None))
+    {
+      /* Optimize: -O2:
+	   VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
+	   vpsubq and vpsubw:
+	     EVEX VOP %zmmM, %zmmM, %zmmN
+	       -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
+	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+	     EVEX VOP %ymmM, %ymmM, %ymmN
+	       -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
+	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+	     VEX VOP %ymmM, %ymmM, %ymmN
+	       -> VEX VOP %xmmM, %xmmM, %xmmN
+	   VOP, one of vpandn and vpxor:
+	     VEX VOP %ymmM, %ymmM, %ymmN
+	       -> VEX VOP %xmmM, %xmmM, %xmmN
+	   VOP, one of vpandnd and vpandnq:
+	     EVEX VOP %zmmM, %zmmM, %zmmN
+	       -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
+	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+	     EVEX VOP %ymmM, %ymmM, %ymmN
+	       -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
+	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+	   VOP, one of vpxord and vpxorq:
+	     EVEX VOP %zmmM, %zmmM, %zmmN
+	       -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
+	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+	     EVEX VOP %ymmM, %ymmM, %ymmN
+	       -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
+	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+       */
+      if (is_evex_encoding (&i.tm))
+	{
+	  /* Shrink to 128-bit vector length; stay EVEX only when the
+	     user asked for it via {evex}.  */
+	  if (i.vec_encoding == vex_encoding_evex)
+	    i.tm.opcode_modifier.evex = EVEX128;
+	  else
+	    {
+	      i.tm.opcode_modifier.vex = VEX128;
+	      i.tm.opcode_modifier.vexw = VEXW0;
+	      i.tm.opcode_modifier.evex = 0;
+	    }
+	}
+      else
+	i.tm.opcode_modifier.vex = VEX128;
+
+      if (i.tm.opcode_modifier.vex)
+	for (j = 0; j < 3; j++)
+	  {
+	    i.types[j].bitfield.xmmword = 1;
+	    i.types[j].bitfield.ymmword = 0;
+	  }
+    }
+}
+
/* This is the guts of the machine-dependent assembler. LINE points to a
machine dependent instruction. This function is supposed to emit
the frags/bytes it assembles to. */
if (sse_check != check_none
&& !i.tm.opcode_modifier.noavx
+ && !i.tm.cpu_flags.bitfield.cpuavx
&& (i.tm.cpu_flags.bitfield.cpusse
|| i.tm.cpu_flags.bitfield.cpusse2
|| i.tm.cpu_flags.bitfield.cpusse3
|| i.tm.cpu_flags.bitfield.cpussse3
|| i.tm.cpu_flags.bitfield.cpusse4_1
- || i.tm.cpu_flags.bitfield.cpusse4_2))
+ || i.tm.cpu_flags.bitfield.cpusse4_2
+ || i.tm.cpu_flags.bitfield.cpupclmul
+ || i.tm.cpu_flags.bitfield.cpuaes
+ || i.tm.cpu_flags.bitfield.cpugfni))
{
(sse_check == check_warning
? as_warn
i.disp_operands = 0;
}
+ if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
+ optimize_encoding ();
+
if (!process_suffix ())
return;
as_warn (_("translating to `%sp'"), i.tm.name);
}
- if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.evex)
+ if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.vexopcode
+ || is_evex_encoding (&i.tm))
{
if (flag_code == CODE_16BIT)
{
}
}
+ if (i.rex == 0 && i.rex_encoding)
+ {
+ /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
+ that uses legacy register. If it is "hi" register, don't add
+ the REX_OPCODE byte. */
+ int x;
+ for (x = 0; x < 2; x++)
+ if (i.types[x].bitfield.reg
+ && i.types[x].bitfield.byte
+ && (i.op[x].regs->reg_flags & RegRex64) == 0
+ && i.op[x].regs->reg_num > 3)
+ {
+ i.rex_encoding = FALSE;
+ break;
+ }
+
+ if (i.rex_encoding)
+ i.rex = REX_OPCODE;
+ }
+
if (i.rex != 0)
add_prefix (REX_OPCODE | i.rex);
/* {evex} */
i.vec_encoding = vex_encoding_evex;
break;
+ case 0x7:
+ /* {rex} */
+ i.rex_encoding = TRUE;
+ break;
+ case 0x8:
+ /* {nooptimize} */
+ i.no_optimize = TRUE;
+ break;
default:
abort ();
}
{
supported |= cpu_flags_match (t);
if (supported == CPU_FLAGS_PERFECT_MATCH)
- goto skip;
- }
+ {
+ if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT))
+ as_warn (_("use .code16 to ensure correct addressing mode"));
- if (!(supported & CPU_FLAGS_64BIT_MATCH))
- {
- as_bad (flag_code == CODE_64BIT
- ? _("`%s' is not supported in 64-bit mode")
- : _("`%s' is only supported in 64-bit mode"),
- current_templates->start->name);
- return NULL;
- }
- if (supported != CPU_FLAGS_PERFECT_MATCH)
- {
- as_bad (_("`%s' is not supported on `%s%s'"),
- current_templates->start->name,
- cpu_arch_name ? cpu_arch_name : default_arch,
- cpu_sub_arch_name ? cpu_sub_arch_name : "");
- return NULL;
+ return l;
+ }
}
-skip:
- if (!cpu_arch_flags.bitfield.cpui386
- && (flag_code != CODE_16BIT))
- {
- as_warn (_("use .code16 to ensure correct addressing mode"));
- }
+ if (!(supported & CPU_FLAGS_64BIT_MATCH))
+ as_bad (flag_code == CODE_64BIT
+ ? _("`%s' is not supported in 64-bit mode")
+ : _("`%s' is only supported in 64-bit mode"),
+ current_templates->start->name);
+ else
+ as_bad (_("`%s' is not supported on `%s%s'"),
+ current_templates->start->name,
+ cpu_arch_name ? cpu_arch_name : default_arch,
+ cpu_sub_arch_name ? cpu_sub_arch_name : "");
- return l;
+ return NULL;
}
static char *
to the memory operand. */
if (i.broadcast)
{
- int broadcasted_opnd_size;
+ i386_operand_type type, overlap;
/* Check if specified broadcast is supported in this instruction,
- and it's applied to memory operand of DWORD or QWORD type,
- depending on VecESize. */
- if (i.broadcast->type != t->opcode_modifier.broadcast
- || !i.types[i.broadcast->operand].bitfield.mem
- || (t->opcode_modifier.vecesize == 0
- && !i.types[i.broadcast->operand].bitfield.dword
- && !i.types[i.broadcast->operand].bitfield.unspecified)
- || (t->opcode_modifier.vecesize == 1
- && !i.types[i.broadcast->operand].bitfield.qword
- && !i.types[i.broadcast->operand].bitfield.unspecified))
- goto bad_broadcast;
-
- broadcasted_opnd_size = t->opcode_modifier.vecesize ? 64 : 32;
- if (i.broadcast->type == BROADCAST_1TO16)
- broadcasted_opnd_size <<= 4; /* Broadcast 1to16. */
- else if (i.broadcast->type == BROADCAST_1TO8)
- broadcasted_opnd_size <<= 3; /* Broadcast 1to8. */
- else if (i.broadcast->type == BROADCAST_1TO4)
- broadcasted_opnd_size <<= 2; /* Broadcast 1to4. */
- else if (i.broadcast->type == BROADCAST_1TO2)
- broadcasted_opnd_size <<= 1; /* Broadcast 1to2. */
- else
- goto bad_broadcast;
-
- if ((broadcasted_opnd_size == 256
- && !t->operand_types[i.broadcast->operand].bitfield.ymmword)
- || (broadcasted_opnd_size == 512
- && !t->operand_types[i.broadcast->operand].bitfield.zmmword))
+ and it's applied to a memory operand of DWORD or QWORD type. */
+ op = i.broadcast->operand;
+ if (!t->opcode_modifier.broadcast
+ || !i.types[op].bitfield.mem
+ || (!i.types[op].bitfield.unspecified
+ && (t->operand_types[op].bitfield.dword
+ ? !i.types[op].bitfield.dword
+ : !i.types[op].bitfield.qword)))
{
bad_broadcast:
i.error = unsupported_broadcast;
return 1;
}
+
+ operand_type_set (&type, 0);
+ switch ((t->operand_types[op].bitfield.dword ? 4 : 8) * i.broadcast->type)
+ {
+ case 8:
+ type.bitfield.qword = 1;
+ break;
+ case 16:
+ type.bitfield.xmmword = 1;
+ break;
+ case 32:
+ type.bitfield.ymmword = 1;
+ break;
+ case 64:
+ type.bitfield.zmmword = 1;
+ break;
+ default:
+ goto bad_broadcast;
+ }
+
+ overlap = operand_type_and (type, t->operand_types[op]);
+ if (operand_type_all_zero (&overlap))
+ goto bad_broadcast;
+
+ if (t->opcode_modifier.checkregsize)
+ {
+ unsigned int j;
+
+ for (j = 0; j < i.operands; ++j)
+ {
+ if (j != op
+ && !operand_type_register_match(i.types[j],
+ t->operand_types[j],
+ type,
+ t->operand_types[op]))
+ goto bad_broadcast;
+ }
+ }
}
/* If broadcast is supported in this instruction, we need to check if
operand of one-element size isn't specified without broadcast. */
break;
gas_assert (op < i.operands);
/* Check size of the memory operand. */
- if ((t->opcode_modifier.vecesize == 0
- && i.types[op].bitfield.dword)
- || (t->opcode_modifier.vecesize == 1
- && i.types[op].bitfield.qword))
+ if (t->operand_types[op].bitfield.dword
+ ? i.types[op].bitfield.dword
+ : i.types[op].bitfield.qword)
{
i.error = broadcast_needed;
return 1;
}
}
+ else
+ op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
/* Check if requested masking is supported. */
if (i.mask
&& i.disp_encoding != disp_encoding_32bit)
{
if (i.broadcast)
- i.memshift = t->opcode_modifier.vecesize ? 3 : 2;
+ i.memshift = t->operand_types[op].bitfield.dword ? 2 : 3;
else
i.memshift = t->opcode_modifier.disp8memshift;
if (i.vec_encoding == vex_encoding_evex)
{
/* This instruction must be encoded with EVEX prefix. */
- if (!t->opcode_modifier.evex)
+ if (!is_evex_encoding (t))
{
i.error = unsupported;
return 1;
if (!found_cpu_match)
continue;
- /* Check old gcc support. */
- i.error = old_gcc_only;
- if (!old_gcc && t->opcode_modifier.oldgcc)
- continue;
-
/* Check AT&T mnemonic. */
i.error = unsupported_with_intel_mnemonic;
if (intel_mnemonic && t->opcode_modifier.attmnemonic)
&& operand_type_equal (&i.types [0], &acc32)
&& operand_type_equal (&i.types [1], &acc32))
continue;
+ /* xrelease mov %eax, <disp> is another special case. It must not
+ match the accumulator-only encoding of mov. */
+ if (flag_code != CODE_64BIT
+ && i.hle_prefix
+ && t->base_opcode == 0xa0
+ && i.types[0].bitfield.acc
+ && operand_type_check (i.types[1], anymem))
+ continue;
/* If we want store form, we reverse direction of operands. */
if (i.dir_encoding == dir_encoding_store
&& t->opcode_modifier.d)
operand_types[1])))
{
/* Check if other direction is valid ... */
- if (!t->opcode_modifier.d && !t->opcode_modifier.floatd)
+ if (!t->opcode_modifier.d)
continue;
check_reverse:
/* Does not match either direction. */
continue;
}
- /* found_reverse_match holds which of D or FloatDR
+ /* found_reverse_match holds which of D or FloatR
we've found. */
- if (t->opcode_modifier.d)
- found_reverse_match = Opcode_D;
- else if (t->opcode_modifier.floatd)
+ if (!t->opcode_modifier.d)
+ found_reverse_match = 0;
+ else if (operand_types[0].bitfield.tbyte)
found_reverse_match = Opcode_FloatD;
else
- found_reverse_match = 0;
+ found_reverse_match = Opcode_D;
if (t->opcode_modifier.floatr)
found_reverse_match |= Opcode_FloatR;
}
case 4:
if (!operand_type_match (overlap3, i.types[3])
|| (check_register
- && !operand_type_register_match (i.types[2],
- operand_types[2],
- i.types[3],
- operand_types[3])))
+ && (!operand_type_register_match (i.types[1],
+ operand_types[1],
+ i.types[3],
+ operand_types[3])
+ || !operand_type_register_match (i.types[2],
+ operand_types[2],
+ i.types[3],
+ operand_types[3]))))
continue;
/* Fall through. */
case 3:
/* Here we make use of the fact that there are no
- reverse match 3 operand instructions, and all 3
- operand instructions only need to be checked for
- register consistency between operands 2 and 3. */
+ reverse match 3 operand instructions. */
if (!operand_type_match (overlap2, i.types[2])
|| (check_register
- && !operand_type_register_match (i.types[1],
- operand_types[1],
- i.types[2],
- operand_types[2])))
+ && (!operand_type_register_match (i.types[0],
+ operand_types[0],
+ i.types[2],
+ operand_types[2])
+ || !operand_type_register_match (i.types[1],
+ operand_types[1],
+ i.types[2],
+ operand_types[2]))))
continue;
break;
}
case bad_imm4:
err_msg = _("constant doesn't fit in 4 bits");
break;
- case old_gcc_only:
- err_msg = _("only supported with old gcc");
- break;
case unsupported_with_intel_mnemonic:
err_msg = _("unsupported with Intel mnemonic");
break;
if (!i.tm.operand_types[op].bitfield.inoutportreg
&& !i.tm.operand_types[op].bitfield.shiftcount)
{
- if (i.types[op].bitfield.reg && i.types[op].bitfield.byte)
- {
- i.suffix = BYTE_MNEM_SUFFIX;
- break;
- }
- if (i.types[op].bitfield.reg && i.types[op].bitfield.word)
- {
- i.suffix = WORD_MNEM_SUFFIX;
- break;
- }
- if (i.types[op].bitfield.reg && i.types[op].bitfield.dword)
- {
- i.suffix = LONG_MNEM_SUFFIX;
- break;
- }
- if (i.types[op].bitfield.reg && i.types[op].bitfield.qword)
- {
- i.suffix = QWORD_MNEM_SUFFIX;
- break;
- }
+ if (!i.types[op].bitfield.reg)
+ continue;
+ if (i.types[op].bitfield.byte)
+ i.suffix = BYTE_MNEM_SUFFIX;
+ else if (i.types[op].bitfield.word)
+ i.suffix = WORD_MNEM_SUFFIX;
+ else if (i.types[op].bitfield.dword)
+ i.suffix = LONG_MNEM_SUFFIX;
+ else if (i.types[op].bitfield.qword)
+ i.suffix = QWORD_MNEM_SUFFIX;
+ else
+ continue;
+ break;
}
}
}
{
if (intel_syntax
&& i.tm.opcode_modifier.ignoresize
- && i.tm.opcode_modifier.no_lsuf)
+ && i.tm.opcode_modifier.no_lsuf
+ && !i.tm.opcode_modifier.todword
+ && !i.tm.opcode_modifier.toqword)
i.suffix = 0;
else if (!check_long_reg ())
return 0;
{
if (intel_syntax
&& i.tm.opcode_modifier.ignoresize
- && i.tm.opcode_modifier.no_qsuf)
+ && i.tm.opcode_modifier.no_qsuf
+ && !i.tm.opcode_modifier.todword
+ && !i.tm.opcode_modifier.toqword)
i.suffix = 0;
else if (!check_qword_reg ())
return 0;
else if (!check_word_reg ())
return 0;
}
- else if (i.suffix == XMMWORD_MNEM_SUFFIX
- || i.suffix == YMMWORD_MNEM_SUFFIX
- || i.suffix == ZMMWORD_MNEM_SUFFIX)
- {
- /* Skip if the instruction has x/y/z suffix. match_template
- should check if it is a valid suffix. */
- }
else if (intel_syntax && i.tm.opcode_modifier.ignoresize)
/* Do nothing if the instruction is going to ignore the prefix. */
;
}
}
- /* Change the opcode based on the operand size given by i.suffix;
- We don't need to change things for byte insns. */
-
- if (i.suffix
- && i.suffix != BYTE_MNEM_SUFFIX
- && i.suffix != XMMWORD_MNEM_SUFFIX
- && i.suffix != YMMWORD_MNEM_SUFFIX
- && i.suffix != ZMMWORD_MNEM_SUFFIX)
+ /* Change the opcode based on the operand size given by i.suffix. */
+ switch (i.suffix)
{
+ /* Size floating point instruction. */
+ case LONG_MNEM_SUFFIX:
+ if (i.tm.opcode_modifier.floatmf)
+ {
+ i.tm.base_opcode ^= 4;
+ break;
+ }
+ /* fall through */
+ case WORD_MNEM_SUFFIX:
+ case QWORD_MNEM_SUFFIX:
/* It's not a byte, select word/dword operation. */
if (i.tm.opcode_modifier.w)
{
else
i.tm.base_opcode |= 1;
}
-
+ /* fall through */
+ case SHORT_MNEM_SUFFIX:
/* Now select between word & dword operations via the operand
size prefix, except for instructions that will ignore this
prefix anyway. */
return 0;
}
else if (i.suffix != QWORD_MNEM_SUFFIX
- && i.suffix != LONG_DOUBLE_MNEM_SUFFIX
&& !i.tm.opcode_modifier.ignoresize
&& !i.tm.opcode_modifier.floatmf
&& ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
/* Set mode64 for an operand. */
if (i.suffix == QWORD_MNEM_SUFFIX
&& flag_code == CODE_64BIT
- && !i.tm.opcode_modifier.norex64)
- {
+ && !i.tm.opcode_modifier.norex64
/* Special case for xchg %rax,%rax. It is NOP and doesn't
- need rex64. cmpxchg8b is also a special case. */
- if (! (i.operands == 2
- && i.tm.base_opcode == 0x90
- && i.tm.extension_opcode == None
- && operand_type_equal (&i.types [0], &acc64)
- && operand_type_equal (&i.types [1], &acc64))
- && ! (i.operands == 1
- && i.tm.base_opcode == 0xfc7
- && i.tm.extension_opcode == 1
- && !operand_type_check (i.types [0], reg)
- && operand_type_check (i.types [0], anymem)))
- i.rex |= REX_W;
- }
-
- /* Size floating point instruction. */
- if (i.suffix == LONG_MNEM_SUFFIX)
- if (i.tm.opcode_modifier.floatmf)
- i.tm.base_opcode ^= 4;
+ need rex64. */
+ && ! (i.operands == 2
+ && i.tm.base_opcode == 0x90
+ && i.tm.extension_opcode == None
+ && operand_type_equal (&i.types [0], &acc64)
+ && operand_type_equal (&i.types [1], &acc64)))
+ i.rex |= REX_W;
+
+ break;
}
return 1;
}
else if (i.tm.opcode_modifier.implicitquadgroup)
{
+ unsigned int regnum, first_reg_in_group, last_reg_in_group;
+
/* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
gas_assert (i.operands >= 2 && i.types[1].bitfield.regsimd);
- unsigned int regnum = register_number (i.op[1].regs);
- unsigned int first_reg_in_group = regnum & ~3;
- unsigned int last_reg_in_group = first_reg_in_group + 3;
- if (regnum != first_reg_in_group) {
- as_warn (_("the second source register `%s%s' implicitly denotes"
- " `%s%.3s%d' to `%s%.3s%d' source group in `%s'"),
- register_prefix, i.op[1].regs->reg_name,
- register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
- register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
- i.tm.name);
- }
- }
+ regnum = register_number (i.op[1].regs);
+ first_reg_in_group = regnum & ~3;
+ last_reg_in_group = first_reg_in_group + 3;
+ if (regnum != first_reg_in_group)
+ as_warn (_("source register `%s%s' implicitly denotes"
+ " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
+ register_prefix, i.op[1].regs->reg_name,
+ register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
+ register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
+ i.tm.name);
+ }
else if (i.tm.opcode_modifier.regkludge)
{
/* The imul $imm, %reg instruction is converted into
unsigned int source, dest;
int vex_3_sources;
- /* The first operand of instructions with VEX prefix and 3 sources
- must be VEX_Imm4. */
vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
if (vex_3_sources)
{
unsigned int nds, reg_slot;
expressionS *exp;
- if (i.tm.opcode_modifier.veximmext
- && i.tm.opcode_modifier.immext)
- {
- dest = i.operands - 2;
- gas_assert (dest == 3);
- }
- else
- dest = i.operands - 1;
+ dest = i.operands - 1;
nds = dest - 1;
/* There are 2 kinds of instructions:
- 1. 5 operands: 4 register operands or 3 register operands
- plus 1 memory operand plus one Vec_Imm4 operand, VexXDS, and
- VexW0 or VexW1. The destination must be either XMM, YMM or
+ 1. 5 operands: 4 register operands or 3 register operands
+ plus 1 memory operand plus one Vec_Imm4 operand, VexXDS, and
+ VexW0 or VexW1. The destination must be either XMM, YMM or
ZMM register.
- 2. 4 operands: 4 register operands or 3 register operands
- plus 1 memory operand, VexXDS, and VexImmExt */
+ 2. 4 operands: 4 register operands or 3 register operands
+ plus 1 memory operand, with VexXDS. */
gas_assert ((i.reg_operands == 4
- || (i.reg_operands == 3 && i.mem_operands == 1))
- && i.tm.opcode_modifier.vexvvvv == VEXXDS
- && (i.tm.opcode_modifier.veximmext
- || (i.imm_operands == 1
- && i.types[0].bitfield.vec_imm4
- && (i.tm.opcode_modifier.vexw == VEXW0
- || i.tm.opcode_modifier.vexw == VEXW1)
- && i.tm.operand_types[dest].bitfield.regsimd)));
+ || (i.reg_operands == 3 && i.mem_operands == 1))
+ && i.tm.opcode_modifier.vexvvvv == VEXXDS
+ && i.tm.opcode_modifier.vexw
+ && i.tm.operand_types[dest].bitfield.regsimd);
+
+ /* If VexW1 is set, the first non-immediate operand is the source and
+ the second non-immediate one is encoded in the immediate operand. */
+ if (i.tm.opcode_modifier.vexw == VEXW1)
+ {
+ source = i.imm_operands;
+ reg_slot = i.imm_operands + 1;
+ }
+ else
+ {
+ source = i.imm_operands + 1;
+ reg_slot = i.imm_operands;
+ }
if (i.imm_operands == 0)
- {
- /* When there is no immediate operand, generate an 8bit
- immediate operand to encode the first operand. */
- exp = &im_expressions[i.imm_operands++];
- i.op[i.operands].imms = exp;
- i.types[i.operands] = imm8;
- i.operands++;
- /* If VexW1 is set, the first operand is the source and
- the second operand is encoded in the immediate operand. */
- if (i.tm.opcode_modifier.vexw == VEXW1)
- {
- source = 0;
- reg_slot = 1;
- }
- else
- {
- source = 1;
- reg_slot = 0;
- }
-
- /* FMA swaps REG and NDS. */
- if (i.tm.cpu_flags.bitfield.cpufma)
- {
- unsigned int tmp;
- tmp = reg_slot;
- reg_slot = nds;
- nds = tmp;
- }
-
- gas_assert (i.tm.operand_types[reg_slot].bitfield.regsimd);
- exp->X_op = O_constant;
- exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
+ {
+ /* When there is no immediate operand, generate an 8bit
+ immediate operand to encode the register operand. */
+ exp = &im_expressions[i.imm_operands++];
+ i.op[i.operands].imms = exp;
+ i.types[i.operands] = imm8;
+ i.operands++;
+
+ gas_assert (i.tm.operand_types[reg_slot].bitfield.regsimd);
+ exp->X_op = O_constant;
+ exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
}
else
- {
- unsigned int imm_slot;
-
- if (i.tm.opcode_modifier.vexw == VEXW0)
- {
- /* If VexW0 is set, the third operand is the source and
- the second operand is encoded in the immediate
- operand. */
- source = 2;
- reg_slot = 1;
- }
- else
- {
- /* VexW1 is set, the second operand is the source and
- the third operand is encoded in the immediate
- operand. */
- source = 1;
- reg_slot = 2;
- }
-
- if (i.tm.opcode_modifier.immext)
- {
- /* When ImmExt is set, the immediate byte is the last
- operand. */
- imm_slot = i.operands - 1;
- source--;
- reg_slot--;
- }
- else
- {
- imm_slot = 0;
-
- /* Turn on Imm8 so that output_imm will generate it. */
- i.types[imm_slot].bitfield.imm8 = 1;
- }
-
- gas_assert (i.tm.operand_types[reg_slot].bitfield.regsimd);
- i.op[imm_slot].imms->X_add_number
- |= register_number (i.op[reg_slot].regs) << 4;
+ {
+ unsigned int imm_slot;
+
+ gas_assert (i.imm_operands == 1 && i.types[0].bitfield.vec_imm4);
+
+ if (i.tm.opcode_modifier.immext)
+ {
+ /* When ImmExt is set, the immediate byte is the last
+ operand. */
+ imm_slot = i.operands - 1;
+ source--;
+ reg_slot--;
+ }
+ else
+ {
+ imm_slot = 0;
+
+ /* Turn on Imm8 so that output_imm will generate it. */
+ i.types[imm_slot].bitfield.imm8 = 1;
+ }
+
+ gas_assert (i.tm.operand_types[reg_slot].bitfield.regsimd);
+ i.op[imm_slot].imms->X_add_number
+ |= register_number (i.op[reg_slot].regs) << 4;
gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
- }
+ }
gas_assert (i.tm.operand_types[nds].bitfield.regsimd);
i.vex.register_specifier = i.op[nds].regs;
}
break;
case 5:
- if (i.tm.opcode_modifier.evex)
+ if (is_evex_encoding (&i.tm))
{
/* For EVEX instructions, when there are 5 operands, the
first one must be immediate operand. If the second one
fake_zero_displacement = 1;
if (i.index_reg == 0)
{
+ i386_operand_type newdisp;
+
gas_assert (!i.tm.opcode_modifier.vecsib);
/* Operand is just <disp> */
if (flag_code == CODE_64BIT)
i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
i.sib.base = NO_BASE_REGISTER;
i.sib.index = NO_INDEX_REGISTER;
- i.types[op] = ((i.prefix[ADDR_PREFIX] == 0)
- ? disp32s : disp32);
+ newdisp = (!i.prefix[ADDR_PREFIX] ? disp32s : disp32);
}
else if ((flag_code == CODE_16BIT)
^ (i.prefix[ADDR_PREFIX] != 0))
{
i.rm.regmem = NO_BASE_REGISTER_16;
- i.types[op] = disp16;
+ newdisp = disp16;
}
else
{
i.rm.regmem = NO_BASE_REGISTER;
- i.types[op] = disp32;
+ newdisp = disp32;
}
+ i.types[op] = operand_type_and_not (i.types[op], anydisp);
+ i.types[op] = operand_type_or (i.types[op], newdisp);
}
else if (!i.tm.opcode_modifier.vecsib)
{
if (flag_code == CODE_64BIT
&& operand_type_check (i.types[op], disp))
{
- i386_operand_type temp;
- operand_type_set (&temp, 0);
- temp.bitfield.disp8 = i.types[op].bitfield.disp8;
- i.types[op] = temp;
+ i.types[op].bitfield.disp16 = 0;
+ i.types[op].bitfield.disp64 = 0;
if (i.prefix[ADDR_PREFIX] == 0)
- i.types[op].bitfield.disp32s = 1;
+ {
+ i.types[op].bitfield.disp32 = 0;
+ i.types[op].bitfield.disp32s = 1;
+ }
else
- i.types[op].bitfield.disp32 = 1;
+ {
+ i.types[op].bitfield.disp32 = 1;
+ i.types[op].bitfield.disp32s = 0;
+ }
}
if (!i.tm.opcode_modifier.vecsib)
}
else
{
- /* There are only 2 operands. */
- gas_assert (op < 2 && i.operands == 2);
- vex_reg = 1;
+ /* There are only 2 non-immediate operands. */
+ gas_assert (op < i.imm_operands + 2
+ && i.operands == i.imm_operands + 2);
+ vex_reg = i.imm_operands + 1;
}
}
else
op_string += 3;
if (*op_string == '8')
- bcst_type = BROADCAST_1TO8;
+ bcst_type = 8;
else if (*op_string == '4')
- bcst_type = BROADCAST_1TO4;
+ bcst_type = 4;
else if (*op_string == '2')
- bcst_type = BROADCAST_1TO2;
+ bcst_type = 2;
else if (*op_string == '1'
&& *(op_string+1) == '6')
{
- bcst_type = BROADCAST_1TO16;
+ bcst_type = 16;
op_string++;
}
else
return NULL;
}
op_string++;
+
+ /* Strip whitespace since the addition of pseudo prefixes
+ changed how the scrubber treats '{'. */
+ if (is_space_char (*op_string))
+ ++op_string;
+
continue;
}
unknown_vec_op:
\f
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
-const char *md_shortopts = "kVQ:sqn";
+const char *md_shortopts = "kVQ:sqnO::";
#else
-const char *md_shortopts = "qn";
+const char *md_shortopts = "qnO::";
#endif
#define OPTION_32 (OPTION_MD_BASE + 0)
#define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
#define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
#define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
-#define OPTION_MOLD_GCC (OPTION_MD_BASE + 9)
+#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
#define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
#define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
#define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
#define OPTION_MAMD64 (OPTION_MD_BASE + 22)
#define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
#define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
-#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 25)
struct option md_longopts[] =
{
{"msyntax", required_argument, NULL, OPTION_MSYNTAX},
{"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
{"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
- {"mold-gcc", no_argument, NULL, OPTION_MOLD_GCC},
{"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
{"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
{"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
cpu_arch_flags = flags;
cpu_arch_isa_flags = flags;
}
+ else
+ cpu_arch_isa_flags
+ = cpu_flags_or (cpu_arch_isa_flags,
+ cpu_arch[j].flags);
break;
}
}
allow_naked_reg = 1;
break;
- case OPTION_MOLD_GCC:
- old_gcc = 1;
- break;
-
case OPTION_MSSE2AVX:
sse2avx = 1;
break;
intel64 = 1;
break;
+ case 'O':
+ if (arg == NULL)
+ {
+ optimize = 1;
+ /* Turn off -Os. */
+ optimize_for_space = 0;
+ }
+ else if (*arg == 's')
+ {
+ optimize_for_space = 1;
+ /* Turn on all encoding optimizations. */
+ optimize = -1;
+ }
+ else
+ {
+ optimize = atoi (arg);
+ /* Turn off -Os. */
+ optimize_for_space = 0;
+ }
+ break;
+
default:
return 0;
}
fprintf (stream, _("\
-mnaked-reg don't require `%%' prefix for registers\n"));
fprintf (stream, _("\
- -mold-gcc support old (<= 2.8.1) versions of gcc\n"));
- fprintf (stream, _("\
-madd-bnd-prefix add BND prefix for all valid branches\n"));
fprintf (stream, _("\
-mshared disable branch optimization for shared code\n"));