X-Git-Url: http://git.efficios.com/?a=blobdiff_plain;f=gas%2Fconfig%2Ftc-i386.c;h=2c087e42e0de4486ce1b89d6c2d1b1d36ded1a9e;hb=72aea32839b5f4a2c8fa67e60451169b841941c0;hp=41f701c3d88dd05c9f5e28d2ed455af50d38c488;hpb=625cbd7ac121a6df6d237c8368262f6634fb61ab;p=deliverable%2Fbinutils-gdb.git diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 41f701c3d8..2c087e42e0 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -1,5 +1,5 @@ /* tc-i386.c -- Assemble code for the Intel 80386 - Copyright (C) 1989-2018 Free Software Foundation, Inc. + Copyright (C) 1989-2020 Free Software Foundation, Inc. This file is part of GAS, the GNU Assembler. @@ -33,6 +33,17 @@ #include "elf/x86-64.h" #include "opcodes/i386-init.h" +#ifdef HAVE_LIMITS_H +#include +#else +#ifdef HAVE_SYS_PARAM_H +#include +#endif +#ifndef INT_MAX +#define INT_MAX (int) (((unsigned) (-1)) >> 1) +#endif +#endif + #ifndef REGISTER_WARNINGS #define REGISTER_WARNINGS 1 #endif @@ -87,6 +98,9 @@ #define END_OF_INSN '\0' +/* This matches the C -> StaticRounding alias in the opcode table. */ +#define commutative staticrounding + /* 'templates' is for grouping together 'template' structures for opcodes of the same name. This is only used for storing the insns in the grand @@ -168,6 +182,7 @@ static char *parse_insn (char *, char *); static char *parse_operands (char *, const char *); static void swap_operands (void); static void swap_2_operands (int, int); +static enum flag_code i386_addressing_mode (void); static void optimize_imm (void); static void optimize_disp (void); static const insn_template *match_template (char); @@ -188,6 +203,13 @@ static void s_bss (int); #endif #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) static void handle_large_common (int small ATTRIBUTE_UNUSED); + +/* GNU_PROPERTY_X86_ISA_1_USED. */ +static unsigned int x86_isa_1_used; +/* GNU_PROPERTY_X86_FEATURE_2_USED. */ +static unsigned int x86_feature_2_used; +/* Generate x86 used ISA and feature properties. */ +static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE; #endif static const char *default_arch = DEFAULT_ARCH; @@ -230,6 +252,9 @@ struct Broadcast_Operation /* Index of broadcasted operand. */ int operand; + + /* Number of bytes to broadcast. */ + int bytes; }; static struct Broadcast_Operation broadcast_op; @@ -269,7 +294,6 @@ enum i386_error invalid_vector_register_set, unsupported_vector_index_register, unsupported_broadcast, - broadcast_not_on_src_operand, broadcast_needed, unsupported_masking, mask_not_on_destination, @@ -307,6 +331,7 @@ struct _i386_insn /* Flags for operands. */ unsigned int flags[MAX_OPERANDS]; #define Operand_PCrel 1 +#define Operand_Mem 2 /* Relocation type for operand */ enum bfd_reloc_code_real reloc[MAX_OPERANDS]; @@ -329,6 +354,24 @@ struct _i386_insn unsigned int prefixes; unsigned char prefix[MAX_PREFIXES]; + /* The operand to a branch insn indicates an absolute branch. */ + bfd_boolean jumpabsolute; + + /* Has MMX register operands. */ + bfd_boolean has_regmmx; + + /* Has XMM register operands. */ + bfd_boolean has_regxmm; + + /* Has YMM register operands. */ + bfd_boolean has_regymm; + + /* Has ZMM register operands. */ + bfd_boolean has_regzmm; + + /* Has GOTPC or TLS relocation. */ + bfd_boolean has_gotpc_tls_reloc; + /* RM and SIB are the modrm byte and the sib byte where the addressing modes of this insn are encoded. 
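[Editorial note, not part of the patch: the new jumpabsolute field above replaces the old per-operand JumpAbsolute type bit, tracking the distinction once per insn. A minimal illustration in AT&T syntax:

    // jmp *%rax    indirect through %rax  ->  i.jumpabsolute = TRUE
    // jmp 1f       pc-relative branch     ->  i.jumpabsolute = FALSE

Later hunks use this to reject templates whose jump kind is not JUMP_ABSOLUTE when the operand carried a '*'.]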
*/ modrm_byte rm; @@ -354,7 +397,8 @@ struct _i386_insn { dir_encoding_default = 0, dir_encoding_load, - dir_encoding_store + dir_encoding_store, + dir_encoding_swap } dir_encoding; /* Prefer 8bit or 32bit displacement in encoding. */ @@ -522,6 +566,8 @@ static enum flag_code flag_code; static unsigned int object_64bit; static unsigned int disallow_64bit_reloc; static int use_rela_relocations = 0; +/* __tls_get_addr/___tls_get_addr symbol for TLS. */ +static const char *tls_get_addr; #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \ || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \ @@ -582,6 +628,21 @@ static int omit_lock_prefix = 0; "lock addl $0, (%{re}sp)". */ static int avoid_fence = 0; +/* Type of the previous instruction. */ +static struct + { + segT seg; + const char *file; + const char *name; + unsigned int line; + enum last_insn_kind + { + last_insn_other = 0, + last_insn_directive, + last_insn_prefix + } kind; + } last_insn; + /* 1 if the assembler should generate relax relocations. */ static int generate_relax_relocations @@ -595,6 +656,44 @@ static enum check_kind } sse_check, operand_check = check_warning; +/* Non-zero if branches should be aligned within power of 2 boundary. */ +static int align_branch_power = 0; + +/* Types of branches to align. */ +enum align_branch_kind + { + align_branch_none = 0, + align_branch_jcc = 1, + align_branch_fused = 2, + align_branch_jmp = 3, + align_branch_call = 4, + align_branch_indirect = 5, + align_branch_ret = 6 + }; + +/* Type bits of branches to align. */ +enum align_branch_bit + { + align_branch_jcc_bit = 1 << align_branch_jcc, + align_branch_fused_bit = 1 << align_branch_fused, + align_branch_jmp_bit = 1 << align_branch_jmp, + align_branch_call_bit = 1 << align_branch_call, + align_branch_indirect_bit = 1 << align_branch_indirect, + align_branch_ret_bit = 1 << align_branch_ret + }; + +static unsigned int align_branch = (align_branch_jcc_bit + | align_branch_fused_bit + | align_branch_jmp_bit); + +/* The maximum padding size for fused jcc. CMP like instruction can + be 9 bytes and jcc can be 6 bytes. Leave room just in case for + prefixes. */ +#define MAX_FUSED_JCC_PADDING_SIZE 20 + +/* The maximum number of prefixes added for an instruction. */ +static unsigned int align_branch_prefix_size = 5; + /* Optimization: 1. Clear the REX_W bit with register operand if possible. 2. Above plus use 128bit vector instruction to clear the full vector @@ -661,6 +760,13 @@ static enum vex256 } avxscalar; +/* Encode VEX WIG instructions with specific vex.w. */ +static enum + { + vexw0 = 0, + vexw1 + } vexwig; + /* Encode scalar EVEX LIG instructions with specific vector length. */ static enum { @@ -691,12 +797,19 @@ int x86_cie_data_alignment; /* Interface to relax_segment. There are 3 major relax states for 386 jump insns because the different types of jumps add different sizes to frags when we're - figuring out what sort of jump to choose to reach a given label. */ + figuring out what sort of jump to choose to reach a given label. + + BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align + branches which are handled by md_estimate_size_before_relax() and + i386_generic_table_relax_frag(). */ /* Types. */ #define UNCOND_JUMP 0 #define COND_JUMP 1 #define COND_JUMP86 2 +#define BRANCH_PADDING 3 +#define BRANCH_PREFIX 4 +#define FUSED_JCC_PADDING 5 /* Sizes. 
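[Editorial note, not part of the patch: the relax "Types" above and the "Sizes" below are packed into a single fr_subtype value, which is how the new code recovers BRANCH_PADDING or FUSED_JCC_PADDING from a frag. A sketch of the packing, modeled on the ENCODE_RELAX_STATE and TYPE_FROM_RELAX_STATE macros this file uses (the encode macro's exact definition is assumed here):

    #define ENCODE_RELAX_STATE(type, size) (((type) << 2) | (size))
    #define TYPE_FROM_RELAX_STATE(s)       ((s) >> 2)

    // e.g. a conditional jump frag with a small displacement:
    //   fr_subtype = ENCODE_RELAX_STATE (COND_JUMP, SMALL);
    //   TYPE_FROM_RELAX_STATE (fr_subtype) == COND_JUMP
]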
*/ #define CODE16 1 @@ -857,6 +970,10 @@ static const arch_entry cpu_arch[] = CPU_387_FLAGS, 0 }, { STRING_COMMA_LEN (".687"), PROCESSOR_UNKNOWN, CPU_687_FLAGS, 0 }, + { STRING_COMMA_LEN (".cmov"), PROCESSOR_UNKNOWN, + CPU_CMOV_FLAGS, 0 }, + { STRING_COMMA_LEN (".fxsr"), PROCESSOR_UNKNOWN, + CPU_FXSR_FLAGS, 0 }, { STRING_COMMA_LEN (".mmx"), PROCESSOR_UNKNOWN, CPU_MMX_FLAGS, 0 }, { STRING_COMMA_LEN (".sse"), PROCESSOR_UNKNOWN, @@ -1035,6 +1152,16 @@ static const arch_entry cpu_arch[] = CPU_MOVDIRI_FLAGS, 0 }, { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN, CPU_MOVDIR64B_FLAGS, 0 }, + { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN, + CPU_AVX512_BF16_FLAGS, 0 }, + { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN, + CPU_AVX512_VP2INTERSECT_FLAGS, 0 }, + { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN, + CPU_ENQCMD_FLAGS, 0 }, + { STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN, + CPU_RDPRU_FLAGS, 0 }, + { STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN, + CPU_MCOMMIT_FLAGS, 0 }, }; static const noarch_entry cpu_noarch[] = @@ -1043,6 +1170,8 @@ static const noarch_entry cpu_noarch[] = { STRING_COMMA_LEN ("no287"), CPU_ANY_287_FLAGS }, { STRING_COMMA_LEN ("no387"), CPU_ANY_387_FLAGS }, { STRING_COMMA_LEN ("no687"), CPU_ANY_687_FLAGS }, + { STRING_COMMA_LEN ("nocmov"), CPU_ANY_CMOV_FLAGS }, + { STRING_COMMA_LEN ("nofxsr"), CPU_ANY_FXSR_FLAGS }, { STRING_COMMA_LEN ("nommx"), CPU_ANY_MMX_FLAGS }, { STRING_COMMA_LEN ("nosse"), CPU_ANY_SSE_FLAGS }, { STRING_COMMA_LEN ("nosse2"), CPU_ANY_SSE2_FLAGS }, @@ -1072,6 +1201,9 @@ static const noarch_entry cpu_noarch[] = { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS }, { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS }, { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS }, + { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS }, + { STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_SHSTK_FLAGS }, + { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS }, }; #ifdef I386COFF @@ -1249,7 +1381,16 @@ i386_output_nops (char *where, const unsigned char *const *patt, /* Place the longer NOP first. */ int last; int offset; - const unsigned char *nops = patt[max_single_nop_size - 1]; + const unsigned char *nops; + + if (max_single_nop_size < 1) + { + as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"), + max_single_nop_size); + return; + } + + nops = patt[max_single_nop_size - 1]; /* Use the smaller one if the requsted one isn't available. */ if (nops == NULL) @@ -1309,6 +1450,12 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit) case rs_fill_nop: case rs_align_code: break; + case rs_machine_dependent: + /* Allow NOP padding for jumps and calls. */ + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING + || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING) + break; + /* Fall through. 
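[Editorial note, not part of the patch: a simplified model of what i386_output_nops above does with its pattern table, ignoring the real code's placement of the longer NOP first and its fallback when a requested size is unavailable; emit_nops is a hypothetical name:

    #include <string.h>

    // patt[n - 1] is an n-byte NOP; fill COUNT bytes with runs of the
    // largest NOP plus one shorter NOP covering the remainder.
    static void
    emit_nops (char *where, const unsigned char *const *patt,
               int count, int max)
    {
      int done;
      for (done = 0; done + max <= count; done += max)
        memcpy (where + done, patt[max - 1], max);
      if (count > done)
        memcpy (where + done, patt[count - done - 1], count - done);
    }
]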
*/ default: return; } @@ -1453,7 +1600,7 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit) return; } } - else + else if (fragP->fr_type != rs_machine_dependent) fragP->fr_var = count; if ((count / max_single_nop_size) > max_number_of_nops) @@ -1539,6 +1686,9 @@ operand_type_set (union i386_operand_type *x, unsigned int v) default: abort (); } + + x->bitfield.class = ClassNone; + x->bitfield.instance = InstanceNone; } static INLINE int @@ -1753,6 +1903,11 @@ cpu_flags_match (const insn_template *t) static INLINE i386_operand_type operand_type_and (i386_operand_type x, i386_operand_type y) { + if (x.bitfield.class != y.bitfield.class) + x.bitfield.class = ClassNone; + if (x.bitfield.instance != y.bitfield.instance) + x.bitfield.instance = InstanceNone; + switch (ARRAY_SIZE (x.array)) { case 3: @@ -1773,6 +1928,9 @@ operand_type_and (i386_operand_type x, i386_operand_type y) static INLINE i386_operand_type operand_type_and_not (i386_operand_type x, i386_operand_type y) { + gas_assert (y.bitfield.class == ClassNone); + gas_assert (y.bitfield.instance == InstanceNone); + switch (ARRAY_SIZE (x.array)) { case 3: @@ -1793,6 +1951,13 @@ operand_type_and_not (i386_operand_type x, i386_operand_type y) static INLINE i386_operand_type operand_type_or (i386_operand_type x, i386_operand_type y) { + gas_assert (x.bitfield.class == ClassNone || + y.bitfield.class == ClassNone || + x.bitfield.class == y.bitfield.class); + gas_assert (x.bitfield.instance == InstanceNone || + y.bitfield.instance == InstanceNone || + x.bitfield.instance == y.bitfield.instance); + switch (ARRAY_SIZE (x.array)) { case 3: @@ -1813,6 +1978,9 @@ operand_type_or (i386_operand_type x, i386_operand_type y) static INLINE i386_operand_type operand_type_xor (i386_operand_type x, i386_operand_type y) { + gas_assert (y.bitfield.class == ClassNone); + gas_assert (y.bitfield.instance == InstanceNone); + switch (ARRAY_SIZE (x.array)) { case 3: @@ -1830,14 +1998,12 @@ operand_type_xor (i386_operand_type x, i386_operand_type y) return x; } -static const i386_operand_type acc32 = OPERAND_TYPE_ACC32; -static const i386_operand_type acc64 = OPERAND_TYPE_ACC64; static const i386_operand_type disp16 = OPERAND_TYPE_DISP16; static const i386_operand_type disp32 = OPERAND_TYPE_DISP32; static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S; static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32; -static const i386_operand_type anydisp - = OPERAND_TYPE_ANYDISP; +static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP; +static const i386_operand_type anyimm = OPERAND_TYPE_ANYIMM; static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM; static const i386_operand_type regmask = OPERAND_TYPE_REGMASK; static const i386_operand_type imm8 = OPERAND_TYPE_IMM8; @@ -1849,7 +2015,6 @@ static const i386_operand_type imm64 = OPERAND_TYPE_IMM64; static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32; static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S; static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S; -static const i386_operand_type vec_imm4 = OPERAND_TYPE_VEC_IMM4; enum operand_type { @@ -1865,7 +2030,7 @@ operand_type_check (i386_operand_type t, enum operand_type c) switch (c) { case reg: - return t.bitfield.reg; + return t.bitfield.class == Reg; case imm: return (t.bitfield.imm8 @@ -1897,11 +2062,12 @@ operand_type_check (i386_operand_type t, enum operand_type c) return 0; } -/* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit on - operand J for 
instruction template T. */ +/* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size + between operand GIVEN and opeand WANTED for instruction template T. */ static INLINE int -match_reg_size (const insn_template *t, unsigned int wanted, unsigned int given) +match_operand_size (const insn_template *t, unsigned int wanted, + unsigned int given) { return !((i.types[given].bitfield.byte && !t->operand_types[wanted].bitfield.byte) @@ -1915,11 +2081,12 @@ match_reg_size (const insn_template *t, unsigned int wanted, unsigned int given) && !t->operand_types[wanted].bitfield.tbyte)); } -/* Return 1 if there is no conflict in SIMD register on - operand J for instruction template T. */ +/* Return 1 if there is no conflict in SIMD register between operand + GIVEN and opeand WANTED for instruction template T. */ static INLINE int -match_simd_size (const insn_template *t, unsigned int wanted, unsigned int given) +match_simd_size (const insn_template *t, unsigned int wanted, + unsigned int given) { return !((i.types[given].bitfield.xmmword && !t->operand_types[wanted].bitfield.xmmword) @@ -1929,13 +2096,14 @@ match_simd_size (const insn_template *t, unsigned int wanted, unsigned int given && !t->operand_types[wanted].bitfield.zmmword)); } -/* Return 1 if there is no conflict in any size on operand J for - instruction template T. */ +/* Return 1 if there is no conflict in any size between operand GIVEN + and opeand WANTED for instruction template T. */ static INLINE int -match_mem_size (const insn_template *t, unsigned int wanted, unsigned int given) +match_mem_size (const insn_template *t, unsigned int wanted, + unsigned int given) { - return (match_reg_size (t, wanted, given) + return (match_operand_size (t, wanted, given) && !((i.types[given].bitfield.unspecified && !i.broadcast && !t->operand_types[wanted].bitfield.unspecified) @@ -1945,7 +2113,7 @@ match_mem_size (const insn_template *t, unsigned int wanted, unsigned int given) operands at the same time, some special casing is needed here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and down-conversion vpmov*. */ - || ((t->operand_types[wanted].bitfield.regsimd + || ((t->operand_types[wanted].bitfield.class == RegSIMD && !t->opcode_modifier.broadcast && (t->operand_types[wanted].bitfield.byte || t->operand_types[wanted].bitfield.word @@ -1970,42 +2138,41 @@ operand_size_match (const insn_template *t) { unsigned int j, match = MATCH_STRAIGHT; - /* Don't check jump instructions. */ + /* Don't check non-absolute jump instructions. */ if (t->opcode_modifier.jump - || t->opcode_modifier.jumpbyte - || t->opcode_modifier.jumpdword - || t->opcode_modifier.jumpintersegment) + && t->opcode_modifier.jump != JUMP_ABSOLUTE) return match; /* Check memory and accumulator operand size. 
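[Editorial note, not part of the patch: after the renames above, all three helpers take a template slot WANTED and an actual-operand index GIVEN. The straight pass below checks slot j against operand j; the reverse, direction-swapped pass that follows pairs slot j with the mirrored operand:

    // straight:  match_operand_size (t, j, j)
    // reverse:   given = i.operands - j - 1;
    //            match_operand_size (t, j, given)
    // so for a 2-operand insn, template slot 0 is checked against
    // operand 1 and slot 1 against operand 0.
]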
*/ for (j = 0; j < i.operands; j++) { - if (!i.types[j].bitfield.reg && !i.types[j].bitfield.regsimd - && t->operand_types[j].bitfield.anysize) + if (i.types[j].bitfield.class != Reg + && i.types[j].bitfield.class != RegSIMD + && t->opcode_modifier.anysize) continue; - if (t->operand_types[j].bitfield.reg - && !match_reg_size (t, j, j)) + if (t->operand_types[j].bitfield.class == Reg + && !match_operand_size (t, j, j)) { match = 0; break; } - if (t->operand_types[j].bitfield.regsimd + if (t->operand_types[j].bitfield.class == RegSIMD && !match_simd_size (t, j, j)) { match = 0; break; } - if (t->operand_types[j].bitfield.acc - && (!match_reg_size (t, j, j) || !match_simd_size (t, j, j))) + if (t->operand_types[j].bitfield.instance == Accum + && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j))) { match = 0; break; } - if (i.types[j].bitfield.mem && !match_mem_size (t, j, j)) + if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j)) { match = 0; break; @@ -2021,17 +2188,26 @@ mismatch: } /* Check reverse. */ - gas_assert (i.operands == 2); + gas_assert (i.operands >= 2 && i.operands <= 3); - for (j = 0; j < 2; j++) + for (j = 0; j < i.operands; j++) { - if ((t->operand_types[j].bitfield.reg - || t->operand_types[j].bitfield.acc) - && !match_reg_size (t, j, !j)) + unsigned int given = i.operands - j - 1; + + if (t->operand_types[j].bitfield.class == Reg + && !match_operand_size (t, j, given)) + goto mismatch; + + if (t->operand_types[j].bitfield.class == RegSIMD + && !match_simd_size (t, j, given)) + goto mismatch; + + if (t->operand_types[j].bitfield.instance == Accum + && (!match_operand_size (t, j, given) + || !match_simd_size (t, j, given))) goto mismatch; - if (i.types[!j].bitfield.mem - && !match_mem_size (t, j, !j)) + if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given)) goto mismatch; } @@ -2044,7 +2220,6 @@ operand_type_match (i386_operand_type overlap, { i386_operand_type temp = overlap; - temp.bitfield.jumpabsolute = 0; temp.bitfield.unspecified = 0; temp.bitfield.byte = 0; temp.bitfield.word = 0; @@ -2058,8 +2233,7 @@ operand_type_match (i386_operand_type overlap, if (operand_type_all_zero (&temp)) goto mismatch; - if (given.bitfield.baseindex == overlap.bitfield.baseindex - && given.bitfield.jumpabsolute == overlap.bitfield.jumpabsolute) + if (given.bitfield.baseindex == overlap.bitfield.baseindex) return 1; mismatch: @@ -2078,18 +2252,18 @@ operand_type_register_match (i386_operand_type g0, i386_operand_type g1, i386_operand_type t1) { - if (!g0.bitfield.reg - && !g0.bitfield.regsimd + if (g0.bitfield.class != Reg + && g0.bitfield.class != RegSIMD && (!operand_type_check (g0, anymem) || g0.bitfield.unspecified - || !t0.bitfield.regsimd)) + || t0.bitfield.class != RegSIMD)) return 1; - if (!g1.bitfield.reg - && !g1.bitfield.regsimd + if (g1.bitfield.class != Reg + && g1.bitfield.class != RegSIMD && (!operand_type_check (g1, anymem) || g1.bitfield.unspecified - || !t1.bitfield.regsimd)) + || t1.bitfield.class != RegSIMD)) return 1; if (g0.bitfield.byte == g1.bitfield.byte @@ -2909,6 +3083,11 @@ md_begin (void) x86_dwarf2_return_column = 8; x86_cie_data_alignment = -4; } + + /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it + can be turned into BRANCH_PREFIX frag. 
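[Editorial note, not part of the patch: the numbers behind this invariant come from the earlier defines: MAX_FUSED_JCC_PADDING_SIZE is 20 (room for a 9-byte CMP-like insn, a 6-byte Jcc, and spare prefix bytes) while align_branch_prefix_size defaults to 5. Since a FUSED_JCC_PADDING frag may later be converted into a BRANCH_PREFIX frag, its room must cover the maximum prefix count:

    // 20 >= 5 holds for the defaults; a misconfigured build where
    // align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE aborts
    // at startup via the sanity check that follows.
]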
*/ + if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE) + abort (); } void @@ -2927,7 +3106,7 @@ static void pe (expressionS *); static void ps (symbolS *); static void -pi (char *line, i386_insn *x) +pi (const char *line, i386_insn *x) { unsigned int j; @@ -2951,14 +3130,13 @@ pi (char *line, i386_insn *x) fprintf (stdout, " #%d: ", j + 1); pt (x->types[j]); fprintf (stdout, "\n"); - if (x->types[j].bitfield.reg - || x->types[j].bitfield.regmmx - || x->types[j].bitfield.regsimd - || x->types[j].bitfield.sreg2 - || x->types[j].bitfield.sreg3 - || x->types[j].bitfield.control - || x->types[j].bitfield.debug - || x->types[j].bitfield.test) + if (x->types[j].bitfield.class == Reg + || x->types[j].bitfield.class == RegMMX + || x->types[j].bitfield.class == RegSIMD + || x->types[j].bitfield.class == SReg + || x->types[j].bitfield.class == RegCR + || x->types[j].bitfield.class == RegDR + || x->types[j].bitfield.class == RegTR) fprintf (stdout, "%s\n", x->op[j].regs->reg_name); if (operand_type_check (x->types[j], imm)) pe (x->op[j].imms); @@ -3028,6 +3206,10 @@ const type_names[] = { OPERAND_TYPE_REG16, "r16" }, { OPERAND_TYPE_REG32, "r32" }, { OPERAND_TYPE_REG64, "r64" }, + { OPERAND_TYPE_ACC8, "acc8" }, + { OPERAND_TYPE_ACC16, "acc16" }, + { OPERAND_TYPE_ACC32, "acc32" }, + { OPERAND_TYPE_ACC64, "acc64" }, { OPERAND_TYPE_IMM8, "i8" }, { OPERAND_TYPE_IMM8, "i8s" }, { OPERAND_TYPE_IMM16, "i16" }, @@ -3048,16 +3230,12 @@ const type_names[] = { OPERAND_TYPE_DEBUG, "debug reg" }, { OPERAND_TYPE_FLOATREG, "FReg" }, { OPERAND_TYPE_FLOATACC, "FAcc" }, - { OPERAND_TYPE_SREG2, "SReg2" }, - { OPERAND_TYPE_SREG3, "SReg3" }, - { OPERAND_TYPE_ACC, "Acc" }, - { OPERAND_TYPE_JUMPABSOLUTE, "Jump Absolute" }, + { OPERAND_TYPE_SREG, "SReg" }, { OPERAND_TYPE_REGMMX, "rMMX" }, { OPERAND_TYPE_REGXMM, "rXMM" }, { OPERAND_TYPE_REGYMM, "rYMM" }, { OPERAND_TYPE_REGZMM, "rZMM" }, { OPERAND_TYPE_REGMASK, "Mask reg" }, - { OPERAND_TYPE_ESSEG, "es" }, }; static void @@ -3069,7 +3247,7 @@ pt (i386_operand_type t) for (j = 0; j < ARRAY_SIZE (type_names); j++) { a = operand_type_and (t, type_names[j].mask); - if (!operand_type_all_zero (&a)) + if (operand_type_equal (&a, &type_names[j].mask)) fprintf (stdout, "%s, ", type_names[j].name); } fflush (stdout); @@ -3312,6 +3490,7 @@ build_vex_prefix (const insn_template *t) unsigned int register_specifier; unsigned int implied_prefix; unsigned int vector_length; + unsigned int w; /* Check register specifier. */ if (i.vex.register_specifier) @@ -3323,13 +3502,15 @@ build_vex_prefix (const insn_template *t) else register_specifier = 0xf; - /* Use 2-byte VEX prefix by swapping destination and source - operand. */ - if (i.vec_encoding != vex_encoding_vex3 + /* Use 2-byte VEX prefix by swapping destination and source operand + if there are more than 1 register operand. */ + if (i.reg_operands > 1 + && i.vec_encoding != vex_encoding_vex3 && i.dir_encoding == dir_encoding_default && i.operands == i.reg_operands + && operand_type_equal (&i.types[0], &i.types[i.operands - 1]) && i.tm.opcode_modifier.vexopcode == VEX0F - && i.tm.opcode_modifier.load + && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d) && i.rex == REX_B) { unsigned int xchg = i.operands - 1; @@ -3350,8 +3531,48 @@ build_vex_prefix (const insn_template *t) i.rm.regmem = i.rm.reg; i.rm.reg = xchg; - /* Use the next insn. */ - i.tm = t[1]; + if (i.tm.opcode_modifier.d) + i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e + ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD; + else /* Use the next insn. 
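[Editorial note, not part of the patch: a worked example of the direction-bit flip above, assuming the usual opcode/i386.h values Opcode_SIMD_FloatD == 1 and Opcode_SIMD_IntD == 0x10:

    // "vmovaps %xmm8, %xmm0": the load form 0F 28 puts %xmm8 in r/m,
    // setting REX_B, and VEX.B exists only in the 3-byte prefix.
    // Flipping D selects the store form, where %xmm8 lands in the reg
    // field (VEX.R), which the 2-byte prefix can express:
    //   0x28 ^ Opcode_SIMD_FloatD == 0x29   movaps load -> store
    //   0x6e ^ Opcode_SIMD_IntD   == 0x7e   movd   load -> store
    // The (base_opcode & 0xee) != 0x6e test singles out the movd and
    // movq pair, since 0x6e and 0x7e both mask to 0x6e.
]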
*/ + i.tm = t[1]; + } + + /* Use 2-byte VEX prefix by swapping commutative source operands if there + are no memory operands and at least 3 register ones. */ + if (i.reg_operands >= 3 + && i.vec_encoding != vex_encoding_vex3 + && i.reg_operands == i.operands - i.imm_operands + && i.tm.opcode_modifier.vex + && i.tm.opcode_modifier.commutative + && (i.tm.opcode_modifier.sse2avx || optimize > 1) + && i.rex == REX_B + && i.vex.register_specifier + && !(i.vex.register_specifier->reg_flags & RegRex)) + { + unsigned int xchg = i.operands - i.reg_operands; + union i386_op temp_op; + i386_operand_type temp_type; + + gas_assert (i.tm.opcode_modifier.vexopcode == VEX0F); + gas_assert (!i.tm.opcode_modifier.sae); + gas_assert (operand_type_equal (&i.types[i.operands - 2], + &i.types[i.operands - 3])); + gas_assert (i.rm.mode == 3); + + temp_type = i.types[xchg]; + i.types[xchg] = i.types[xchg + 1]; + i.types[xchg + 1] = temp_type; + temp_op = i.op[xchg]; + i.op[xchg] = i.op[xchg + 1]; + i.op[xchg + 1] = temp_op; + + i.rex = 0; + xchg = i.rm.regmem | 8; + i.rm.regmem = ~register_specifier & 0xf; + gas_assert (!(i.rm.regmem & 8)); + i.vex.register_specifier += xchg - i.rm.regmem; + register_specifier = ~xchg & 0xf; } if (i.tm.opcode_modifier.vex == VEXScalar) @@ -3362,8 +3583,10 @@ build_vex_prefix (const insn_template *t) { unsigned int op; + /* Determine vector length from the last multi-length vector + operand. */ vector_length = 0; - for (op = 0; op < t->operands; ++op) + for (op = t->operands; op--;) if (t->operand_types[op].bitfield.xmmword && t->operand_types[op].bitfield.ymmword && i.types[op].bitfield.ymmword) @@ -3391,10 +3614,18 @@ build_vex_prefix (const insn_template *t) abort (); } + /* Check the REX.W bit and VEXW. */ + if (i.tm.opcode_modifier.vexw == VEXWIG) + w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0; + else if (i.tm.opcode_modifier.vexw) + w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0; + else + w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0; + /* Use 2-byte VEX prefix if possible. */ - if (i.vec_encoding != vex_encoding_vex3 + if (w == 0 + && i.vec_encoding != vex_encoding_vex3 && i.tm.opcode_modifier.vexopcode == VEX0F - && i.tm.opcode_modifier.vexw != VEXW1 && (i.rex & (REX_W | REX_X | REX_B)) == 0) { /* 2-byte VEX prefix. */ @@ -3413,7 +3644,7 @@ build_vex_prefix (const insn_template *t) else { /* 3-byte VEX prefix. */ - unsigned int m, w; + unsigned int m; i.vex.length = 3; @@ -3451,11 +3682,6 @@ build_vex_prefix (const insn_template *t) of RXB bits from REX. */ i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m; - /* Check the REX.W bit. */ - w = (i.rex & REX_W) ? 1 : 0; - if (i.tm.opcode_modifier.vexw == VEXW1) - w = 1; - i.vex.bytes[2] = (w << 7 | register_specifier << 3 | vector_length << 2 @@ -3468,7 +3694,14 @@ is_evex_encoding (const insn_template *t) { return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift || t->opcode_modifier.broadcast || t->opcode_modifier.masking - || t->opcode_modifier.staticrounding || t->opcode_modifier.sae; + || t->opcode_modifier.sae; +} + +static INLINE bfd_boolean +is_any_vex_encoding (const insn_template *t) +{ + return t->opcode_modifier.vex || t->opcode_modifier.vexopcode + || is_evex_encoding (t); } /* Build the EVEX prefix. */ @@ -3574,19 +3807,13 @@ build_evex_prefix (void) i.vrex &= ~vrex_used; gas_assert (i.vrex == 0); - /* Check the REX.W bit. */ - w = (i.rex & REX_W) ? 
1 : 0; - if (i.tm.opcode_modifier.vexw) - { - if (i.tm.opcode_modifier.vexw == VEXW1) - w = 1; - } - /* If w is not set it means we are dealing with WIG instruction. */ - else if (!w) - { - if (evexwig == evexw1) - w = 1; - } + /* Check the REX.W bit and VEXW. */ + if (i.tm.opcode_modifier.vexw == VEXWIG) + w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0; + else if (i.tm.opcode_modifier.vexw) + w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0; + else + w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0; /* Encode the U bit. */ implied_prefix |= 0x4; @@ -3610,22 +3837,32 @@ build_evex_prefix (void) { unsigned int op; + /* Determine vector length from the last multi-length vector + operand. */ vec_length = 0; - for (op = 0; op < i.tm.operands; ++op) + for (op = i.operands; op--;) if (i.tm.operand_types[op].bitfield.xmmword + i.tm.operand_types[op].bitfield.ymmword + i.tm.operand_types[op].bitfield.zmmword > 1) { if (i.types[op].bitfield.zmmword) - i.tm.opcode_modifier.evex = EVEX512; + { + i.tm.opcode_modifier.evex = EVEX512; + break; + } else if (i.types[op].bitfield.ymmword) - i.tm.opcode_modifier.evex = EVEX256; + { + i.tm.opcode_modifier.evex = EVEX256; + break; + } else if (i.types[op].bitfield.xmmword) - i.tm.opcode_modifier.evex = EVEX128; + { + i.tm.opcode_modifier.evex = EVEX128; + break; + } else if (i.broadcast && (int) op == i.broadcast->operand) { - switch ((i.tm.operand_types[op].bitfield.dword ? 4 : 8) - * i.broadcast->type) + switch (i.broadcast->bytes) { case 64: i.tm.opcode_modifier.evex = EVEX512; @@ -3637,12 +3874,14 @@ build_evex_prefix (void) i.tm.opcode_modifier.evex = EVEX128; break; default: - continue; + abort (); } + break; } - continue; - break; } + + if (op >= MAX_OPERANDS) + abort (); } switch (i.tm.opcode_modifier.evex) @@ -3685,52 +3924,6 @@ process_immext (void) { expressionS *exp; - if ((i.tm.cpu_flags.bitfield.cpusse3 || i.tm.cpu_flags.bitfield.cpusvme) - && i.operands > 0) - { - /* MONITOR/MWAIT as well as SVME instructions have fixed operands - with an opcode suffix which is coded in the same place as an - 8-bit immediate field would be. - Here we check those operands and remove them afterwards. */ - unsigned int x; - - for (x = 0; x < i.operands; x++) - if (register_number (i.op[x].regs) != x) - as_bad (_("can't use register '%s%s' as operand %d in '%s'."), - register_prefix, i.op[x].regs->reg_name, x + 1, - i.tm.name); - - i.operands = 0; - } - - if (i.tm.cpu_flags.bitfield.cpumwaitx && i.operands > 0) - { - /* MONITORX/MWAITX instructions have fixed operands with an opcode - suffix which is coded in the same place as an 8-bit immediate - field would be. - Here we check those operands and remove them afterwards. */ - unsigned int x; - - if (i.operands != 3) - abort(); - - for (x = 0; x < 2; x++) - if (register_number (i.op[x].regs) != x) - goto bad_register_operand; - - /* Check for third operand for mwaitx/monitorx insn. */ - if (register_number (i.op[x].regs) - != (x + (i.tm.extension_opcode == 0xfb))) - { -bad_register_operand: - as_bad (_("can't use register '%s%s' as operand %d in '%s'."), - register_prefix, i.op[x].regs->reg_name, x+1, - i.tm.name); - } - - i.operands = 0; - } - /* These AMD 3DNow! and SSE2 instructions have an opcode suffix which is coded in the same place as an 8-bit immediate field would be. 
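[Editorial note, not part of the patch: a concrete instance of the suffix-as-immediate trick described in this comment, using the 3DNow! opcode byte for pfadd (0x9E, per AMD's manual):

    // pfadd %mm1, %mm0  ->  0F 0F C1 9E
    // The trailing 0x9E is not decoded as an operand, but it sits
    // where an imm8 would, so process_immext re-materializes it as a
    // faked 8-bit immediate and lets the normal imm machinery emit it.
]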
Here we fake an 8-bit immediate operand from the @@ -3741,9 +3934,7 @@ bad_register_operand: gas_assert (i.imm_operands <= 1 && (i.operands <= 2 - || ((i.tm.opcode_modifier.vex - || i.tm.opcode_modifier.vexopcode - || is_evex_encoding (&i.tm)) + || (is_any_vex_encoding (&i.tm) && i.operands <= 4))); exp = &im_expressions[i.imm_operands++]; @@ -3781,8 +3972,7 @@ check_hle (void) i.tm.name); return 0; } - if (i.mem_operands == 0 - || !operand_type_check (i.types[i.operands - 1], anymem)) + if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem)) { as_bad (_("memory destination needed for instruction `%s'" " after `xrelease'"), i.tm.name); @@ -3797,16 +3987,16 @@ check_hle (void) static void optimize_encoding (void) { - int j; + unsigned int j; if (optimize_for_space + && !is_any_vex_encoding (&i.tm) && i.reg_operands == 1 && i.imm_operands == 1 && !i.types[1].bitfield.byte && i.op[0].imms->X_op == O_constant && fits_in_imm7 (i.op[0].imms->X_add_number) - && ((i.tm.base_opcode == 0xa8 - && i.tm.extension_opcode == None) + && (i.tm.base_opcode == 0xa8 || (i.tm.base_opcode == 0xf6 && i.tm.extension_opcode == 0x0))) { @@ -3819,50 +4009,50 @@ optimize_encoding (void) i.types[1].bitfield.byte = 1; /* Ignore the suffix. */ i.suffix = 0; - if (base_regnum >= 4 - && !(i.op[1].regs->reg_flags & RegRex)) - { - /* Handle SP, BP, SI and DI registers. */ - if (i.types[1].bitfield.word) - j = 16; - else if (i.types[1].bitfield.dword) - j = 32; - else - j = 48; - i.op[1].regs -= j; - } + /* Convert to byte registers. */ + if (i.types[1].bitfield.word) + j = 16; + else if (i.types[1].bitfield.dword) + j = 32; + else + j = 48; + if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4) + j += 8; + i.op[1].regs -= j; } } else if (flag_code == CODE_64BIT + && !is_any_vex_encoding (&i.tm) && ((i.types[1].bitfield.qword && i.reg_operands == 1 && i.imm_operands == 1 && i.op[0].imms->X_op == O_constant - && ((i.tm.base_opcode == 0xb0 + && ((i.tm.base_opcode == 0xb8 && i.tm.extension_opcode == None && fits_in_unsigned_long (i.op[0].imms->X_add_number)) || (fits_in_imm31 (i.op[0].imms->X_add_number) - && (((i.tm.base_opcode == 0x24 - || i.tm.base_opcode == 0xa8) - && i.tm.extension_opcode == None) + && ((i.tm.base_opcode == 0x24 + || i.tm.base_opcode == 0xa8) || (i.tm.base_opcode == 0x80 && i.tm.extension_opcode == 0x4) || ((i.tm.base_opcode == 0xf6 - || i.tm.base_opcode == 0xc6) - && i.tm.extension_opcode == 0x0))))) + || (i.tm.base_opcode | 1) == 0xc7) + && i.tm.extension_opcode == 0x0))) + || (fits_in_imm7 (i.op[0].imms->X_add_number) + && i.tm.base_opcode == 0x83 + && i.tm.extension_opcode == 0x4))) || (i.types[0].bitfield.qword && ((i.reg_operands == 2 && i.op[0].regs == i.op[1].regs - && ((i.tm.base_opcode == 0x30 - || i.tm.base_opcode == 0x28) - && i.tm.extension_opcode == None)) + && (i.tm.base_opcode == 0x30 + || i.tm.base_opcode == 0x28)) || (i.reg_operands == 1 && i.operands == 1 - && i.tm.base_opcode == 0x30 - && i.tm.extension_opcode == None))))) + && i.tm.base_opcode == 0x30))))) { /* Optimize: -O: andq $imm31, %r64 -> andl $imm31, %r32 + andq $imm7, %r64 -> andl $imm7, %r32 testq $imm31, %r64 -> testl $imm31, %r32 xorq %r64, %r64 -> xorl %r32, %r32 subq %r64, %r64 -> subl %r32, %r32 @@ -3870,7 +4060,7 @@ optimize_encoding (void) movq $imm32, %r64 -> movl $imm32, %r32 */ i.tm.opcode_modifier.norex64 = 1; - if (i.tm.base_opcode == 0xb0 || i.tm.base_opcode == 0xc6) + if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7) { /* Handle movq $imm31, %r64 -> movl $imm31, %r32 @@ -3884,20 
+4074,44 @@ optimize_encoding (void) i.types[0].bitfield.imm64 = 0; i.types[1].bitfield.dword = 1; i.types[1].bitfield.qword = 0; - if (i.tm.base_opcode == 0xc6) + if ((i.tm.base_opcode | 1) == 0xc7) { /* Handle movq $imm31, %r64 -> movl $imm31, %r32 */ - i.tm.base_opcode = 0xb0; + i.tm.base_opcode = 0xb8; i.tm.extension_opcode = None; + i.tm.opcode_modifier.w = 0; i.tm.opcode_modifier.shortform = 1; i.tm.opcode_modifier.modrm = 0; } } } else if (optimize > 1 - && i.reg_operands == 3 + && !optimize_for_space + && !is_any_vex_encoding (&i.tm) + && i.reg_operands == 2 + && i.op[0].regs == i.op[1].regs + && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8 + || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20) + && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword)) + { + /* Optimize: -O2: + andb %rN, %rN -> testb %rN, %rN + andw %rN, %rN -> testw %rN, %rN + andq %rN, %rN -> testq %rN, %rN + orb %rN, %rN -> testb %rN, %rN + orw %rN, %rN -> testw %rN, %rN + orq %rN, %rN -> testq %rN, %rN + + and outside of 64-bit mode + + andl %rN, %rN -> testl %rN, %rN + orl %rN, %rN -> testl %rN, %rN + */ + i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1); + } + else if (i.reg_operands == 3 && i.op[0].regs == i.op[1].regs && !i.types[2].bitfield.xmmword && (i.tm.opcode_modifier.vex @@ -3905,10 +4119,10 @@ optimize_encoding (void) && !i.rounding && is_evex_encoding (&i.tm) && (i.vec_encoding != vex_encoding_evex + || cpu_arch_isa_flags.bitfield.cpuavx512vl || i.tm.cpu_flags.bitfield.cpuavx512vl || (i.tm.operand_types[2].bitfield.zmmword - && i.types[2].bitfield.ymmword) - || cpu_arch_isa_flags.bitfield.cpuavx512vl))) + && i.types[2].bitfield.ymmword)))) && ((i.tm.base_opcode == 0x55 || i.tm.base_opcode == 0x6655 || i.tm.base_opcode == 0x66df @@ -3918,18 +4132,22 @@ optimize_encoding (void) || i.tm.base_opcode == 0x66f8 || i.tm.base_opcode == 0x66f9 || i.tm.base_opcode == 0x66fa - || i.tm.base_opcode == 0x66fb) + || i.tm.base_opcode == 0x66fb + || i.tm.base_opcode == 0x42 + || i.tm.base_opcode == 0x6642 + || i.tm.base_opcode == 0x47 + || i.tm.base_opcode == 0x6647) && i.tm.extension_opcode == None)) { - /* Optimize: -O2: + /* Optimize: -O1: VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd, vpsubq and vpsubw: EVEX VOP %zmmM, %zmmM, %zmmN -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16) - -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2) EVEX VOP %ymmM, %ymmM, %ymmN -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16) - -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2) VEX VOP %ymmM, %ymmM, %ymmN -> VEX VOP %xmmM, %xmmM, %xmmN VOP, one of vpandn and vpxor: @@ -3938,28 +4156,41 @@ optimize_encoding (void) VOP, one of vpandnd and vpandnq: EVEX VOP %zmmM, %zmmM, %zmmN -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16) - -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2) EVEX VOP %ymmM, %ymmM, %ymmN -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16) - -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2) VOP, one of vpxord and vpxorq: EVEX VOP %zmmM, %zmmM, %zmmN -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16) - -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2) EVEX VOP %ymmM, %ymmM, %ymmN -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16) - -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2) + VOP, one of kxord and kxorq: + 
VEX VOP %kM, %kM, %kN + -> VEX kxorw %kM, %kM, %kN + VOP, one of kandnd and kandnq: + VEX VOP %kM, %kM, %kN + -> VEX kandnw %kM, %kM, %kN */ if (is_evex_encoding (&i.tm)) { - if (i.vec_encoding == vex_encoding_evex) - i.tm.opcode_modifier.evex = EVEX128; - else + if (i.vec_encoding != vex_encoding_evex) { i.tm.opcode_modifier.vex = VEX128; i.tm.opcode_modifier.vexw = VEXW0; i.tm.opcode_modifier.evex = 0; } + else if (optimize > 1) + i.tm.opcode_modifier.evex = EVEX128; + else + return; + } + else if (i.tm.operand_types[0].bitfield.class == RegMask) + { + i.tm.base_opcode &= 0xff; + i.tm.opcode_modifier.vexw = VEXW0; } else i.tm.opcode_modifier.vex = VEX128; @@ -3971,6 +4202,84 @@ optimize_encoding (void) i.types[j].bitfield.ymmword = 0; } } + else if (i.vec_encoding != vex_encoding_evex + && !i.types[0].bitfield.zmmword + && !i.types[1].bitfield.zmmword + && !i.mask + && !i.broadcast + && is_evex_encoding (&i.tm) + && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f + || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f + || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f + || (i.tm.base_opcode & ~4) == 0x66db + || (i.tm.base_opcode & ~4) == 0x66eb) + && i.tm.extension_opcode == None) + { + /* Optimize: -O1: + VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16, + vmovdqu32 and vmovdqu64: + EVEX VOP %xmmM, %xmmN + -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16) + EVEX VOP %ymmM, %ymmN + -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16) + EVEX VOP %xmmM, mem + -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16) + EVEX VOP %ymmM, mem + -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16) + EVEX VOP mem, %xmmN + -> VEX mvmovdqa|vmovdquem, %xmmN (N < 16) + EVEX VOP mem, %ymmN + -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16) + VOP, one of vpand, vpandn, vpor, vpxor: + EVEX VOP{d,q} %xmmL, %xmmM, %xmmN + -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16) + EVEX VOP{d,q} %ymmL, %ymmM, %ymmN + -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16) + EVEX VOP{d,q} mem, %xmmM, %xmmN + -> VEX VOP mem, %xmmM, %xmmN (M and N < 16) + EVEX VOP{d,q} mem, %ymmM, %ymmN + -> VEX VOP mem, %ymmM, %ymmN (M and N < 16) + */ + for (j = 0; j < i.operands; j++) + if (operand_type_check (i.types[j], disp) + && i.op[j].disps->X_op == O_constant) + { + /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix + has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4 + bytes, we choose EVEX Disp8 over VEX Disp32. */ + int evex_disp8, vex_disp8; + unsigned int memshift = i.memshift; + offsetT n = i.op[j].disps->X_add_number; + + evex_disp8 = fits_in_disp8 (n); + i.memshift = 0; + vex_disp8 = fits_in_disp8 (n); + if (evex_disp8 != vex_disp8) + { + i.memshift = memshift; + return; + } + + i.types[j].bitfield.disp8 = vex_disp8; + break; + } + if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f) + i.tm.base_opcode ^= 0xf36f ^ 0xf26f; + i.tm.opcode_modifier.vex + = i.types[0].bitfield.ymmword ? VEX256 : VEX128; + i.tm.opcode_modifier.vexw = VEXW0; + /* VPAND, VPOR, and VPXOR are commutative. */ + if (i.reg_operands == 3 && i.tm.base_opcode != 0x66df) + i.tm.opcode_modifier.commutative = 1; + i.tm.opcode_modifier.evex = 0; + i.tm.opcode_modifier.masking = 0; + i.tm.opcode_modifier.broadcast = 0; + i.tm.opcode_modifier.disp8memshift = 0; + i.memshift = 0; + if (j < i.operands) + i.types[j].bitfield.disp8 + = fits_in_disp8 (i.op[j].disps->X_add_number); + } } /* This is the guts of the machine-dependent assembler. 
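[Editorial note, not part of the patch: worked byte counts for the Disp8 trade-off handled just above, assuming vmovdqa can use the 2-byte VEX prefix here and EVEX is always 4 bytes:

    // vmovdqa64 0x80(%rax), %ymm1   candidate for EVEX -> VEX
    //   EVEX: disp scaled by 32, 0x80 >> 5 == 4 fits in Disp8,
    //         4 (prefix) + 1 (opcode) + 1 (modrm) + 1 (disp) = 7
    //   VEX:  0x80 exceeds an unscaled Disp8, so Disp32 is needed,
    //         2 (prefix) + 1 (opcode) + 1 (modrm) + 4 (disp) = 8
    //   EVEX is shorter, so the code returns early and keeps it.
    // With 0x20(%rax) both forms fit a 1-byte displacement and the
    // shorter VEX encoding wins.
]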
LINE points to a @@ -4051,14 +4360,17 @@ md_assemble (char *line) if (sse_check != check_none && !i.tm.opcode_modifier.noavx && !i.tm.cpu_flags.bitfield.cpuavx + && !i.tm.cpu_flags.bitfield.cpuavx512f && (i.tm.cpu_flags.bitfield.cpusse || i.tm.cpu_flags.bitfield.cpusse2 || i.tm.cpu_flags.bitfield.cpusse3 || i.tm.cpu_flags.bitfield.cpussse3 || i.tm.cpu_flags.bitfield.cpusse4_1 || i.tm.cpu_flags.bitfield.cpusse4_2 + || i.tm.cpu_flags.bitfield.cpusse4a || i.tm.cpu_flags.bitfield.cpupclmul || i.tm.cpu_flags.bitfield.cpuaes + || i.tm.cpu_flags.bitfield.cpusha || i.tm.cpu_flags.bitfield.cpugfni)) { (sse_check == check_warning @@ -4100,12 +4412,19 @@ md_assemble (char *line) && (!i.tm.opcode_modifier.islockable || i.mem_operands == 0 || (i.tm.base_opcode != 0x86 - && !operand_type_check (i.types[i.operands - 1], anymem)))) + && !(i.flags[i.operands - 1] & Operand_Mem)))) { as_bad (_("expecting lockable instruction after `lock'")); return; } + /* Check for data size prefix on VEX/XOP/EVEX encoded insns. */ + if (i.prefix[DATA_PREFIX] && is_any_vex_encoding (&i.tm)) + { + as_bad (_("data size prefix invalid with `%s'"), i.tm.name); + return; + } + /* Check if HLE prefix is OK. */ if (i.hle_prefix && !check_hle ()) return; @@ -4141,8 +4460,9 @@ md_assemble (char *line) } /* Check string instruction segment overrides. */ - if (i.tm.opcode_modifier.isstring && i.mem_operands != 0) + if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0) { + gas_assert (i.mem_operands); if (!check_string ()) return; i.disp_operands = 0; @@ -4170,9 +4490,8 @@ md_assemble (char *line) with 3 operands or less. */ if (i.operands <= 3) for (j = 0; j < i.operands; j++) - if (i.types[j].bitfield.inoutportreg - || i.types[j].bitfield.shiftcount - || (i.types[j].bitfield.acc && !i.types[j].bitfield.xmmword)) + if (i.types[j].bitfield.instance != InstanceNone + && !i.types[j].bitfield.xmmword) i.reg_operands--; /* ImmExt should be processed after SSE2AVX. */ @@ -4192,12 +4511,11 @@ md_assemble (char *line) as_warn (_("translating to `%sp'"), i.tm.name); } - if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.vexopcode - || is_evex_encoding (&i.tm)) + if (is_any_vex_encoding (&i.tm)) { - if (flag_code == CODE_16BIT) + if (!cpu_arch_flags.bitfield.cpui286) { - as_bad (_("instruction `%s' isn't supported in 16-bit mode."), + as_bad (_("instruction `%s' isn't supported outside of protected mode."), i.tm.name); return; } @@ -4219,9 +4537,9 @@ md_assemble (char *line) i.imm_operands = 0; } - if ((i.tm.opcode_modifier.jump - || i.tm.opcode_modifier.jumpbyte - || i.tm.opcode_modifier.jumpdword) + if ((i.tm.opcode_modifier.jump == JUMP + || i.tm.opcode_modifier.jump == JUMP_BYTE + || i.tm.opcode_modifier.jump == JUMP_DWORD) && i.op[0].disps->X_op == O_constant) { /* Convert "jmp constant" (and "call constant") to a jump (call) to @@ -4238,12 +4556,12 @@ md_assemble (char *line) instruction already has a prefix, we need to convert old registers to new ones. 
*/ - if ((i.types[0].bitfield.reg && i.types[0].bitfield.byte + if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte && (i.op[0].regs->reg_flags & RegRex64) != 0) - || (i.types[1].bitfield.reg && i.types[1].bitfield.byte + || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte && (i.op[1].regs->reg_flags & RegRex64) != 0) - || (((i.types[0].bitfield.reg && i.types[0].bitfield.byte) - || (i.types[1].bitfield.reg && i.types[1].bitfield.byte)) + || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte) + || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte)) && i.rex != 0)) { int x; @@ -4252,9 +4570,10 @@ md_assemble (char *line) for (x = 0; x < 2; x++) { /* Look for 8 bit operand that uses old registers. */ - if (i.types[x].bitfield.reg && i.types[x].bitfield.byte + if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte && (i.op[x].regs->reg_flags & RegRex64) == 0) { + gas_assert (!(i.op[x].regs->reg_flags & RegRex)); /* In case it is "hi" register, give up. */ if (i.op[x].regs->reg_num > 3) as_bad (_("can't encode register '%s%s' in an " @@ -4273,15 +4592,16 @@ md_assemble (char *line) if (i.rex == 0 && i.rex_encoding) { /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand - that uses legacy register. If it is "hi" register, don't add + that uses legacy register. If it is "hi" register, don't add the REX_OPCODE byte. */ int x; for (x = 0; x < 2; x++) - if (i.types[x].bitfield.reg + if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte && (i.op[x].regs->reg_flags & RegRex64) == 0 && i.op[x].regs->reg_num > 3) { + gas_assert (!(i.op[x].regs->reg_flags & RegRex)); i.rex_encoding = FALSE; break; } @@ -4295,6 +4615,17 @@ md_assemble (char *line) /* We are ready to output the insn. */ output_insn (); + + last_insn.seg = now_seg; + + if (i.tm.opcode_modifier.isprefix) + { + last_insn.kind = last_insn_prefix; + last_insn.name = i.tm.name; + last_insn.file = as_where (&last_insn.line); + } + else + last_insn.kind = last_insn_other; } static char * @@ -4359,10 +4690,10 @@ parse_insn (char *line, char *mnemonic) } /* If we are in 16-bit mode, do not allow addr16 or data16. Similarly, in 32-bit mode, do not allow addr32 or data32. */ - if ((current_templates->start->opcode_modifier.size16 - || current_templates->start->opcode_modifier.size32) + if ((current_templates->start->opcode_modifier.size == SIZE16 + || current_templates->start->opcode_modifier.size == SIZE32) && flag_code != CODE_64BIT - && (current_templates->start->opcode_modifier.size32 + && ((current_templates->start->opcode_modifier.size == SIZE32) ^ (flag_code == CODE_16BIT))) { as_bad (_("redundant %s prefix"), @@ -4446,10 +4777,11 @@ parse_insn (char *line, char *mnemonic) if (!current_templates) { - /* Check if we should swap operand or force 32bit displacement in + /* Deprecated functionality (new code should use pseudo-prefixes instead): + Check if we should swap operand or force 32bit displacement in encoding. */ if (mnem_p - 2 == dot_p && dot_p[1] == 's') - i.dir_encoding = dir_encoding_store; + i.dir_encoding = dir_encoding_swap; else if (mnem_p - 3 == dot_p && dot_p[1] == 'd' && dot_p[2] == '8') @@ -4469,46 +4801,50 @@ parse_insn (char *line, char *mnemonic) if (!current_templates) { check_suffix: - /* See if we can get a match by trimming off a suffix. */ - switch (mnem_p[-1]) + if (mnem_p > mnemonic) { - case WORD_MNEM_SUFFIX: - if (intel_syntax && (intel_float_operand (mnemonic) & 2)) - i.suffix = SHORT_MNEM_SUFFIX; - else - /* Fall through. 
*/ - case BYTE_MNEM_SUFFIX: - case QWORD_MNEM_SUFFIX: - i.suffix = mnem_p[-1]; - mnem_p[-1] = '\0'; - current_templates = (const templates *) hash_find (op_hash, - mnemonic); - break; - case SHORT_MNEM_SUFFIX: - case LONG_MNEM_SUFFIX: - if (!intel_syntax) + /* See if we can get a match by trimming off a suffix. */ + switch (mnem_p[-1]) { - i.suffix = mnem_p[-1]; - mnem_p[-1] = '\0'; - current_templates = (const templates *) hash_find (op_hash, - mnemonic); - } - break; - - /* Intel Syntax. */ - case 'd': - if (intel_syntax) - { - if (intel_float_operand (mnemonic) == 1) + case WORD_MNEM_SUFFIX: + if (intel_syntax && (intel_float_operand (mnemonic) & 2)) i.suffix = SHORT_MNEM_SUFFIX; else - i.suffix = LONG_MNEM_SUFFIX; + /* Fall through. */ + case BYTE_MNEM_SUFFIX: + case QWORD_MNEM_SUFFIX: + i.suffix = mnem_p[-1]; mnem_p[-1] = '\0'; current_templates = (const templates *) hash_find (op_hash, - mnemonic); + mnemonic); + break; + case SHORT_MNEM_SUFFIX: + case LONG_MNEM_SUFFIX: + if (!intel_syntax) + { + i.suffix = mnem_p[-1]; + mnem_p[-1] = '\0'; + current_templates = (const templates *) hash_find (op_hash, + mnemonic); + } + break; + + /* Intel Syntax. */ + case 'd': + if (intel_syntax) + { + if (intel_float_operand (mnemonic) == 1) + i.suffix = SHORT_MNEM_SUFFIX; + else + i.suffix = LONG_MNEM_SUFFIX; + mnem_p[-1] = '\0'; + current_templates = (const templates *) hash_find (op_hash, + mnemonic); + } + break; } - break; } + if (!current_templates) { as_bad (_("no such instruction: `%s'"), token_start); @@ -4516,8 +4852,8 @@ check_suffix: } } - if (current_templates->start->opcode_modifier.jump - || current_templates->start->opcode_modifier.jumpbyte) + if (current_templates->start->opcode_modifier.jump == JUMP + || current_templates->start->opcode_modifier.jump == JUMP_BYTE) { /* Check for a branch hint. We allow ",pt" and ",pn" for predict taken and predict not taken respectively. @@ -4656,6 +4992,13 @@ parse_operands (char *l, const char *mnemonic) /* Now parse operand adding info to 'i' as we go along. */ END_STRING_AND_SAVE (l); + if (i.mem_operands > 1) + { + as_bad (_("too many memory references for `%s'"), + mnemonic); + return 0; + } + if (intel_syntax) operand_ok = i386_intel_operand (token_start, @@ -4701,14 +5044,21 @@ swap_2_operands (int xchg1, int xchg2) { union i386_op temp_op; i386_operand_type temp_type; + unsigned int temp_flags; enum bfd_reloc_code_real temp_reloc; temp_type = i.types[xchg2]; i.types[xchg2] = i.types[xchg1]; i.types[xchg1] = temp_type; + + temp_flags = i.flags[xchg2]; + i.flags[xchg2] = i.flags[xchg1]; + i.flags[xchg1] = temp_flags; + temp_op = i.op[xchg2]; i.op[xchg2] = i.op[xchg1]; i.op[xchg1] = temp_op; + temp_reloc = i.reloc[xchg2]; i.reloc[xchg2] = i.reloc[xchg1]; i.reloc[xchg1] = temp_reloc; @@ -4775,26 +5125,28 @@ optimize_imm (void) else if (i.reg_operands) { /* Figure out a suffix from the last register operand specified. - We can't do this properly yet, ie. excluding InOutPortReg, - but the following works for instructions with immediates. - In any case, we can't set i.suffix yet. */ + We can't do this properly yet, i.e. excluding special register + instances, but the following works for instructions with + immediates. In any case, we can't set i.suffix yet. 
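[Editorial note, not part of the patch: an example of what this suffix guess feeds. For

    // add $0x12, %ebx        (no explicit mnemonic suffix)

the last register operand is 32-bit, so guess_suffix becomes LONG_MNEM_SUFFIX; optimize_imm can then keep the sign-extended-imm8 type alive for the constant, letting template matching pick the 1-byte-immediate 0x83 encoding over the 4-byte-immediate 0x81 form.]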
*/ for (op = i.operands; --op >= 0;) - if (i.types[op].bitfield.reg && i.types[op].bitfield.byte) + if (i.types[op].bitfield.class != Reg) + continue; + else if (i.types[op].bitfield.byte) { guess_suffix = BYTE_MNEM_SUFFIX; break; } - else if (i.types[op].bitfield.reg && i.types[op].bitfield.word) + else if (i.types[op].bitfield.word) { guess_suffix = WORD_MNEM_SUFFIX; break; } - else if (i.types[op].bitfield.reg && i.types[op].bitfield.dword) + else if (i.types[op].bitfield.dword) { guess_suffix = LONG_MNEM_SUFFIX; break; } - else if (i.types[op].bitfield.reg && i.types[op].bitfield.qword) + else if (i.types[op].bitfield.qword) { guess_suffix = QWORD_MNEM_SUFFIX; break; @@ -4883,8 +5235,10 @@ optimize_imm (void) for (t = current_templates->start; t < current_templates->end; ++t) - allowed = operand_type_or (allowed, - t->operand_types[op]); + { + allowed = operand_type_or (allowed, t->operand_types[op]); + allowed = operand_type_and (allowed, anyimm); + } switch (guess_suffix) { case QWORD_MNEM_SUFFIX: @@ -4991,6 +5345,22 @@ optimize_disp (void) } } +/* Return 1 if there is a match in broadcast bytes between operand + GIVEN and instruction template T. */ + +static INLINE int +match_broadcast_size (const insn_template *t, unsigned int given) +{ + return ((t->opcode_modifier.broadcast == BYTE_BROADCAST + && i.types[given].bitfield.byte) + || (t->opcode_modifier.broadcast == WORD_BROADCAST + && i.types[given].bitfield.word) + || (t->opcode_modifier.broadcast == DWORD_BROADCAST + && i.types[given].bitfield.dword) + || (t->opcode_modifier.broadcast == QWORD_BROADCAST + && i.types[given].bitfield.qword)); +} + /* Check if operands are valid for the instruction. */ static int @@ -5060,10 +5430,10 @@ check_VecOperands (const insn_template *t) gas_assert (i.reg_operands == 2 || i.mask); if (i.reg_operands == 2 && !i.mask) { - gas_assert (i.types[0].bitfield.regsimd); + gas_assert (i.types[0].bitfield.class == RegSIMD); gas_assert (i.types[0].bitfield.xmmword || i.types[0].bitfield.ymmword); - gas_assert (i.types[2].bitfield.regsimd); + gas_assert (i.types[2].bitfield.class == RegSIMD); gas_assert (i.types[2].bitfield.xmmword || i.types[2].bitfield.ymmword); if (operand_check == check_none) @@ -5084,7 +5454,7 @@ check_VecOperands (const insn_template *t) } else if (i.reg_operands == 1 && i.mask) { - if (i.types[1].bitfield.regsimd + if (i.types[1].bitfield.class == RegSIMD && (i.types[1].bitfield.xmmword || i.types[1].bitfield.ymmword || i.types[1].bitfield.zmmword) @@ -5109,23 +5479,29 @@ check_VecOperands (const insn_template *t) i386_operand_type type, overlap; /* Check if specified broadcast is supported in this instruction, - and it's applied to memory operand of DWORD or QWORD type. */ + and its broadcast bytes match the memory operand. */ op = i.broadcast->operand; if (!t->opcode_modifier.broadcast - || !i.types[op].bitfield.mem + || !(i.flags[op] & Operand_Mem) || (!i.types[op].bitfield.unspecified - && (t->operand_types[op].bitfield.dword - ? !i.types[op].bitfield.dword - : !i.types[op].bitfield.qword))) + && !match_broadcast_size (t, op))) { bad_broadcast: i.error = unsupported_broadcast; return 1; } + i.broadcast->bytes = ((1 << (t->opcode_modifier.broadcast - 1)) + * i.broadcast->type); operand_type_set (&type, 0); - switch ((t->operand_types[op].bitfield.dword ? 
4 : 8) * i.broadcast->type) + switch (i.broadcast->bytes) { + case 2: + type.bitfield.word = 1; + break; + case 4: + type.bitfield.dword = 1; + break; case 8: type.bitfield.qword = 1; break; @@ -5168,13 +5544,11 @@ check_VecOperands (const insn_template *t) { /* Find memory operand. */ for (op = 0; op < i.operands; op++) - if (operand_type_check (i.types[op], anymem)) + if (i.flags[op] & Operand_Mem) break; gas_assert (op < i.operands); /* Check size of the memory operand. */ - if (t->operand_types[op].bitfield.dword - ? i.types[op].bitfield.dword - : i.types[op].bitfield.qword) + if (match_broadcast_size (t, op)) { i.error = broadcast_needed; return 1; @@ -5184,13 +5558,39 @@ check_VecOperands (const insn_template *t) op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */ /* Check if requested masking is supported. */ - if (i.mask - && (!t->opcode_modifier.masking - || (i.mask->zeroing - && t->opcode_modifier.masking == MERGING_MASKING))) + if (i.mask) { - i.error = unsupported_masking; - return 1; + switch (t->opcode_modifier.masking) + { + case BOTH_MASKING: + break; + case MERGING_MASKING: + if (i.mask->zeroing) + { + case 0: + i.error = unsupported_masking; + return 1; + } + break; + case DYNAMIC_MASKING: + /* Memory destinations allow only merging masking. */ + if (i.mask->zeroing && i.mem_operands) + { + /* Find memory operand. */ + for (op = 0; op < i.operands; op++) + if (i.flags[op] & Operand_Mem) + break; + gas_assert (op < i.operands); + if (op == i.operands - 1) + { + i.error = unsupported_masking; + return 1; + } + } + break; + default: + abort (); + } } /* Check if masking is applied to dest operand. */ @@ -5203,11 +5603,8 @@ check_VecOperands (const insn_template *t) /* Check RC/SAE. */ if (i.rounding) { - if ((i.rounding->type != saeonly - && !t->opcode_modifier.staticrounding) - || (i.rounding->type == saeonly - && (t->opcode_modifier.staticrounding - || !t->opcode_modifier.sae))) + if (!t->opcode_modifier.sae + || (i.rounding->type != saeonly && !t->opcode_modifier.staticrounding)) { i.error = unsupported_rc_sae; return 1; @@ -5228,7 +5625,7 @@ check_VecOperands (const insn_template *t) && i.disp_encoding != disp_encoding_32bit) { if (i.broadcast) - i.memshift = t->operand_types[op].bitfield.dword ? 2 : 3; + i.memshift = t->opcode_modifier.broadcast - 1; else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL) i.memshift = t->opcode_modifier.disp8memshift; else @@ -5237,16 +5634,19 @@ check_VecOperands (const insn_template *t) i.memshift = 0; for (op = 0; op < i.operands; op++) - if (operand_type_check (i.types[op], anymem)) + if (i.flags[op] & Operand_Mem) { - if (t->operand_types[op].bitfield.xmmword - + t->operand_types[op].bitfield.ymmword - + t->operand_types[op].bitfield.zmmword <= 1) + if (t->opcode_modifier.evex == EVEXLIG) + i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX); + else if (t->operand_types[op].bitfield.xmmword + + t->operand_types[op].bitfield.ymmword + + t->operand_types[op].bitfield.zmmword <= 1) type = &t->operand_types[op]; else if (!i.types[op].bitfield.unspecified) type = &i.types[op]; } - else if (i.types[op].bitfield.regsimd) + else if (i.types[op].bitfield.class == RegSIMD + && t->opcode_modifier.evex != EVEXLIG) { if (i.types[op].bitfield.zmmword) i.memshift = 6; @@ -5317,8 +5717,8 @@ VEX_check_operands (const insn_template *t) return 0; } - /* Only check VEX_Imm4, which must be the first operand. */ - if (t->operand_types[0].bitfield.vec_imm4) + /* Check the special Imm4 cases; must be the first operand. 
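[Editorial note, not part of the patch: the 5-operand XOP forms this now keys on are the vpermil2ps and vpermil2pd family, where one register operand is packed into the upper half of the trailing immediate byte, leaving at most 4 bits for the explicit selector; hence the fits_in_imm4 check below:

    // vpermil2ps $sel, %xmm4, %xmm3, %xmm2, %xmm1
    //   imm8[7:4] <- fourth register operand (the "is4" byte)
    //   imm8[3:0] <- the explicit $sel, so it must pass fits_in_imm4
]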
*/ + if (t->cpu_flags.bitfield.cpuxop && t->operands == 5) { if (i.op[0].imms->X_op != O_constant || !fits_in_imm4 (i.op[0].imms->X_add_number)) @@ -5327,8 +5727,8 @@ VEX_check_operands (const insn_template *t) return 1; } - /* Turn off Imm8 so that update_imm won't complain. */ - i.types[0] = vec_imm4; + /* Turn off Imm so that update_imm won't complain. */ + operand_type_set (&i.types[0], 0); } return 0; @@ -5342,7 +5742,7 @@ match_template (char mnem_suffix) i386_operand_type overlap0, overlap1, overlap2, overlap3; i386_operand_type overlap4; unsigned int found_reverse_match; - i386_opcode_modifier suffix_check, mnemsuf_check; + i386_opcode_modifier suffix_check; i386_operand_type operand_types [MAX_OPERANDS]; int addr_prefix_disp; unsigned int j; @@ -5357,33 +5757,33 @@ match_template (char mnem_suffix) found_reverse_match = 0; addr_prefix_disp = -1; + /* Prepare for mnemonic suffix check. */ memset (&suffix_check, 0, sizeof (suffix_check)); - if (intel_syntax && i.broadcast) - /* nothing */; - else if (i.suffix == BYTE_MNEM_SUFFIX) - suffix_check.no_bsuf = 1; - else if (i.suffix == WORD_MNEM_SUFFIX) - suffix_check.no_wsuf = 1; - else if (i.suffix == SHORT_MNEM_SUFFIX) - suffix_check.no_ssuf = 1; - else if (i.suffix == LONG_MNEM_SUFFIX) - suffix_check.no_lsuf = 1; - else if (i.suffix == QWORD_MNEM_SUFFIX) - suffix_check.no_qsuf = 1; - else if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX) - suffix_check.no_ldsuf = 1; - - memset (&mnemsuf_check, 0, sizeof (mnemsuf_check)); - if (intel_syntax) + switch (mnem_suffix) { - switch (mnem_suffix) - { - case BYTE_MNEM_SUFFIX: mnemsuf_check.no_bsuf = 1; break; - case WORD_MNEM_SUFFIX: mnemsuf_check.no_wsuf = 1; break; - case SHORT_MNEM_SUFFIX: mnemsuf_check.no_ssuf = 1; break; - case LONG_MNEM_SUFFIX: mnemsuf_check.no_lsuf = 1; break; - case QWORD_MNEM_SUFFIX: mnemsuf_check.no_qsuf = 1; break; - } + case BYTE_MNEM_SUFFIX: + suffix_check.no_bsuf = 1; + break; + case WORD_MNEM_SUFFIX: + suffix_check.no_wsuf = 1; + break; + case SHORT_MNEM_SUFFIX: + suffix_check.no_ssuf = 1; + break; + case LONG_MNEM_SUFFIX: + suffix_check.no_lsuf = 1; + break; + case QWORD_MNEM_SUFFIX: + suffix_check.no_qsuf = 1; + break; + default: + /* NB: In Intel syntax, normally we can check for memory operand + size when there is no mnemonic suffix. But jmp and call have + 2 different encodings with Dword memory operand size, one with + No_ldSuf and the other without. i.suffix is set to + LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf. */ + if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX) + suffix_check.no_ldsuf = 1; } /* Must have right number of operands. */ @@ -5392,6 +5792,7 @@ match_template (char mnem_suffix) for (t = current_templates->start; t < current_templates->end; t++) { addr_prefix_disp = -1; + found_reverse_match = 0; if (i.operands != t->operands) continue; @@ -5416,29 +5817,32 @@ match_template (char mnem_suffix) || (!intel64 && t->opcode_modifier.intel64)) continue; - /* Check the suffix, except for some instructions in intel mode. */ + /* Check the suffix. 
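[Editorial note, not part of the patch: suffix_check is the complement of a template's No_?Suf flags, so the test below rejects templates that forbid the suffix actually written:

    // "addb $1, (%rax)"  ->  i.suffix == BYTE_MNEM_SUFFIX
    //   suffix_check.no_bsuf = 1
    //   any template carrying No_bSuf (one that must never take the
    //   'b' suffix) is skipped.
]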
*/ i.error = invalid_instruction_suffix; - if ((!intel_syntax || !t->opcode_modifier.ignoresize) - && ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf) - || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf) - || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf) - || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf) - || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf) - || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))) - continue; - /* In Intel mode all mnemonic suffixes must be explicitly allowed. */ - if ((t->opcode_modifier.no_bsuf && mnemsuf_check.no_bsuf) - || (t->opcode_modifier.no_wsuf && mnemsuf_check.no_wsuf) - || (t->opcode_modifier.no_lsuf && mnemsuf_check.no_lsuf) - || (t->opcode_modifier.no_ssuf && mnemsuf_check.no_ssuf) - || (t->opcode_modifier.no_qsuf && mnemsuf_check.no_qsuf) - || (t->opcode_modifier.no_ldsuf && mnemsuf_check.no_ldsuf)) + if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf) + || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf) + || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf) + || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf) + || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf) + || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf)) continue; size_match = operand_size_match (t); if (!size_match) continue; + /* This is intentionally not + + if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)) + + as the case of a missing * on the operand is accepted (perhaps with + a warning, issued further down). */ + if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE) + { + i.error = operand_type_mismatch; + continue; + } + for (j = 0; j < MAX_OPERANDS; j++) operand_types[j] = t->operand_types[j]; @@ -5450,10 +5854,10 @@ match_template (char mnem_suffix) && !t->opcode_modifier.broadcast && !intel_float_operand (t->name)) : intel_float_operand (t->name) != 2) - && ((!operand_types[0].bitfield.regmmx - && !operand_types[0].bitfield.regsimd) - || (!operand_types[t->operands > 1].bitfield.regmmx - && !operand_types[t->operands > 1].bitfield.regsimd)) + && ((operand_types[0].bitfield.class != RegMMX + && operand_types[0].bitfield.class != RegSIMD) + || (operand_types[t->operands > 1].bitfield.class != RegMMX + && operand_types[t->operands > 1].bitfield.class != RegSIMD)) && (t->base_opcode != 0x0fc7 || t->extension_opcode != 1 /* cmpxchg8b */)) continue; @@ -5465,10 +5869,11 @@ match_template (char mnem_suffix) ? (!t->opcode_modifier.ignoresize && !intel_float_operand (t->name)) : intel_float_operand (t->name) != 2) - && ((!operand_types[0].bitfield.regmmx - && !operand_types[0].bitfield.regsimd) - || (!operand_types[t->operands > 1].bitfield.regmmx - && !operand_types[t->operands > 1].bitfield.regsimd))) + && ((operand_types[0].bitfield.class != RegMMX + && operand_types[0].bitfield.class != RegSIMD) + || (operand_types[t->operands > 1].bitfield.class != RegMMX + && operand_types[t->operands > 1].bitfield.class + != RegSIMD))) continue; /* Do not verify operands when there are none. */ @@ -5479,51 +5884,50 @@ match_template (char mnem_suffix) break; } - /* Address size prefix will turn Disp64/Disp32/Disp16 operand - into Disp32/Disp16/Disp32 operand. */ - if (i.prefix[ADDR_PREFIX] != 0) - { - /* There should be only one Disp operand. 
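[Note: the reworked displacement hunk here folds the old per-mode loops into one place: the 0x67 address-size prefix toggles between the two legal address widths of the current mode, and the template's displacement bits are narrowed to match (Disp32S/Disp64 only survive un-overridden 64-bit code). The mode rules as a standalone sketch, with illustrative names:

#include <stdbool.h>

enum x86_mode { MODE_16BIT, MODE_32BIT, MODE_64BIT };

/* Effective address width given the mode and whether a 0x67
   address-size prefix is present.  16-bit and 32-bit modes flip
   into each other; 64-bit mode drops to 32-bit addressing.  */
static unsigned int
effective_address_bits (enum x86_mode mode, bool addr_prefix)
{
  switch (mode)
    {
    case MODE_16BIT: return addr_prefix ? 32 : 16;
    case MODE_32BIT: return addr_prefix ? 16 : 32;
    case MODE_64BIT: return addr_prefix ? 32 : 64;
    }
  return 0;
}]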
*/ - switch (flag_code) - { - case CODE_16BIT: - for (j = 0; j < MAX_OPERANDS; j++) - { - if (operand_types[j].bitfield.disp16) - { - addr_prefix_disp = j; - operand_types[j].bitfield.disp32 = 1; - operand_types[j].bitfield.disp16 = 0; - break; - } - } + if (!t->opcode_modifier.jump + || t->opcode_modifier.jump == JUMP_ABSOLUTE) + { + /* There should be only one Disp operand. */ + for (j = 0; j < MAX_OPERANDS; j++) + if (operand_type_check (operand_types[j], disp)) break; - case CODE_32BIT: - for (j = 0; j < MAX_OPERANDS; j++) + if (j < MAX_OPERANDS) + { + bfd_boolean override = (i.prefix[ADDR_PREFIX] != 0); + + addr_prefix_disp = j; + + /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16 + operand into Disp32/Disp32/Disp16/Disp32 operand. */ + switch (flag_code) { - if (operand_types[j].bitfield.disp32) + case CODE_16BIT: + override = !override; + /* Fall through. */ + case CODE_32BIT: + if (operand_types[j].bitfield.disp32 + && operand_types[j].bitfield.disp16) { - addr_prefix_disp = j; - operand_types[j].bitfield.disp32 = 0; - operand_types[j].bitfield.disp16 = 1; - break; + operand_types[j].bitfield.disp16 = override; + operand_types[j].bitfield.disp32 = !override; } - } - break; - case CODE_64BIT: - for (j = 0; j < MAX_OPERANDS; j++) - { - if (operand_types[j].bitfield.disp64) + operand_types[j].bitfield.disp32s = 0; + operand_types[j].bitfield.disp64 = 0; + break; + + case CODE_64BIT: + if (operand_types[j].bitfield.disp32s + || operand_types[j].bitfield.disp64) { - addr_prefix_disp = j; - operand_types[j].bitfield.disp64 = 0; - operand_types[j].bitfield.disp32 = 1; - break; + operand_types[j].bitfield.disp64 &= !override; + operand_types[j].bitfield.disp32s &= !override; + operand_types[j].bitfield.disp32 = override; } + operand_types[j].bitfield.disp16 = 0; + break; } - break; } - } + } /* Force 0x8b encoding for "mov foo@GOT, %eax". */ if (i.reloc[0] == BFD_RELOC_386_GOT32 && t->base_opcode == 0xa0) @@ -5553,28 +5957,55 @@ match_template (char mnem_suffix) zero-extend %eax to %rax. */ if (flag_code == CODE_64BIT && t->base_opcode == 0x90 - && operand_type_equal (&i.types [0], &acc32) - && operand_type_equal (&i.types [1], &acc32)) + && i.types[0].bitfield.instance == Accum + && i.types[0].bitfield.dword + && i.types[1].bitfield.instance == Accum + && i.types[1].bitfield.dword) continue; /* xrelease mov %eax, is another special case. It must not match the accumulator-only encoding of mov. */ if (flag_code != CODE_64BIT && i.hle_prefix && t->base_opcode == 0xa0 - && i.types[0].bitfield.acc - && operand_type_check (i.types[1], anymem)) + && i.types[0].bitfield.instance == Accum + && (i.flags[1] & Operand_Mem)) continue; - if (!(size_match & MATCH_STRAIGHT)) - goto check_reverse; - /* If we want store form, we reverse direction of operands. */ - if (i.dir_encoding == dir_encoding_store - && t->opcode_modifier.d) - goto check_reverse; /* Fall through. */ case 3: + if (!(size_match & MATCH_STRAIGHT)) + goto check_reverse; + /* Reverse direction of operands if swapping is possible in the first + place (operands need to be symmetric) and + - the load form is requested, and the template is a store form, + - the store form is requested, and the template is a load form, + - the non-default (swapped) form is requested. 
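[Note: the new dir_encoding_swap path above lets the load, store, or swapped form be requested explicitly for symmetric two-operand templates. For plain integer opcodes the two encodings differ only in the D bit; a sketch, assuming standard register numbering, so that "mov %bl, %al" is 88 d8 in the store form and 8a c3 in the load form:

#include <stdbool.h>
#include <stdint.h>

#define OPCODE_D 0x02   /* direction bit of integer opcodes */

/* Opcode for an 8-bit register-to-register mov in either direction.  */
static uint8_t
mov8_opcode (bool load_form)
{
  uint8_t opcode = 0x88;     /* mov r/m8, r8  (store form) */
  if (load_form)
    opcode ^= OPCODE_D;      /* 0x8a: mov r8, r/m8 (load form) */
  return opcode;
}]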
*/ + overlap1 = operand_type_and (operand_types[0], operand_types[1]); + if (t->opcode_modifier.d && i.reg_operands == i.operands + && !operand_type_all_zero (&overlap1)) + switch (i.dir_encoding) + { + case dir_encoding_load: + if (operand_type_check (operand_types[i.operands - 1], anymem) + || t->opcode_modifier.regmem) + goto check_reverse; + break; + + case dir_encoding_store: + if (!operand_type_check (operand_types[i.operands - 1], anymem) + && !t->opcode_modifier.regmem) + goto check_reverse; + break; + + case dir_encoding_swap: + goto check_reverse; + + case dir_encoding_default: + break; + } /* If we want store form, we skip the current load. */ - if (i.dir_encoding == dir_encoding_store + if ((i.dir_encoding == dir_encoding_store + || i.dir_encoding == dir_encoding_swap) && i.mem_operands == 0 && t->opcode_modifier.load) continue; @@ -5598,14 +6029,14 @@ check_reverse: if (!(size_match & MATCH_REVERSE)) continue; /* Try reversing direction of operands. */ - overlap0 = operand_type_and (i.types[0], operand_types[1]); - overlap1 = operand_type_and (i.types[1], operand_types[0]); + overlap0 = operand_type_and (i.types[0], operand_types[i.operands - 1]); + overlap1 = operand_type_and (i.types[i.operands - 1], operand_types[0]); if (!operand_type_match (overlap0, i.types[0]) - || !operand_type_match (overlap1, i.types[1]) + || !operand_type_match (overlap1, i.types[i.operands - 1]) || (check_register && !operand_type_register_match (i.types[0], - operand_types[1], - i.types[1], + operand_types[i.operands - 1], + i.types[i.operands - 1], operand_types[0]))) { /* Does not match either direction. */ @@ -5617,6 +6048,13 @@ check_reverse: found_reverse_match = 0; else if (operand_types[0].bitfield.tbyte) found_reverse_match = Opcode_FloatD; + else if (operand_types[0].bitfield.xmmword + || operand_types[i.operands - 1].bitfield.xmmword + || operand_types[0].bitfield.class == RegMMX + || operand_types[i.operands - 1].bitfield.class == RegMMX + || is_any_vex_encoding(t)) + found_reverse_match = (t->base_opcode & 0xee) != 0x6e + ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD; else found_reverse_match = Opcode_D; if (t->opcode_modifier.floatr) @@ -5687,10 +6125,7 @@ check_reverse: slip through to break. */ } if (!found_cpu_match) - { - found_reverse_match = 0; - continue; - } + continue; /* Check if vector and VEX operands are valid. */ if (check_VecOperands (t) || VEX_check_operands (t)) @@ -5751,9 +6186,6 @@ check_reverse: case unsupported_broadcast: err_msg = _("unsupported broadcast"); break; - case broadcast_not_on_src_operand: - err_msg = _("broadcast not on source memory operand"); - break; case broadcast_needed: err_msg = _("broadcast is needed for operand of such type"); break; @@ -5787,11 +6219,8 @@ check_reverse: if (!quiet_warnings) { if (!intel_syntax - && (i.types[0].bitfield.jumpabsolute - != operand_types[0].bitfield.jumpabsolute)) - { - as_warn (_("indirect %s without `*'"), t->name); - } + && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))) + as_warn (_("indirect %s without `*'"), t->name); if (t->opcode_modifier.isprefix && t->opcode_modifier.ignoresize) @@ -5811,14 +6240,22 @@ check_reverse: if (found_reverse_match) { - /* If we found a reverse match we must alter the opcode - direction bit. found_reverse_match holds bits to change - (different for int & float insns). */ + /* If we found a reverse match we must alter the opcode direction + bit and clear/flip the regmem modifier one. found_reverse_match + holds bits to change (different for int & float insns). 
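[Note: found_reverse_match is applied as an XOR mask over the base opcode, and the hunk above adds distinct masks for SIMD moves because their direction bit does not sit where the integer D bit does. The mask values follow directly from the opcode pairs themselves (movaps is 0f 28/0f 29, movd is 0f 6e/0f 7e); a sketch:

#include <stdint.h>

#define OPCODE_SIMD_FLOATD 0x01   /* movaps and friends */
#define OPCODE_SIMD_INTD   0x10   /* movd/movq group */

/* Flip a two-byte SIMD move between its load and store forms.  */
static uint32_t
reverse_simd_opcode (uint32_t base_opcode)
{
  return base_opcode ^ ((base_opcode & 0xee) == 0x6e
                        ? OPCODE_SIMD_INTD : OPCODE_SIMD_FLOATD);
}]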
*/ i.tm.base_opcode ^= found_reverse_match; - i.tm.operand_types[0] = operand_types[1]; - i.tm.operand_types[1] = operand_types[0]; + i.tm.operand_types[0] = operand_types[i.operands - 1]; + i.tm.operand_types[i.operands - 1] = operand_types[0]; + + /* Certain SIMD insns have their load forms specified in the opcode + table, and hence we need to _set_ RegMem instead of clearing it. + We need to avoid setting the bit though on insns like KMOVW. */ + i.tm.opcode_modifier.regmem + = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d + && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx + && !i.tm.opcode_modifier.regmem; } return t; @@ -5827,34 +6264,24 @@ check_reverse: static int check_string (void) { - int mem_op = operand_type_check (i.types[0], anymem) ? 0 : 1; - if (i.tm.operand_types[mem_op].bitfield.esseg) - { - if (i.seg[0] != NULL && i.seg[0] != &es) - { - as_bad (_("`%s' operand %d must use `%ses' segment"), - i.tm.name, - mem_op + 1, - register_prefix); - return 0; - } - /* There's only ever one segment override allowed per instruction. - This instruction possibly has a legal segment override on the - second operand, so copy the segment to where non-string - instructions store it, allowing common code. */ - i.seg[0] = i.seg[1]; - } - else if (i.tm.operand_types[mem_op + 1].bitfield.esseg) + unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0; + unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0; + + if (i.seg[op] != NULL && i.seg[op] != &es) { - if (i.seg[1] != NULL && i.seg[1] != &es) - { - as_bad (_("`%s' operand %d must use `%ses' segment"), - i.tm.name, - mem_op + 2, - register_prefix); - return 0; - } + as_bad (_("`%s' operand %u must use `%ses' segment"), + i.tm.name, + intel_syntax ? i.tm.operands - es_op : es_op + 1, + register_prefix); + return 0; } + + /* There's only ever one segment override allowed per instruction. + This instruction possibly has a legal segment override on the + second operand, so copy the segment to where non-string + instructions store it, allowing common code. */ + i.seg[op] = i.seg[1]; + return 1; } @@ -5863,43 +6290,41 @@ process_suffix (void) { /* If matched instruction specifies an explicit instruction mnemonic suffix, use it. */ - if (i.tm.opcode_modifier.size16) + if (i.tm.opcode_modifier.size == SIZE16) i.suffix = WORD_MNEM_SUFFIX; - else if (i.tm.opcode_modifier.size32) + else if (i.tm.opcode_modifier.size == SIZE32) i.suffix = LONG_MNEM_SUFFIX; - else if (i.tm.opcode_modifier.size64) + else if (i.tm.opcode_modifier.size == SIZE64) i.suffix = QWORD_MNEM_SUFFIX; - else if (i.reg_operands) + else if (i.reg_operands + && (i.operands > 1 || i.types[0].bitfield.class == Reg)) { /* If there's no instruction mnemonic suffix we try to invent one - based on register operands. */ + based on GPR operands. */ if (!i.suffix) { /* We take i.suffix from the last register operand specified, Destination register type is more significant than source register type. crc32 in SSE4.2 prefers source register type. 
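[Note: with no explicit suffix, the logic above derives one from the width of a GPR operand, preferring the destination (crc32 being the odd one out in preferring the source). The underlying mapping, as a trivial sketch:

/* AT&T mnemonic suffix implied by a general-purpose register's width.  */
static char
suffix_from_reg_width (unsigned int width_in_bits)
{
  switch (width_in_bits)
    {
    case 8:  return 'b';
    case 16: return 'w';
    case 32: return 'l';
    case 64: return 'q';
    }
  return 0;   /* no GPR of that width */
}]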
*/ - if (i.tm.base_opcode == 0xf20f38f1) + if (i.tm.base_opcode == 0xf20f38f0 + && i.types[0].bitfield.class == Reg) { - if (i.types[0].bitfield.reg && i.types[0].bitfield.word) + if (i.types[0].bitfield.byte) + i.suffix = BYTE_MNEM_SUFFIX; + else if (i.types[0].bitfield.word) i.suffix = WORD_MNEM_SUFFIX; - else if (i.types[0].bitfield.reg && i.types[0].bitfield.dword) + else if (i.types[0].bitfield.dword) i.suffix = LONG_MNEM_SUFFIX; - else if (i.types[0].bitfield.reg && i.types[0].bitfield.qword) + else if (i.types[0].bitfield.qword) i.suffix = QWORD_MNEM_SUFFIX; } - else if (i.tm.base_opcode == 0xf20f38f0) - { - if (i.types[0].bitfield.reg && i.types[0].bitfield.byte) - i.suffix = BYTE_MNEM_SUFFIX; - } if (!i.suffix) { int op; - if (i.tm.base_opcode == 0xf20f38f1 - || i.tm.base_opcode == 0xf20f38f0) + if (i.tm.base_opcode == 0xf20f38f0) { /* We have to know the operand size for crc32. */ as_bad (_("ambiguous memory operand size for `%s`"), @@ -5908,10 +6333,10 @@ process_suffix (void) } for (op = i.operands; --op >= 0;) - if (!i.tm.operand_types[op].bitfield.inoutportreg - && !i.tm.operand_types[op].bitfield.shiftcount) + if (i.tm.operand_types[op].bitfield.instance == InstanceNone + || i.tm.operand_types[op].bitfield.instance == Accum) { - if (!i.types[op].bitfield.reg) + if (i.types[op].bitfield.class != Reg) continue; if (i.types[op].bitfield.byte) i.suffix = BYTE_MNEM_SUFFIX; @@ -5976,15 +6401,34 @@ process_suffix (void) else if (i.tm.opcode_modifier.defaultsize && !i.suffix /* exclude fldenv/frstor/fsave/fstenv */ - && i.tm.opcode_modifier.no_ssuf) + && i.tm.opcode_modifier.no_ssuf + /* exclude sysret */ + && i.tm.base_opcode != 0x0f07) { i.suffix = stackop_size; + if (stackop_size == LONG_MNEM_SUFFIX) + { + /* stackop_size is set to LONG_MNEM_SUFFIX for the + .code16gcc directive to support 16-bit mode with + 32-bit address. For IRET without a suffix, generate + 16-bit IRET (opcode 0xcf) to return from an interrupt + handler. */ + if (i.tm.base_opcode == 0xcf) + { + i.suffix = WORD_MNEM_SUFFIX; + as_warn (_("generating 16-bit `iret' for .code16gcc directive")); + } + /* Warn about changed behavior for segment register push/pop. */ + else if ((i.tm.base_opcode | 1) == 0x07) + as_warn (_("generating 32-bit `%s', unlike earlier gas versions"), + i.tm.name); + } } else if (intel_syntax && !i.suffix - && (i.tm.operand_types[0].bitfield.jumpabsolute - || i.tm.opcode_modifier.jumpbyte - || i.tm.opcode_modifier.jumpintersegment + && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE + || i.tm.opcode_modifier.jump == JUMP_BYTE + || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT || (i.tm.base_opcode == 0x0f01 /* [ls][gi]dt */ && i.tm.extension_opcode <= 3))) { @@ -6074,9 +6518,9 @@ process_suffix (void) size prefix, except for instructions that will ignore this prefix anyway. 
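[Note: the .code16gcc handling added above boils down to: suffix-less stack operations default to 32 bits, but a bare iret keeps the 16-bit form (with a warning) so an interrupt handler still returns correctly. A simplified sketch that leaves out the segment push/pop warning path:

/* Default suffix for a suffix-less stack op under .code16gcc.  */
static char
code16gcc_default_suffix (unsigned int base_opcode)
{
  if (base_opcode == 0xcf)   /* iret: must stay 16-bit */
    return 'w';
  return 'l';                /* everything else widens to 32 bits */
}]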
*/ if (i.reg_operands > 0 - && i.types[0].bitfield.reg + && i.types[0].bitfield.class == Reg && i.tm.opcode_modifier.addrprefixopreg - && (i.tm.opcode_modifier.immext + && (i.tm.operand_types[0].bitfield.instance == Accum || i.operands == 1)) { /* The address size override prefix changes the size of the @@ -6091,13 +6535,14 @@ process_suffix (void) else if (i.suffix != QWORD_MNEM_SUFFIX && !i.tm.opcode_modifier.ignoresize && !i.tm.opcode_modifier.floatmf + && !is_any_vex_encoding (&i.tm) && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT) || (flag_code == CODE_64BIT - && i.tm.opcode_modifier.jumpbyte))) + && i.tm.opcode_modifier.jump == JUMP_BYTE))) { unsigned int prefix = DATA_PREFIX_OPCODE; - if (i.tm.opcode_modifier.jumpbyte) /* jcxz, loop */ + if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */ prefix = ADDR_PREFIX_OPCODE; if (!add_prefix (prefix)) @@ -6113,8 +6558,10 @@ process_suffix (void) && ! (i.operands == 2 && i.tm.base_opcode == 0x90 && i.tm.extension_opcode == None - && operand_type_equal (&i.types [0], &acc64) - && operand_type_equal (&i.types [1], &acc64))) + && i.types[0].bitfield.instance == Accum + && i.types[0].bitfield.qword + && i.types[1].bitfield.instance == Accum + && i.types[1].bitfield.qword)) i.rex |= REX_W; break; @@ -6123,7 +6570,7 @@ process_suffix (void) if (i.reg_operands != 0 && i.operands > 1 && i.tm.opcode_modifier.addrprefixopreg - && !i.tm.opcode_modifier.immext) + && i.tm.operand_types[0].bitfield.instance != Accum) { /* Check invalid register operand when the address size override prefix changes the size of register operands. */ @@ -6141,7 +6588,7 @@ process_suffix (void) } for (op = 0; op < i.operands; op++) - if (i.types[op].bitfield.reg + if (i.types[op].bitfield.class == Reg && ((need == need_word && !i.op[op].regs->reg_type.bitfield.word) || (need == need_dword @@ -6166,7 +6613,7 @@ check_byte_reg (void) for (op = i.operands; --op >= 0;) { /* Skip non-register operands. */ - if (!i.types[op].bitfield.reg) + if (i.types[op].bitfield.class != Reg) continue; /* If this is an eight bit register, it's OK. If it's the 16 or @@ -6176,7 +6623,8 @@ check_byte_reg (void) continue; /* I/O port address operands are OK too. */ - if (i.tm.operand_types[op].bitfield.inoutportreg) + if (i.tm.operand_types[op].bitfield.instance == RegD + && i.tm.operand_types[op].bitfield.word) continue; /* crc32 doesn't generate this warning. */ @@ -6205,14 +6653,13 @@ check_byte_reg (void) continue; } /* Any other register is bad. */ - if (i.types[op].bitfield.reg - || i.types[op].bitfield.regmmx - || i.types[op].bitfield.regsimd - || i.types[op].bitfield.sreg2 - || i.types[op].bitfield.sreg3 - || i.types[op].bitfield.control - || i.types[op].bitfield.debug - || i.types[op].bitfield.test) + if (i.types[op].bitfield.class == Reg + || i.types[op].bitfield.class == RegMMX + || i.types[op].bitfield.class == RegSIMD + || i.types[op].bitfield.class == SReg + || i.types[op].bitfield.class == RegCR + || i.types[op].bitfield.class == RegDR + || i.types[op].bitfield.class == RegTR) { as_bad (_("`%s%s' not allowed with `%s%c'"), register_prefix, @@ -6232,13 +6679,13 @@ check_long_reg (void) for (op = i.operands; --op >= 0;) /* Skip non-register operands. */ - if (!i.types[op].bitfield.reg) + if (i.types[op].bitfield.class != Reg) continue; /* Reject eight bit registers, except where the template requires them. (eg. 
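[Note: the REX_W bit set above for the q suffix is one of four REX fields. The prefix byte is 0100WRXB, with the bit values this file uses; a sketch:

#include <stdbool.h>

#define REX_OPCODE 0x40
#define REX_W 8   /* 64-bit operand size */
#define REX_R 4   /* extends ModRM reg */
#define REX_X 2   /* extends SIB index */
#define REX_B 1   /* extends ModRM rm / SIB base / opcode reg */

static unsigned char
make_rex (bool w, bool r, bool x, bool b)
{
  return REX_OPCODE | (w ? REX_W : 0) | (r ? REX_R : 0)
         | (x ? REX_X : 0) | (b ? REX_B : 0);
}]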
movzb) */ else if (i.types[op].bitfield.byte - && (i.tm.operand_types[op].bitfield.reg - || i.tm.operand_types[op].bitfield.acc) + && (i.tm.operand_types[op].bitfield.class == Reg + || i.tm.operand_types[op].bitfield.instance == Accum) && (i.tm.operand_types[op].bitfield.word || i.tm.operand_types[op].bitfield.dword)) { @@ -6252,8 +6699,8 @@ check_long_reg (void) /* Warn if the e prefix on a general reg is missing. */ else if ((!quiet_warnings || flag_code == CODE_64BIT) && i.types[op].bitfield.word - && (i.tm.operand_types[op].bitfield.reg - || i.tm.operand_types[op].bitfield.acc) + && (i.tm.operand_types[op].bitfield.class == Reg + || i.tm.operand_types[op].bitfield.instance == Accum) && i.tm.operand_types[op].bitfield.dword) { /* Prohibit these changes in the 64bit mode, since the @@ -6274,13 +6721,13 @@ check_long_reg (void) } /* Warn if the r prefix on a general reg is present. */ else if (i.types[op].bitfield.qword - && (i.tm.operand_types[op].bitfield.reg - || i.tm.operand_types[op].bitfield.acc) + && (i.tm.operand_types[op].bitfield.class == Reg + || i.tm.operand_types[op].bitfield.instance == Accum) && i.tm.operand_types[op].bitfield.dword) { if (intel_syntax && i.tm.opcode_modifier.toqword - && !i.types[0].bitfield.regsimd) + && i.types[0].bitfield.class != RegSIMD) { /* Convert to QWORD. We want REX byte. */ i.suffix = QWORD_MNEM_SUFFIX; @@ -6303,13 +6750,13 @@ check_qword_reg (void) for (op = i.operands; --op >= 0; ) /* Skip non-register operands. */ - if (!i.types[op].bitfield.reg) + if (i.types[op].bitfield.class != Reg) continue; /* Reject eight bit registers, except where the template requires them. (eg. movzb) */ else if (i.types[op].bitfield.byte - && (i.tm.operand_types[op].bitfield.reg - || i.tm.operand_types[op].bitfield.acc) + && (i.tm.operand_types[op].bitfield.class == Reg + || i.tm.operand_types[op].bitfield.instance == Accum) && (i.tm.operand_types[op].bitfield.word || i.tm.operand_types[op].bitfield.dword)) { @@ -6323,15 +6770,15 @@ check_qword_reg (void) /* Warn if the r prefix on a general reg is missing. */ else if ((i.types[op].bitfield.word || i.types[op].bitfield.dword) - && (i.tm.operand_types[op].bitfield.reg - || i.tm.operand_types[op].bitfield.acc) + && (i.tm.operand_types[op].bitfield.class == Reg + || i.tm.operand_types[op].bitfield.instance == Accum) && i.tm.operand_types[op].bitfield.qword) { /* Prohibit these changes in the 64bit mode, since the lowering is more complicated. */ if (intel_syntax && i.tm.opcode_modifier.todword - && !i.types[0].bitfield.regsimd) + && i.types[0].bitfield.class != RegSIMD) { /* Convert to DWORD. We don't want REX byte. */ i.suffix = LONG_MNEM_SUFFIX; @@ -6353,13 +6800,13 @@ check_word_reg (void) int op; for (op = i.operands; --op >= 0;) /* Skip non-register operands. */ - if (!i.types[op].bitfield.reg) + if (i.types[op].bitfield.class != Reg) continue; /* Reject eight bit registers, except where the template requires them. (eg. 
movzb) */ else if (i.types[op].bitfield.byte - && (i.tm.operand_types[op].bitfield.reg - || i.tm.operand_types[op].bitfield.acc) + && (i.tm.operand_types[op].bitfield.class == Reg + || i.tm.operand_types[op].bitfield.instance == Accum) && (i.tm.operand_types[op].bitfield.word || i.tm.operand_types[op].bitfield.dword)) { @@ -6374,8 +6821,8 @@ check_word_reg (void) else if ((!quiet_warnings || flag_code == CODE_64BIT) && (i.types[op].bitfield.dword || i.types[op].bitfield.qword) - && (i.tm.operand_types[op].bitfield.reg - || i.tm.operand_types[op].bitfield.acc) + && (i.tm.operand_types[op].bitfield.class == Reg + || i.tm.operand_types[op].bitfield.instance == Accum) && i.tm.operand_types[op].bitfield.word) { /* Prohibit these changes in the 64bit mode, since the @@ -6500,15 +6947,15 @@ process_operands (void) && MAX_OPERANDS > dupl && operand_type_equal (&i.types[dest], ®xmm)); - if (i.tm.operand_types[0].bitfield.acc + if (i.tm.operand_types[0].bitfield.instance == Accum && i.tm.operand_types[0].bitfield.xmmword) { if (i.tm.opcode_modifier.vexsources == VEX3SOURCES) { /* Keep xmm0 for instructions with VEX prefix and 3 sources. */ - i.tm.operand_types[0].bitfield.acc = 0; - i.tm.operand_types[0].bitfield.regsimd = 1; + i.tm.operand_types[0].bitfield.instance = InstanceNone; + i.tm.operand_types[0].bitfield.class = RegSIMD; goto duplicate; } else @@ -6521,6 +6968,7 @@ process_operands (void) i.op[j - 1] = i.op[j]; i.types[j - 1] = i.types[j]; i.tm.operand_types[j - 1] = i.tm.operand_types[j]; + i.flags[j - 1] = i.flags[j]; } } } @@ -6537,6 +6985,7 @@ process_operands (void) i.op[j] = i.op[j - 1]; i.types[j] = i.types[j - 1]; i.tm.operand_types[j] = i.tm.operand_types[j - 1]; + i.flags[j] = i.flags[j - 1]; } i.op[0].regs = (const reg_entry *) hash_find (reg_hash, "xmm0"); @@ -6552,6 +7001,7 @@ process_operands (void) i.op[dupl] = i.op[dest]; i.types[dupl] = i.types[dest]; i.tm.operand_types[dupl] = i.tm.operand_types[dest]; + i.flags[dupl] = i.flags[dest]; } else { @@ -6563,12 +7013,13 @@ duplicate: i.op[dupl] = i.op[dest]; i.types[dupl] = i.types[dest]; i.tm.operand_types[dupl] = i.tm.operand_types[dest]; + i.flags[dupl] = i.flags[dest]; } if (i.tm.opcode_modifier.immext) process_immext (); } - else if (i.tm.operand_types[0].bitfield.acc + else if (i.tm.operand_types[0].bitfield.instance == Accum && i.tm.operand_types[0].bitfield.xmmword) { unsigned int j; @@ -6581,6 +7032,8 @@ duplicate: /* We need to adjust fields in i.tm since they are used by build_modrm_byte. */ i.tm.operand_types [j - 1] = i.tm.operand_types [j]; + + i.flags[j - 1] = i.flags[j]; } i.operands--; @@ -6592,7 +7045,7 @@ duplicate: unsigned int regnum, first_reg_in_group, last_reg_in_group; /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */ - gas_assert (i.operands >= 2 && i.types[1].bitfield.regsimd); + gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD); regnum = register_number (i.op[1].regs); first_reg_in_group = regnum & ~3; last_reg_in_group = first_reg_in_group + 3; @@ -6625,57 +7078,7 @@ duplicate: i.reg_operands++; } - if (i.tm.opcode_modifier.shortform) - { - if (i.types[0].bitfield.sreg2 - || i.types[0].bitfield.sreg3) - { - if (i.tm.base_opcode == POP_SEG_SHORT - && i.op[0].regs->reg_num == 1) - { - as_bad (_("you can't `pop %scs'"), register_prefix); - return 0; - } - i.tm.base_opcode |= (i.op[0].regs->reg_num << 3); - if ((i.op[0].regs->reg_flags & RegRex) != 0) - i.rex |= REX_B; - } - else - { - /* The register or float register operand is in operand - 0 or 1. 
*/ - unsigned int op; - - if ((i.types[0].bitfield.reg && i.types[0].bitfield.tbyte) - || operand_type_check (i.types[0], reg)) - op = 0; - else - op = 1; - /* Register goes in low 3 bits of opcode. */ - i.tm.base_opcode |= i.op[op].regs->reg_num; - if ((i.op[op].regs->reg_flags & RegRex) != 0) - i.rex |= REX_B; - if (!quiet_warnings && i.tm.opcode_modifier.ugh) - { - /* Warn about some common errors, but press on regardless. - The first case can be generated by gcc (<= 2.8.1). */ - if (i.operands == 2) - { - /* Reversed arguments on faddp, fsubp, etc. */ - as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name, - register_prefix, i.op[!intel_syntax].regs->reg_name, - register_prefix, i.op[intel_syntax].regs->reg_name); - } - else - { - /* Extraneous `l' suffix on fp insn. */ - as_warn (_("translating to `%s %s%s'"), i.tm.name, - register_prefix, i.op[0].regs->reg_name); - } - } - } - } - else if (i.tm.opcode_modifier.modrm) + if (i.tm.opcode_modifier.modrm) { /* The opcode is completed (modulo i.tm.extension_opcode which must be put into the modrm byte). Now, we make the modrm and @@ -6683,6 +7086,25 @@ duplicate: default_seg = build_modrm_byte (); } + else if (i.types[0].bitfield.class == SReg) + { + if (flag_code != CODE_64BIT + ? i.tm.base_opcode == POP_SEG_SHORT + && i.op[0].regs->reg_num == 1 + : (i.tm.base_opcode | 1) == POP_SEG386_SHORT + && i.op[0].regs->reg_num < 4) + { + as_bad (_("you can't `%s %s%s'"), + i.tm.name, register_prefix, i.op[0].regs->reg_name); + return 0; + } + if ( i.op[0].regs->reg_num > 3 && i.tm.opcode_length == 1 ) + { + i.tm.base_opcode ^= POP_SEG_SHORT ^ POP_SEG386_SHORT; + i.tm.opcode_length = 2; + } + i.tm.base_opcode |= (i.op[0].regs->reg_num << 3); + } else if ((i.tm.base_opcode & ~0x3) == MOV_AX_DISP32) { default_seg = &ds; @@ -6693,6 +7115,35 @@ duplicate: on one of their operands, the default segment is ds. */ default_seg = &ds; } + else if (i.tm.opcode_modifier.shortform) + { + /* The register or float register operand is in operand + 0 or 1. */ + unsigned int op = i.tm.operand_types[0].bitfield.class != Reg; + + /* Register goes in low 3 bits of opcode. */ + i.tm.base_opcode |= i.op[op].regs->reg_num; + if ((i.op[op].regs->reg_flags & RegRex) != 0) + i.rex |= REX_B; + if (!quiet_warnings && i.tm.opcode_modifier.ugh) + { + /* Warn about some common errors, but press on regardless. + The first case can be generated by gcc (<= 2.8.1). */ + if (i.operands == 2) + { + /* Reversed arguments on faddp, fsubp, etc. */ + as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name, + register_prefix, i.op[!intel_syntax].regs->reg_name, + register_prefix, i.op[intel_syntax].regs->reg_name); + } + else + { + /* Extraneous `l' suffix on fp insn. */ + as_warn (_("translating to `%s %s%s'"), i.tm.name, + register_prefix, i.op[0].regs->reg_name); + } + } + } if (i.tm.base_opcode == 0x8d /* lea */ && i.seg[0] @@ -6730,7 +7181,7 @@ build_modrm_byte (void) /* There are 2 kinds of instructions: 1. 5 operands: 4 register operands or 3 register operands - plus 1 memory operand plus one Vec_Imm4 operand, VexXDS, and + plus 1 memory operand plus one Imm4 operand, VexXDS, and VexW0 or VexW1. The destination must be either XMM, YMM or ZMM register. 2. 
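[Note: short-form encoding, reorganized above, has no ModRM byte: the register number goes in the low three bits of the opcode and REX.B supplies the fourth bit for r8-r15. A standalone sketch for push, where "push %rdx" comes out as 52 and "push %r10" as 41 52:

/* Emit a short-form push of a 64-bit GPR; returns bytes written.  */
static unsigned int
encode_push_reg (unsigned int reg_num /* 0..15 */, unsigned char buf[2])
{
  unsigned int n = 0;

  if (reg_num >= 8)
    buf[n++] = 0x41;                  /* REX.B */
  buf[n++] = 0x50 | (reg_num & 7);    /* push r64 */
  return n;
}]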
4 operands: 4 register operands or 3 register operands @@ -6739,7 +7190,7 @@ build_modrm_byte (void) || (i.reg_operands == 3 && i.mem_operands == 1)) && i.tm.opcode_modifier.vexvvvv == VEXXDS && i.tm.opcode_modifier.vexw - && i.tm.operand_types[dest].bitfield.regsimd); + && i.tm.operand_types[dest].bitfield.class == RegSIMD); /* If VexW1 is set, the first non-immediate operand is the source and the second non-immediate one is encoded in the immediate operand. */ @@ -6763,40 +7214,27 @@ build_modrm_byte (void) i.types[i.operands] = imm8; i.operands++; - gas_assert (i.tm.operand_types[reg_slot].bitfield.regsimd); + gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD); exp->X_op = O_constant; exp->X_add_number = register_number (i.op[reg_slot].regs) << 4; gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0); } else { - unsigned int imm_slot; + gas_assert (i.imm_operands == 1); + gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number)); + gas_assert (!i.tm.opcode_modifier.immext); - gas_assert (i.imm_operands == 1 && i.types[0].bitfield.vec_imm4); + /* Turn on Imm8 again so that output_imm will generate it. */ + i.types[0].bitfield.imm8 = 1; - if (i.tm.opcode_modifier.immext) - { - /* When ImmExt is set, the immediate byte is the last - operand. */ - imm_slot = i.operands - 1; - source--; - reg_slot--; - } - else - { - imm_slot = 0; - - /* Turn on Imm8 so that output_imm will generate it. */ - i.types[imm_slot].bitfield.imm8 = 1; - } - - gas_assert (i.tm.operand_types[reg_slot].bitfield.regsimd); - i.op[imm_slot].imms->X_add_number + gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD); + i.op[0].imms->X_add_number |= register_number (i.op[reg_slot].regs) << 4; gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0); } - gas_assert (i.tm.operand_types[nds].bitfield.regsimd); + gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD); i.vex.register_specifier = i.op[nds].regs; } else @@ -6828,9 +7266,11 @@ build_modrm_byte (void) gas_assert (i.imm_operands == 1 || (i.imm_operands == 0 && (i.tm.opcode_modifier.vexvvvv == VEXXDS - || i.types[0].bitfield.shiftcount))); + || (i.types[0].bitfield.instance == RegC + && i.types[0].bitfield.byte)))); if (operand_type_check (i.types[0], imm) - || i.types[0].bitfield.shiftcount) + || (i.types[0].bitfield.instance == RegC + && i.types[0].bitfield.byte)) source = 1; else source = 0; @@ -6898,8 +7338,7 @@ build_modrm_byte (void) { /* For instructions with VexNDS, the register-only source operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask - register. It is encoded in VEX prefix. We need to - clear RegMem bit before calling operand_type_equal. */ + register. It is encoded in VEX prefix. */ i386_operand_type op; unsigned int vvvv; @@ -6916,11 +7355,10 @@ build_modrm_byte (void) vvvv = dest; op = i.tm.operand_types[vvvv]; - op.bitfield.regmem = 0; if ((dest + 1) >= i.operands - || ((!op.bitfield.reg + || ((op.bitfield.class != Reg || (!op.bitfield.dword && !op.bitfield.qword)) - && !op.bitfield.regsimd + && op.bitfield.class != RegSIMD && !operand_type_equal (&op, ®mask))) abort (); i.vex.register_specifier = i.op[vvvv].regs; @@ -6929,17 +7367,32 @@ build_modrm_byte (void) } i.rm.mode = 3; - /* One of the register operands will be encoded in the i.tm.reg - field, the other in the combined i.tm.mode and i.tm.regmem + /* One of the register operands will be encoded in the i.rm.reg + field, the other in the combined i.rm.mode and i.rm.regmem fields. 
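[Note: the immediate rewrite above implements the VEX /is4 convention: the fourth register operand is carried in bits 7:4 of a trailing immediate byte (vblendvps and similar), while the 5-operand XOP forms checked earlier pack a genuine 4-bit immediate into bits 3:0 of the same byte. A sketch:

/* Build the /is4 byte: register number high, optional imm4 low.  */
static unsigned char
encode_is4_byte (unsigned int reg_num /* 0..15 */, unsigned int imm4)
{
  return (unsigned char) ((reg_num << 4) | (imm4 & 0xf));
}]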
If no form of this instruction supports a memory destination operand, then we assume the source operand may sometimes be a memory operand and so we need to store the destination in the i.rm.reg field. */ - if (!i.tm.operand_types[dest].bitfield.regmem + if (!i.tm.opcode_modifier.regmem && operand_type_check (i.tm.operand_types[dest], anymem) == 0) { i.rm.reg = i.op[dest].regs->reg_num; i.rm.regmem = i.op[source].regs->reg_num; + if (i.op[dest].regs->reg_type.bitfield.class == RegMMX + || i.op[source].regs->reg_type.bitfield.class == RegMMX) + i.has_regmmx = TRUE; + else if (i.op[dest].regs->reg_type.bitfield.class == RegSIMD + || i.op[source].regs->reg_type.bitfield.class == RegSIMD) + { + if (i.types[dest].bitfield.zmmword + || i.types[source].bitfield.zmmword) + i.has_regzmm = TRUE; + else if (i.types[dest].bitfield.ymmword + || i.types[source].bitfield.ymmword) + i.has_regymm = TRUE; + else + i.has_regxmm = TRUE; + } if ((i.op[dest].regs->reg_flags & RegRex) != 0) i.rex |= REX_R; if ((i.op[dest].regs->reg_flags & RegVRex) != 0) @@ -6964,7 +7417,7 @@ build_modrm_byte (void) } if (flag_code != CODE_64BIT && (i.rex & REX_R)) { - if (!i.types[i.tm.operand_types[0].bitfield.regmem].bitfield.control) + if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR) abort (); i.rex &= ~REX_R; add_prefix (LOCK_PREFIX_OPCODE); @@ -6980,14 +7433,13 @@ build_modrm_byte (void) unsigned int op; for (op = 0; op < i.operands; op++) - if (operand_type_check (i.types[op], anymem)) + if (i.flags[op] & Operand_Mem) break; gas_assert (op < i.operands); if (i.tm.opcode_modifier.vecsib) { - if (i.index_reg->reg_num == RegEiz - || i.index_reg->reg_num == RegRiz) + if (i.index_reg->reg_num == RegIZ) abort (); i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING; @@ -7058,8 +7510,7 @@ build_modrm_byte (void) else if (!i.tm.opcode_modifier.vecsib) { /* !i.base_reg && i.index_reg */ - if (i.index_reg->reg_num == RegEiz - || i.index_reg->reg_num == RegRiz) + if (i.index_reg->reg_num == RegIZ) i.sib.index = NO_INDEX_REGISTER; else i.sib.index = i.index_reg->reg_num; @@ -7085,8 +7536,7 @@ build_modrm_byte (void) } } /* RIP addressing for 64bit mode. 
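[Note: for reference while reading the i.rm.* assignments, the ModRM byte packs three fields, with mode 3 naming a register operand and modes 0-2 the memory forms (no, 8-bit, and full displacement). A sketch:

/* mode(2) | reg(3) | rm(3), most significant bits first.  */
static unsigned char
make_modrm (unsigned int mode, unsigned int reg, unsigned int rm)
{
  return (unsigned char) (((mode & 3) << 6) | ((reg & 7) << 3) | (rm & 7));
}]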
*/ - else if (i.base_reg->reg_num == RegRip || - i.base_reg->reg_num == RegEip) + else if (i.base_reg->reg_num == RegIP) { gas_assert (!i.tm.opcode_modifier.vecsib); i.rm.regmem = NO_BASE_REGISTER; @@ -7178,8 +7628,7 @@ build_modrm_byte (void) } else if (!i.tm.opcode_modifier.vecsib) { - if (i.index_reg->reg_num == RegEiz - || i.index_reg->reg_num == RegRiz) + if (i.index_reg->reg_num == RegIZ) i.sib.index = NO_INDEX_REGISTER; else i.sib.index = i.index_reg->reg_num; @@ -7283,17 +7732,31 @@ build_modrm_byte (void) unsigned int vex_reg = ~0; for (op = 0; op < i.operands; op++) - if (i.types[op].bitfield.reg - || i.types[op].bitfield.regmmx - || i.types[op].bitfield.regsimd - || i.types[op].bitfield.regbnd - || i.types[op].bitfield.regmask - || i.types[op].bitfield.sreg2 - || i.types[op].bitfield.sreg3 - || i.types[op].bitfield.control - || i.types[op].bitfield.debug - || i.types[op].bitfield.test) - break; + { + if (i.types[op].bitfield.class == Reg + || i.types[op].bitfield.class == RegBND + || i.types[op].bitfield.class == RegMask + || i.types[op].bitfield.class == SReg + || i.types[op].bitfield.class == RegCR + || i.types[op].bitfield.class == RegDR + || i.types[op].bitfield.class == RegTR) + break; + if (i.types[op].bitfield.class == RegSIMD) + { + if (i.types[op].bitfield.zmmword) + i.has_regzmm = TRUE; + else if (i.types[op].bitfield.ymmword) + i.has_regymm = TRUE; + else + i.has_regxmm = TRUE; + break; + } + if (i.types[op].bitfield.class == RegMMX) + { + i.has_regmmx = TRUE; + break; + } + } if (vex_3_sources) op = dest; @@ -7352,9 +7815,9 @@ build_modrm_byte (void) { i386_operand_type *type = &i.tm.operand_types[vex_reg]; - if ((!type->bitfield.reg + if ((type->bitfield.class != Reg || (!type->bitfield.dword && !type->bitfield.qword)) - && !type->bitfield.regsimd + && type->bitfield.class != RegSIMD && !operand_type_equal (type, ®mask)) abort (); @@ -7398,6 +7861,18 @@ build_modrm_byte (void) return default_seg; } +static unsigned int +flip_code16 (unsigned int code16) +{ + gas_assert (i.tm.operands == 1); + + return !(i.prefix[REX_PREFIX] & REX_W) + && (code16 ? i.tm.operand_types[0].bitfield.disp32 + || i.tm.operand_types[0].bitfield.disp32s + : i.tm.operand_types[0].bitfield.disp16) + ? CODE16 : 0; +} + static void output_branch (void) { @@ -7417,7 +7892,7 @@ output_branch (void) { prefix = 1; i.prefixes -= 1; - code16 ^= CODE16; + code16 ^= flip_code16(code16); } /* Pentium4 branch hints. */ if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */ @@ -7435,12 +7910,12 @@ output_branch (void) /* BND prefixed jump. */ if (i.prefix[BND_PREFIX] != 0) { - FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]); - i.prefixes -= 1; + prefix++; + i.prefixes--; } - if (i.prefixes != 0 && !intel_syntax) - as_warn (_("skipping prefixes on this instruction")); + if (i.prefixes != 0) + as_warn (_("skipping prefixes on `%s'"), i.tm.name); /* It's always a symbol; End frag & setup for relax. Make sure there is enough room in this frag for the largest @@ -7455,6 +7930,8 @@ output_branch (void) if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE) *p++ = i.prefix[SEG_PREFIX]; + if (i.prefix[BND_PREFIX] != 0) + *p++ = BND_PREFIX_OPCODE; if (i.prefix[REX_PREFIX] != 0) *p++ = i.prefix[REX_PREFIX]; *p = i.tm.base_opcode; @@ -7494,6 +7971,12 @@ need_plt32_p (symbolS *s) if (!IS_ELF) return FALSE; +#ifdef TE_SOLARIS + /* Don't emit PLT32 relocation on Solaris: neither native linker nor + krtld support it. 
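[Note: the RegIP case above emits RIP-relative addressing, which in 64-bit mode reuses the 32-bit "disp32 only" slot: mode 0, rm 5 (NO_BASE_REGISTER), followed by a 32-bit displacement counted from the end of the instruction. "lea 0x10(%rip), %rax", for instance, assembles to 48 8d 05 10 00 00 00. The ModRM byte, as a sketch:

/* ModRM for a RIP-relative memory operand with the given reg field.  */
static unsigned char
rip_relative_modrm (unsigned int reg)
{
  return (unsigned char) (((reg & 7) << 3) | 5);   /* mode 0, rm 101 */
}]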
*/ + return FALSE; +#endif + /* Since there is no need to prepare for PLT branch on x86-64, we can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can be used as a marker for 32-bit PC-relative branches. */ @@ -7524,7 +8007,7 @@ output_jump (void) fixS *fixP; bfd_reloc_code_real_type jump_reloc = i.reloc[0]; - if (i.tm.opcode_modifier.jumpbyte) + if (i.tm.opcode_modifier.jump == JUMP_BYTE) { /* This is a loop or jecxz type instruction. */ size = 1; @@ -7553,7 +8036,7 @@ output_jump (void) { FRAG_APPEND_1_CHAR (DATA_PREFIX_OPCODE); i.prefixes -= 1; - code16 ^= CODE16; + code16 ^= flip_code16(code16); } size = 4; @@ -7561,21 +8044,21 @@ output_jump (void) size = 2; } - if (i.prefix[REX_PREFIX] != 0) + /* BND prefixed jump. */ + if (i.prefix[BND_PREFIX] != 0) { - FRAG_APPEND_1_CHAR (i.prefix[REX_PREFIX]); + FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]); i.prefixes -= 1; } - /* BND prefixed jump. */ - if (i.prefix[BND_PREFIX] != 0) + if (i.prefix[REX_PREFIX] != 0) { - FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]); + FRAG_APPEND_1_CHAR (i.prefix[REX_PREFIX]); i.prefixes -= 1; } - if (i.prefixes != 0 && !intel_syntax) - as_warn (_("skipping prefixes on this instruction")); + if (i.prefixes != 0) + as_warn (_("skipping prefixes on `%s'"), i.tm.name); p = frag_more (i.tm.opcode_length + size); switch (i.tm.opcode_length) @@ -7628,18 +8111,15 @@ output_interseg_jump (void) i.prefixes -= 1; code16 ^= CODE16; } - if (i.prefix[REX_PREFIX] != 0) - { - prefix++; - i.prefixes -= 1; - } + + gas_assert (!i.prefix[REX_PREFIX]); size = 4; if (code16) size = 2; - if (i.prefixes != 0 && !intel_syntax) - as_warn (_("skipping prefixes on this instruction")); + if (i.prefixes != 0) + as_warn (_("skipping prefixes on `%s'"), i.tm.name); /* 1 opcode; 2 segment; offset */ p = frag_more (prefix + 1 + 2 + size); @@ -7673,11 +8153,427 @@ output_interseg_jump (void) md_number_to_chars (p + size, (valueT) i.op[0].imms->X_add_number, 2); } +#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) +void +x86_cleanup (void) +{ + char *p; + asection *seg = now_seg; + subsegT subseg = now_subseg; + asection *sec; + unsigned int alignment, align_size_1; + unsigned int isa_1_descsz, feature_2_descsz, descsz; + unsigned int isa_1_descsz_raw, feature_2_descsz_raw; + unsigned int padding; + + if (!IS_ELF || !x86_used_note) + return; + + x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86; + + /* The .note.gnu.property section layout: + + Field Length Contents + ---- ---- ---- + n_namsz 4 4 + n_descsz 4 The note descriptor size + n_type 4 NT_GNU_PROPERTY_TYPE_0 + n_name 4 "GNU" + n_desc n_descsz The program property array + .... .... .... + */ + + /* Create the .note.gnu.property section. */ + sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0); + bfd_set_section_flags (sec, + (SEC_ALLOC + | SEC_LOAD + | SEC_DATA + | SEC_HAS_CONTENTS + | SEC_READONLY)); + + if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64) + { + align_size_1 = 7; + alignment = 3; + } + else + { + align_size_1 = 3; + alignment = 2; + } + + bfd_set_section_alignment (sec, alignment); + elf_section_type (sec) = SHT_NOTE; + + /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size + + 4-byte data */ + isa_1_descsz_raw = 4 + 4 + 4; + /* Align GNU_PROPERTY_X86_ISA_1_USED. */ + isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1; + + feature_2_descsz_raw = isa_1_descsz; + /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size + + 4-byte data */ + feature_2_descsz_raw += 4 + 4 + 4; + /* Align GNU_PROPERTY_X86_FEATURE_2_USED. 
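[Note: x86_cleanup above lays the note section out byte by byte. Its shape, sketched as a struct for the ELFCLASS32 case (ELFCLASS64 aligns each property entry to 8 bytes instead, which is what the align_size_1 arithmetic handles):

#include <stdint.h>

struct gnu_property_note_32
{
  uint32_t n_namsz;     /* 4: sizeof "GNU" including the NUL */
  uint32_t n_descsz;    /* total size of the property array */
  uint32_t n_type;      /* NT_GNU_PROPERTY_TYPE_0 (5) */
  char     n_name[4];   /* "GNU\0" */
  /* One entry per property follows: */
  uint32_t pr_type;     /* e.g. GNU_PROPERTY_X86_ISA_1_USED */
  uint32_t pr_datasz;   /* 4 */
  uint32_t pr_data;     /* bitmask of ISAs/features actually used */
};]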
*/ + feature_2_descsz = ((feature_2_descsz_raw + align_size_1) + & ~align_size_1); + + descsz = feature_2_descsz; + /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz. */ + p = frag_more (4 + 4 + 4 + 4 + descsz); + + /* Write n_namsz. */ + md_number_to_chars (p, (valueT) 4, 4); + + /* Write n_descsz. */ + md_number_to_chars (p + 4, (valueT) descsz, 4); + + /* Write n_type. */ + md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4); + + /* Write n_name. */ + memcpy (p + 4 * 3, "GNU", 4); + + /* Write 4-byte type. */ + md_number_to_chars (p + 4 * 4, + (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4); + + /* Write 4-byte data size. */ + md_number_to_chars (p + 4 * 5, (valueT) 4, 4); + + /* Write 4-byte data. */ + md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4); + + /* Zero out paddings. */ + padding = isa_1_descsz - isa_1_descsz_raw; + if (padding) + memset (p + 4 * 7, 0, padding); + + /* Write 4-byte type. */ + md_number_to_chars (p + isa_1_descsz + 4 * 4, + (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4); + + /* Write 4-byte data size. */ + md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4); + + /* Write 4-byte data. */ + md_number_to_chars (p + isa_1_descsz + 4 * 6, + (valueT) x86_feature_2_used, 4); + + /* Zero out paddings. */ + padding = feature_2_descsz - feature_2_descsz_raw; + if (padding) + memset (p + isa_1_descsz + 4 * 7, 0, padding); + + /* We probably can't restore the current segment, for there likely + isn't one yet... */ + if (seg && subseg) + subseg_set (seg, subseg); +} +#endif + +static unsigned int +encoding_length (const fragS *start_frag, offsetT start_off, + const char *frag_now_ptr) +{ + unsigned int len = 0; + + if (start_frag != frag_now) + { + const fragS *fr = start_frag; + + do { + len += fr->fr_fix; + fr = fr->fr_next; + } while (fr && fr != frag_now); + } + + return len - start_off + (frag_now_ptr - frag_now->fr_literal); +} + +/* Return 1 for test, and, cmp, add, sub, inc and dec which may + be macro-fused with conditional jumps. */ + +static int +maybe_fused_with_jcc_p (void) +{ + /* No RIP address. */ + if (i.base_reg && i.base_reg->reg_num == RegIP) + return 0; + + /* No VEX/EVEX encoding. */ + if (is_any_vex_encoding (&i.tm)) + return 0; + + /* and, add, sub with destination register. */ + if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25) + || i.tm.base_opcode <= 5 + || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d) + || ((i.tm.base_opcode | 3) == 0x83 + && ((i.tm.extension_opcode | 1) == 0x5 + || i.tm.extension_opcode == 0x0))) + return (i.types[1].bitfield.class == Reg + || i.types[1].bitfield.instance == Accum); + + /* test, cmp with any register. */ + if ((i.tm.base_opcode | 1) == 0x85 + || (i.tm.base_opcode | 1) == 0xa9 + || ((i.tm.base_opcode | 1) == 0xf7 + && i.tm.extension_opcode == 0) + || (i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d) + || ((i.tm.base_opcode | 3) == 0x83 + && (i.tm.extension_opcode == 0x7))) + return (i.types[0].bitfield.class == Reg + || i.types[0].bitfield.instance == Accum + || i.types[1].bitfield.class == Reg + || i.types[1].bitfield.instance == Accum); + + /* inc, dec with any register. */ + if ((i.tm.cpu_flags.bitfield.cpuno64 + && (i.tm.base_opcode | 0xf) == 0x4f) + || ((i.tm.base_opcode | 1) == 0xff + && i.tm.extension_opcode <= 0x1)) + return (i.types[0].bitfield.class == Reg + || i.types[0].bitfield.instance == Accum); + + return 0; +} + +/* Return 1 if a FUSED_JCC_PADDING frag should be generated. 
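[Note: maybe_fused_with_jcc_p below captures Intel's macro-fusion rules as raw opcode tests: cmp and test fuse with any register operand, while add/sub/and/inc/dec fuse only when the destination is a register. A sketch limited to the one-byte reg/rm forms (the real predicate also covers the 80/81/83 immediate groups and the accumulator forms):

/* Can this one-byte ALU opcode macro-fuse with a following jcc?  */
static int
may_macro_fuse (unsigned int opcode)
{
  return (opcode | 1) == 0x85                   /* test r/m, r */
         || (opcode >= 0x38 && opcode <= 0x3d)  /* cmp */
         || opcode <= 0x05                      /* add */
         || (opcode >= 0x28 && opcode <= 0x2d)  /* sub */
         || (opcode >= 0x20 && opcode <= 0x25); /* and */
}]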
*/ + +static int +add_fused_jcc_padding_frag_p (void) +{ + /* NB: Don't work with COND_JUMP86 without i386. */ + if (!align_branch_power + || now_seg == absolute_section + || !cpu_arch_flags.bitfield.cpui386 + || !(align_branch & align_branch_fused_bit)) + return 0; + + if (maybe_fused_with_jcc_p ()) + { + if (last_insn.kind == last_insn_other + || last_insn.seg != now_seg) + return 1; + if (flag_debug) + as_warn_where (last_insn.file, last_insn.line, + _("`%s` skips -malign-branch-boundary on `%s`"), + last_insn.name, i.tm.name); + } + + return 0; +} + +/* Return 1 if a BRANCH_PREFIX frag should be generated. */ + +static int +add_branch_prefix_frag_p (void) +{ + /* NB: Don't work with COND_JUMP86 without i386. Don't add prefix + to PadLock instructions since they include prefixes in opcode. */ + if (!align_branch_power + || !align_branch_prefix_size + || now_seg == absolute_section + || i.tm.cpu_flags.bitfield.cpupadlock + || !cpu_arch_flags.bitfield.cpui386) + return 0; + + /* Don't add prefix if it is a prefix or there is no operand in case + that segment prefix is special. */ + if (!i.operands || i.tm.opcode_modifier.isprefix) + return 0; + + if (last_insn.kind == last_insn_other + || last_insn.seg != now_seg) + return 1; + + if (flag_debug) + as_warn_where (last_insn.file, last_insn.line, + _("`%s` skips -malign-branch-boundary on `%s`"), + last_insn.name, i.tm.name); + + return 0; +} + +/* Return 1 if a BRANCH_PADDING frag should be generated. */ + +static int +add_branch_padding_frag_p (enum align_branch_kind *branch_p) +{ + int add_padding; + + /* NB: Don't work with COND_JUMP86 without i386. */ + if (!align_branch_power + || now_seg == absolute_section + || !cpu_arch_flags.bitfield.cpui386) + return 0; + + add_padding = 0; + + /* Check for jcc and direct jmp. */ + if (i.tm.opcode_modifier.jump == JUMP) + { + if (i.tm.base_opcode == JUMP_PC_RELATIVE) + { + *branch_p = align_branch_jmp; + add_padding = align_branch & align_branch_jmp_bit; + } + else + { + *branch_p = align_branch_jcc; + if ((align_branch & align_branch_jcc_bit)) + add_padding = 1; + } + } + else if (is_any_vex_encoding (&i.tm)) + return 0; + else if ((i.tm.base_opcode | 1) == 0xc3) + { + /* Near ret. */ + *branch_p = align_branch_ret; + if ((align_branch & align_branch_ret_bit)) + add_padding = 1; + } + else + { + /* Check for indirect jmp, direct and indirect calls. */ + if (i.tm.base_opcode == 0xe8) + { + /* Direct call. */ + *branch_p = align_branch_call; + if ((align_branch & align_branch_call_bit)) + add_padding = 1; + } + else if (i.tm.base_opcode == 0xff + && (i.tm.extension_opcode == 2 + || i.tm.extension_opcode == 4)) + { + /* Indirect call and jmp. */ + *branch_p = align_branch_indirect; + if ((align_branch & align_branch_indirect_bit)) + add_padding = 1; + } + + if (add_padding + && i.disp_operands + && tls_get_addr + && (i.op[0].disps->X_op == O_symbol + || (i.op[0].disps->X_op == O_subtract + && i.op[0].disps->X_op_symbol == GOT_symbol))) + { + symbolS *s = i.op[0].disps->X_add_symbol; + /* No padding to call to global or undefined tls_get_addr. 
*/ + if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s)) + && strcmp (S_GET_NAME (s), tls_get_addr) == 0) + return 0; + } + } + + if (add_padding + && last_insn.kind != last_insn_other + && last_insn.seg == now_seg) + { + if (flag_debug) + as_warn_where (last_insn.file, last_insn.line, + _("`%s` skips -malign-branch-boundary on `%s`"), + last_insn.name, i.tm.name); + return 0; + } + + return add_padding; +} + static void output_insn (void) { fragS *insn_start_frag; offsetT insn_start_off; + fragS *fragP = NULL; + enum align_branch_kind branch = align_branch_none; + +#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) + if (IS_ELF && x86_used_note) + { + if (i.tm.cpu_flags.bitfield.cpucmov) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_CMOV; + if (i.tm.cpu_flags.bitfield.cpusse) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE; + if (i.tm.cpu_flags.bitfield.cpusse2) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE2; + if (i.tm.cpu_flags.bitfield.cpusse3) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE3; + if (i.tm.cpu_flags.bitfield.cpussse3) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSSE3; + if (i.tm.cpu_flags.bitfield.cpusse4_1) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_1; + if (i.tm.cpu_flags.bitfield.cpusse4_2) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_2; + if (i.tm.cpu_flags.bitfield.cpuavx) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX; + if (i.tm.cpu_flags.bitfield.cpuavx2) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX2; + if (i.tm.cpu_flags.bitfield.cpufma) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_FMA; + if (i.tm.cpu_flags.bitfield.cpuavx512f) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512F; + if (i.tm.cpu_flags.bitfield.cpuavx512cd) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512CD; + if (i.tm.cpu_flags.bitfield.cpuavx512er) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512ER; + if (i.tm.cpu_flags.bitfield.cpuavx512pf) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512PF; + if (i.tm.cpu_flags.bitfield.cpuavx512vl) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512VL; + if (i.tm.cpu_flags.bitfield.cpuavx512dq) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512DQ; + if (i.tm.cpu_flags.bitfield.cpuavx512bw) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512BW; + if (i.tm.cpu_flags.bitfield.cpuavx512_4fmaps) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_4FMAPS; + if (i.tm.cpu_flags.bitfield.cpuavx512_4vnniw) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_4VNNIW; + if (i.tm.cpu_flags.bitfield.cpuavx512_bitalg) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BITALG; + if (i.tm.cpu_flags.bitfield.cpuavx512ifma) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_IFMA; + if (i.tm.cpu_flags.bitfield.cpuavx512vbmi) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI; + if (i.tm.cpu_flags.bitfield.cpuavx512_vbmi2) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2; + if (i.tm.cpu_flags.bitfield.cpuavx512_vnni) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VNNI; + if (i.tm.cpu_flags.bitfield.cpuavx512_bf16) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BF16; + + if (i.tm.cpu_flags.bitfield.cpu8087 + || i.tm.cpu_flags.bitfield.cpu287 + || i.tm.cpu_flags.bitfield.cpu387 + || i.tm.cpu_flags.bitfield.cpu687 + || i.tm.cpu_flags.bitfield.cpufisttp) + x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87; + if (i.has_regmmx + || i.tm.base_opcode == 0xf77 /* emms */ + || i.tm.base_opcode == 0xf0e /* femms */) + x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX; + if (i.has_regxmm) + x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM; + if (i.has_regymm) + x86_feature_2_used 
|= GNU_PROPERTY_X86_FEATURE_2_YMM; + if (i.has_regzmm) + x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM; + if (i.tm.cpu_flags.bitfield.cpufxsr) + x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR; + if (i.tm.cpu_flags.bitfield.cpuxsave) + x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE; + if (i.tm.cpu_flags.bitfield.cpuxsaveopt) + x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT; + if (i.tm.cpu_flags.bitfield.cpuxsavec) + x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC; + } +#endif /* Tie dwarf2 debug info to the address at the start of the insn. We can't do this after the insn has been output as the current @@ -7687,13 +8583,38 @@ output_insn (void) insn_start_frag = frag_now; insn_start_off = frag_now_fix (); + if (add_branch_padding_frag_p (&branch)) + { + char *p; + /* Branch can be 8 bytes. Leave some room for prefixes. */ + unsigned int max_branch_padding_size = 14; + + /* Align section to boundary. */ + record_alignment (now_seg, align_branch_power); + + /* Make room for padding. */ + frag_grow (max_branch_padding_size); + + /* Start of the padding. */ + p = frag_more (0); + + fragP = frag_now; + + frag_var (rs_machine_dependent, max_branch_padding_size, 0, + ENCODE_RELAX_STATE (BRANCH_PADDING, 0), + NULL, 0, p); + + fragP->tc_frag_data.branch_type = branch; + fragP->tc_frag_data.max_bytes = max_branch_padding_size; + } + /* Output jumps. */ - if (i.tm.opcode_modifier.jump) + if (i.tm.opcode_modifier.jump == JUMP) output_branch (); - else if (i.tm.opcode_modifier.jumpbyte - || i.tm.opcode_modifier.jumpdword) + else if (i.tm.opcode_modifier.jump == JUMP_BYTE + || i.tm.opcode_modifier.jump == JUMP_DWORD) output_jump (); - else if (i.tm.opcode_modifier.jumpintersegment) + else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT) output_interseg_jump (); else { @@ -7704,12 +8625,9 @@ output_insn (void) unsigned int prefix; if (avoid_fence - && i.tm.base_opcode == 0xfae - && i.operands == 1 - && i.imm_operands == 1 - && (i.op[0].imms->X_add_number == 0xe8 - || i.op[0].imms->X_add_number == 0xf0 - || i.op[0].imms->X_add_number == 0xf8)) + && (i.tm.base_opcode == 0xfaee8 + || i.tm.base_opcode == 0xfaef0 + || i.tm.base_opcode == 0xfaef8)) { /* Encode lfence, mfence, and sfence as f0 83 04 24 00 lock addl $0x0, (%{re}sp). */ @@ -7728,6 +8646,41 @@ output_insn (void) i.prefix[LOCK_PREFIX] = 0; } + if (branch) + /* Skip if this is a branch. */ + ; + else if (add_fused_jcc_padding_frag_p ()) + { + /* Make room for padding. */ + frag_grow (MAX_FUSED_JCC_PADDING_SIZE); + p = frag_more (0); + + fragP = frag_now; + + frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0, + ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0), + NULL, 0, p); + + fragP->tc_frag_data.branch_type = align_branch_fused; + fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE; + } + else if (add_branch_prefix_frag_p ()) + { + unsigned int max_prefix_size = align_branch_prefix_size; + + /* Make room for padding. */ + frag_grow (max_prefix_size); + p = frag_more (0); + + fragP = frag_now; + + frag_var (rs_machine_dependent, max_prefix_size, 0, + ENCODE_RELAX_STATE (BRANCH_PREFIX, 0), + NULL, 0, p); + + fragP->tc_frag_data.max_bytes = max_prefix_size; + } + /* Since the VEX/EVEX prefix contains the implicit prefix, we don't need the explicit prefix. 
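[Note: the rewritten avoid_fence test matches the fused opcode+ModRM values (0f ae e8/f0/f8 for lfence/mfence/sfence) and substitutes a locked read-modify-write of the top-of-stack word, which serializes loads and stores without an explicit fence. The replacement bytes, addressing (%esp) in 32-bit code and (%rsp) in 64-bit code:

/* f0 83 04 24 00: lock addl $0x0, (%esp) / (%rsp).  */
static const unsigned char lock_add_fence[] = {
  0xf0,   /* lock */
  0x83,   /* add r/m32, imm8 (/0) */
  0x04,   /* ModRM: mode 0, /0, rm 100 -> SIB follows */
  0x24,   /* SIB: base = sp, no index */
  0x00    /* imm8: 0 */
};]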
*/ if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex) @@ -7738,17 +8691,17 @@ output_insn (void) if (i.tm.base_opcode & 0xff000000) { prefix = (i.tm.base_opcode >> 24) & 0xff; - add_prefix (prefix); + if (!i.tm.cpu_flags.bitfield.cpupadlock + || prefix != REPE_PREFIX_OPCODE + || (i.prefix[REP_PREFIX] != REPE_PREFIX_OPCODE)) + add_prefix (prefix); } break; case 2: if ((i.tm.base_opcode & 0xff0000) != 0) { prefix = (i.tm.base_opcode >> 16) & 0xff; - if (!i.tm.cpu_flags.bitfield.cpupadlock - || prefix != REPE_PREFIX_OPCODE - || (i.prefix[REP_PREFIX] != REPE_PREFIX_OPCODE)) - add_prefix (prefix); + add_prefix (prefix); } break; case 1: @@ -7863,6 +8816,115 @@ output_insn (void) if (i.imm_operands) output_imm (insn_start_frag, insn_start_off); + + /* + * frag_now_fix () returning plain abs_section_offset when we're in the + * absolute section, and abs_section_offset not getting updated as data + * gets added to the frag breaks the logic below. + */ + if (now_seg != absolute_section) + { + j = encoding_length (insn_start_frag, insn_start_off, frag_more (0)); + if (j > 15) + as_warn (_("instruction length of %u bytes exceeds the limit of 15"), + j); + else if (fragP) + { + /* NB: Don't add prefix with GOTPC relocation since + output_disp() above depends on the fixed encoding + length. Can't add prefix with TLS relocation since + it breaks TLS linker optimization. */ + unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j; + /* Prefix count on the current instruction. */ + unsigned int count = i.vex.length; + unsigned int k; + for (k = 0; k < ARRAY_SIZE (i.prefix); k++) + /* REX byte is encoded in VEX/EVEX prefix. */ + if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length)) + count++; + + /* Count prefixes for extended opcode maps. */ + if (!i.vex.length) + switch (i.tm.opcode_length) + { + case 3: + if (((i.tm.base_opcode >> 16) & 0xff) == 0xf) + { + count++; + switch ((i.tm.base_opcode >> 8) & 0xff) + { + case 0x38: + case 0x3a: + count++; + break; + default: + break; + } + } + break; + case 2: + if (((i.tm.base_opcode >> 8) & 0xff) == 0xf) + count++; + break; + case 1: + break; + default: + abort (); + } + + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) + == BRANCH_PREFIX) + { + /* Set the maximum prefix size in BRANCH_PREFIX + frag. */ + if (fragP->tc_frag_data.max_bytes > max) + fragP->tc_frag_data.max_bytes = max; + if (fragP->tc_frag_data.max_bytes > count) + fragP->tc_frag_data.max_bytes -= count; + else + fragP->tc_frag_data.max_bytes = 0; + } + else + { + /* Remember the maximum prefix size in FUSED_JCC_PADDING + frag. */ + unsigned int max_prefix_size; + if (align_branch_prefix_size > max) + max_prefix_size = max; + else + max_prefix_size = align_branch_prefix_size; + if (max_prefix_size > count) + fragP->tc_frag_data.max_prefix_length + = max_prefix_size - count; + } + + /* Use existing segment prefix if possible. Use CS + segment prefix in 64-bit mode. In 32-bit mode, use SS + segment prefix with ESP/EBP base register and use DS + segment prefix without ESP/EBP base register. */ + if (i.prefix[SEG_PREFIX]) + fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX]; + else if (flag_code == CODE_64BIT) + fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE; + else if (i.base_reg + && (i.base_reg->reg_num == 4 + || i.base_reg->reg_num == 5)) + fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE; + else + fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE; + } + } + } + + /* NB: Don't work with COND_JUMP86 without i386. 
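[Note: the prefix-counting block below serves x86's hard 15-byte instruction limit: BRANCH_PREFIX and FUSED_JCC_PADDING frags may only grow an instruction up to that cap, and instructions carrying GOTPC or TLS relocations get no padding at all since linker optimizations rely on their exact encoding length. The budget, as a sketch:

#include <stdbool.h>

/* How many one-byte prefixes may still be added to this insn.  */
static unsigned int
prefix_padding_budget (unsigned int insn_len, bool has_gotpc_tls_reloc)
{
  if (has_gotpc_tls_reloc || insn_len >= 15)
    return 0;
  return 15 - insn_len;
}]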
*/ + if (align_branch_power + && now_seg != absolute_section + && cpu_arch_flags.bitfield.cpui386) + { + /* Terminate each frag so that we can add prefix and check for + fused jcc. */ + frag_wane (frag_now); + frag_new (0); } #ifdef DEBUG386 @@ -7919,7 +8981,8 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off) int size = disp_size (n); offsetT val = i.op[n].disps->X_add_number; - val = offset_in_range (val >> i.memshift, size); + val = offset_in_range (val >> (size == 1 ? i.memshift : 0), + size); p = frag_more (size); md_number_to_chars (p, val, size); } @@ -7970,25 +9033,12 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off) == O_subtract)))) || reloc_type == BFD_RELOC_32_PCREL)) { - offsetT add; - - if (insn_start_frag == frag_now) - add = (p - frag_now->fr_literal) - insn_start_off; - else - { - fragS *fr; - - add = insn_start_frag->fr_fix - insn_start_off; - for (fr = insn_start_frag->fr_next; - fr && fr != frag_now; fr = fr->fr_next) - add += fr->fr_fix; - add += p - frag_now->fr_literal; - } - if (!object_64bit) { reloc_type = BFD_RELOC_386_GOTPC; - i.op[n].imms->X_add_number += add; + i.has_gotpc_tls_reloc = TRUE; + i.op[n].imms->X_add_number += + encoding_length (insn_start_frag, insn_start_off, p); } else if (reloc_type == BFD_RELOC_64) reloc_type = BFD_RELOC_X86_64_GOTPC64; @@ -7998,18 +9048,40 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off) insn, and that is taken care of in other code. */ reloc_type = BFD_RELOC_X86_64_GOTPC32; } + else if (align_branch_power) + { + switch (reloc_type) + { + case BFD_RELOC_386_TLS_GD: + case BFD_RELOC_386_TLS_LDM: + case BFD_RELOC_386_TLS_IE: + case BFD_RELOC_386_TLS_IE_32: + case BFD_RELOC_386_TLS_GOTIE: + case BFD_RELOC_386_TLS_GOTDESC: + case BFD_RELOC_386_TLS_DESC_CALL: + case BFD_RELOC_X86_64_TLSGD: + case BFD_RELOC_X86_64_TLSLD: + case BFD_RELOC_X86_64_GOTTPOFF: + case BFD_RELOC_X86_64_GOTPC32_TLSDESC: + case BFD_RELOC_X86_64_TLSDESC_CALL: + i.has_gotpc_tls_reloc = TRUE; + default: + break; + } + } fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size, i.op[n].disps, pcrel, reloc_type); /* Check for "call/jmp *mem", "mov mem, %reg", "test %reg, mem" and "binop mem, %reg" where binop is one of adc, add, and, cmp, or, sbb, sub, xor - instructions. Always generate R_386_GOT32X for - "sym*GOT" operand in 32-bit mode. */ - if ((generate_relax_relocations - || (!object_64bit - && i.rm.mode == 0 - && i.rm.regmem == 5)) + instructions without data prefix. Always generate + R_386_GOT32X for "sym*GOT" operand in 32-bit mode. 
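(Worked example for the encoding_length () adjustment above, in 32-bit PIC code:

	addl	$_GLOBAL_OFFSET_TABLE_, %ebx	# 81 c3 <imm32>

The imm32 fixup sits 2 bytes past the start of the instruction, so the BFD_RELOC_386_GOTPC addend becomes 2 and GOT + A - P resolves relative to the instruction start rather than to the fixup address.)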
*/ + if (i.prefix[DATA_PREFIX] == 0 + && (generate_relax_relocations + || (!object_64bit + && i.rm.mode == 0 + && i.rm.regmem == 5)) && (i.rm.mode == 2 || (i.rm.mode == 0 && i.rm.regmem == 5)) && ((i.operands == 1 @@ -8024,8 +9096,7 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off) { fixP->fx_tcbit = i.rex != 0; if (i.base_reg - && (i.base_reg->reg_num == RegRip - || i.base_reg->reg_num == RegEip)) + && (i.base_reg->reg_num == RegIP)) fixP->fx_tcbit2 = 1; } else @@ -8129,32 +9200,19 @@ output_imm (fragS *insn_start_frag, offsetT insn_start_off) && GOT_symbol == i.op[n].imms->X_add_symbol && (i.op[n].imms->X_op == O_symbol || (i.op[n].imms->X_op == O_add - && ((symbol_get_value_expression - (i.op[n].imms->X_op_symbol)->X_op) - == O_subtract)))) - { - offsetT add; - - if (insn_start_frag == frag_now) - add = (p - frag_now->fr_literal) - insn_start_off; - else - { - fragS *fr; - - add = insn_start_frag->fr_fix - insn_start_off; - for (fr = insn_start_frag->fr_next; - fr && fr != frag_now; fr = fr->fr_next) - add += fr->fr_fix; - add += p - frag_now->fr_literal; - } - + && ((symbol_get_value_expression + (i.op[n].imms->X_op_symbol)->X_op) + == O_subtract)))) + { if (!object_64bit) reloc_type = BFD_RELOC_386_GOTPC; else if (size == 4) reloc_type = BFD_RELOC_X86_64_GOTPC32; else if (size == 8) reloc_type = BFD_RELOC_X86_64_GOTPC64; - i.op[n].imms->X_add_number += add; + i.has_gotpc_tls_reloc = TRUE; + i.op[n].imms->X_add_number += + encoding_length (insn_start_frag, insn_start_off, p); } fix_new_exp (frag_now, p - frag_now->fr_literal, size, i.op[n].imms, 0, reloc_type); @@ -8505,6 +9563,15 @@ x86_cons (expressionS *exp, int size) as_bad (_("missing or invalid expression `%s'"), save); *input_line_pointer = c; } + else if ((got_reloc == BFD_RELOC_386_PLT32 + || got_reloc == BFD_RELOC_X86_64_PLT32) + && exp->X_op != O_symbol) + { + char c = *input_line_pointer; + *input_line_pointer = 0; + as_bad (_("invalid PLT expression `%s'"), save); + *input_line_pointer = c; + } } } else @@ -8595,13 +9662,14 @@ check_VecOperations (char *op_string, char *op_end) broadcast_op.type = bcst_type; broadcast_op.operand = this_operand; + broadcast_op.bytes = 0; i.broadcast = &broadcast_op; } /* Check masking operation. */ else if ((mask = parse_register (op_string, &end_op)) != NULL) { /* k0 can't be used for write mask. 
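(For instance:

	vaddps	%zmm0, %zmm1, %zmm2{%k1}	# accepted
	vaddps	%zmm0, %zmm1, %zmm2{%k0}	# rejected by the check below

EVEX encodes the mask register in the 3-bit aaa field, where the value 0 means "no masking", so k0 can never be encoded as a write mask.)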
*/ - if (!mask->reg_type.bitfield.regmask || mask->reg_num == 0) + if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num) { as_bad (_("`%s%s' can't be used for write mask"), register_prefix, mask->reg_name); @@ -8879,11 +9947,12 @@ i386_displacement (char *disp_start, char *disp_end) } operand_type_set (&bigdisp, 0); - if ((i.types[this_operand].bitfield.jumpabsolute) - || (!current_templates->start->opcode_modifier.jump - && !current_templates->start->opcode_modifier.jumpdword)) + if (i.jumpabsolute + || i.types[this_operand].bitfield.baseindex + || (current_templates->start->opcode_modifier.jump != JUMP + && current_templates->start->opcode_modifier.jump != JUMP_DWORD)) { - bigdisp.bitfield.disp32 = 1; + i386_addressing_mode (); override = (i.prefix[ADDR_PREFIX] != 0); if (flag_code == CODE_64BIT) { @@ -8892,27 +9961,47 @@ i386_displacement (char *disp_start, char *disp_end) bigdisp.bitfield.disp32s = 1; bigdisp.bitfield.disp64 = 1; } + else + bigdisp.bitfield.disp32 = 1; } else if ((flag_code == CODE_16BIT) ^ override) - { - bigdisp.bitfield.disp32 = 0; bigdisp.bitfield.disp16 = 1; - } + else + bigdisp.bitfield.disp32 = 1; } else { - /* For PC-relative branches, the width of the displacement - is dependent upon data size, not address size. */ + /* For PC-relative branches, the width of the displacement may be + dependent upon data size, but is never dependent upon address size. + Also make sure to not unintentionally match against a non-PC-relative + branch template. */ + static templates aux_templates; + const insn_template *t = current_templates->start; + bfd_boolean has_intel64 = FALSE; + + aux_templates.start = t; + while (++t < current_templates->end) + { + if (t->opcode_modifier.jump + != current_templates->start->opcode_modifier.jump) + break; + if (t->opcode_modifier.intel64) + has_intel64 = TRUE; + } + if (t < current_templates->end) + { + aux_templates.end = t; + current_templates = &aux_templates; + } + override = (i.prefix[DATA_PREFIX] != 0); if (flag_code == CODE_64BIT) { - if (override || i.suffix == WORD_MNEM_SUFFIX) + if ((override || i.suffix == WORD_MNEM_SUFFIX) + && (!intel64 || !has_intel64)) bigdisp.bitfield.disp16 = 1; else - { - bigdisp.bitfield.disp32 = 1; - bigdisp.bitfield.disp32s = 1; - } + bigdisp.bitfield.disp32s = 1; } else { @@ -9085,6 +10174,11 @@ i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp, } #endif + if (current_templates->start->opcode_modifier.jump == JUMP_BYTE + /* Constants get taken care of by optimize_disp(). */ + && exp->X_op != O_constant) + i.types[this_operand].bitfield.disp8 = 1; + /* Check if this is a displacement only operand. 
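(Concrete cases of the width rule above, in 64-bit mode, AT&T syntax:

	jmp	.L1		# rel32, or rel8 after relaxation
	data16 jmp .L1		# rel16 against AMD64 templates; under -mintel64
				# a 32-bit displacement is kept, because Intel 64
				# ignores the operand-size override on near
				# branches, hence the intel64 template scan above

An address-size (addr32) prefix never changes the displacement width here.)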
*/ bigdisp = i.types[this_operand]; bigdisp.bitfield.disp8 = 0; @@ -9125,9 +10219,7 @@ i386_addressing_mode (void) if (addr_reg) { - if (addr_reg->reg_num == RegEip - || addr_reg->reg_num == RegEiz - || addr_reg->reg_type.bitfield.dword) + if (addr_reg->reg_type.bitfield.dword) addr_mode = CODE_32BIT; else if (flag_code != CODE_64BIT && addr_reg->reg_type.bitfield.word) @@ -9167,7 +10259,7 @@ i386_index_check (const char *operand_string) enum flag_code addr_mode = i386_addressing_mode (); if (current_templates->start->opcode_modifier.isstring - && !current_templates->start->opcode_modifier.immext + && !current_templates->start->cpu_flags.bitfield.cpupadlock && (current_templates->end[-1].opcode_modifier.isstring || i.mem_operands)) { @@ -9186,16 +10278,16 @@ i386_index_check (const char *operand_string) if (current_templates->start->opcode_modifier.repprefixok) { - i386_operand_type type = current_templates->end[-1].operand_types[0]; + int es_op = current_templates->end[-1].opcode_modifier.isstring + - IS_STRING_ES_OP0; + int op = 0; - if (!type.bitfield.baseindex + if (!current_templates->end[-1].operand_types[0].bitfield.baseindex || ((!i.mem_operands != !intel_syntax) && current_templates->end[-1].operand_types[1] .bitfield.baseindex)) - type = current_templates->end[-1].operand_types[1]; - expected_reg = hash_find (reg_hash, - di_si[addr_mode][type.bitfield.esseg]); - + op = 1; + expected_reg = hash_find (reg_hash, di_si[addr_mode][op == es_op]); } else expected_reg = hash_find (reg_hash, bx[addr_mode]); @@ -9237,21 +10329,18 @@ bad_address: { /* 32-bit/64-bit checks. */ if ((i.base_reg - && (addr_mode == CODE_64BIT - ? !i.base_reg->reg_type.bitfield.qword - : !i.base_reg->reg_type.bitfield.dword) - && (i.index_reg - || (i.base_reg->reg_num - != (addr_mode == CODE_64BIT ? RegRip : RegEip)))) + && ((addr_mode == CODE_64BIT + ? !i.base_reg->reg_type.bitfield.qword + : !i.base_reg->reg_type.bitfield.dword) + || (i.index_reg && i.base_reg->reg_num == RegIP) + || i.base_reg->reg_num == RegIZ)) || (i.index_reg && !i.index_reg->reg_type.bitfield.xmmword && !i.index_reg->reg_type.bitfield.ymmword && !i.index_reg->reg_type.bitfield.zmmword && ((addr_mode == CODE_64BIT - ? !(i.index_reg->reg_type.bitfield.qword - || i.index_reg->reg_num == RegRiz) - : !(i.index_reg->reg_type.bitfield.dword - || i.index_reg->reg_num == RegEiz)) + ? !i.index_reg->reg_type.bitfield.qword + : !i.index_reg->reg_type.bitfield.dword) || !i.index_reg->reg_type.bitfield.baseindex))) goto bad_address; @@ -9260,7 +10349,7 @@ bad_address: || (current_templates->start->base_opcode & ~1) == 0x0f1a) { /* They cannot use RIP-relative addressing. */ - if (i.base_reg && i.base_reg->reg_num == RegRip) + if (i.base_reg && i.base_reg->reg_num == RegIP) { as_bad (_("`%s' cannot be used here"), operand_string); return 0; @@ -9415,7 +10504,7 @@ i386_att_operand (char *operand_string) ++op_string; if (is_space_char (*op_string)) ++op_string; - i.types[this_operand].bitfield.jumpabsolute = 1; + i.jumpabsolute = TRUE; } /* Check if operand is a register. 
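(Example of the expected-register check above:

	movsb	%ds:(%esi), %es:(%edi)	# accepted: the si/di pair for this mode
	movsb	%ds:(%eax), %es:(%edi)	# "bad register operand"

In 16-bit addressing the expected pair is (%si)/(%di); the bx[] table covers xlat-style operands instead.)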
*/ @@ -9428,9 +10517,7 @@ i386_att_operand (char *operand_string) op_string = end_op; if (is_space_char (*op_string)) ++op_string; - if (*op_string == ':' - && (r->reg_type.bitfield.sreg2 - || r->reg_type.bitfield.sreg3)) + if (*op_string == ':' && r->reg_type.bitfield.class == SReg) { switch (r->reg_num) { @@ -9473,7 +10560,7 @@ i386_att_operand (char *operand_string) ++op_string; if (is_space_char (*op_string)) ++op_string; - i.types[this_operand].bitfield.jumpabsolute = 1; + i.jumpabsolute = TRUE; } goto do_memory_reference; } @@ -9507,7 +10594,7 @@ i386_att_operand (char *operand_string) else if (*op_string == IMMEDIATE_PREFIX) { ++op_string; - if (i.types[this_operand].bitfield.jumpabsolute) + if (i.jumpabsolute) { as_bad (_("immediate operand illegal with absolute jump")); return 0; @@ -9699,7 +10786,8 @@ i386_att_operand (char *operand_string) /* Special case for (%dx) while doing input/output op. */ if (i.base_reg - && i.base_reg->reg_type.bitfield.inoutportreg + && i.base_reg->reg_type.bitfield.instance == RegD + && i.base_reg->reg_type.bitfield.word && i.index_reg == 0 && i.log2_scale_factor == 0 && i.seg[i.mem_operands] == 0 @@ -9711,7 +10799,7 @@ i386_att_operand (char *operand_string) if (i386_index_check (operand_string) == 0) return 0; - i.types[this_operand].bitfield.mem = 1; + i.flags[this_operand] |= Operand_Mem; if (i.mem_operands == 0) i.memop1_string = xstrdup (operand_string); i.mem_operands++; @@ -9774,6 +10862,362 @@ elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var) } #endif +/* Return the next non-empty frag. */ + +static fragS * +i386_next_non_empty_frag (fragS *fragP) +{ + /* There may be a frag with a ".fill 0" when there is no room in + the current frag for frag_grow in output_insn. */ + for (fragP = fragP->fr_next; + (fragP != NULL + && fragP->fr_type == rs_fill + && fragP->fr_fix == 0); + fragP = fragP->fr_next) + ; + return fragP; +} + +/* Return the next jcc frag after BRANCH_PADDING. */ + +static fragS * +i386_next_jcc_frag (fragS *fragP) +{ + if (!fragP) + return NULL; + + if (fragP->fr_type == rs_machine_dependent + && (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) + == BRANCH_PADDING)) + { + fragP = i386_next_non_empty_frag (fragP); + if (fragP->fr_type != rs_machine_dependent) + return NULL; + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == COND_JUMP) + return fragP; + } + + return NULL; +} + +/* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */ + +static void +i386_classify_machine_dependent_frag (fragS *fragP) +{ + fragS *cmp_fragP; + fragS *pad_fragP; + fragS *branch_fragP; + fragS *next_fragP; + unsigned int max_prefix_length; + + if (fragP->tc_frag_data.classified) + return; + + /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert + FUSED_JCC_PADDING and merge BRANCH_PADDING. */ + for (next_fragP = fragP; + next_fragP != NULL; + next_fragP = next_fragP->fr_next) + { + next_fragP->tc_frag_data.classified = 1; + if (next_fragP->fr_type == rs_machine_dependent) + switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)) + { + case BRANCH_PADDING: + /* The BRANCH_PADDING frag must be followed by a branch + frag. 
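(Example for the (%dx) special case above:

	inb	(%dx), %al	# accepted: bare %dx is the I/O port operand
	inb	2(%dx), %al	# falls through to the normal memory checks and
				# is rejected, since %dx is never a valid base

The check requires a word-sized %dx with no index, scale, segment override or displacement.)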
*/ + branch_fragP = i386_next_non_empty_frag (next_fragP); + next_fragP->tc_frag_data.u.branch_fragP = branch_fragP; + break; + case FUSED_JCC_PADDING: + /* Check if this is a fused jcc: + FUSED_JCC_PADDING + CMP like instruction + BRANCH_PADDING + COND_JUMP + */ + cmp_fragP = i386_next_non_empty_frag (next_fragP); + pad_fragP = i386_next_non_empty_frag (cmp_fragP); + branch_fragP = i386_next_jcc_frag (pad_fragP); + if (branch_fragP) + { + /* The BRANCH_PADDING frag is merged with the + FUSED_JCC_PADDING frag. */ + next_fragP->tc_frag_data.u.branch_fragP = branch_fragP; + /* CMP like instruction size. */ + next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix; + frag_wane (pad_fragP); + /* Skip to branch_fragP. */ + next_fragP = branch_fragP; + } + else if (next_fragP->tc_frag_data.max_prefix_length) + { + /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't + a fused jcc. */ + next_fragP->fr_subtype + = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0); + next_fragP->tc_frag_data.max_bytes + = next_fragP->tc_frag_data.max_prefix_length; + /* This will be updated in the BRANCH_PREFIX scan. */ + next_fragP->tc_frag_data.max_prefix_length = 0; + } + else + frag_wane (next_fragP); + break; + } + } + + /* Stop if there is no BRANCH_PREFIX. */ + if (!align_branch_prefix_size) + return; + + /* Scan for BRANCH_PREFIX. */ + for (; fragP != NULL; fragP = fragP->fr_next) + { + if (fragP->fr_type != rs_machine_dependent + || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) + != BRANCH_PREFIX)) + continue; + + /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and + COND_JUMP_PREFIX. */ + max_prefix_length = 0; + for (next_fragP = fragP; + next_fragP != NULL; + next_fragP = next_fragP->fr_next) + { + if (next_fragP->fr_type == rs_fill) + /* Skip rs_fill frags. */ + continue; + else if (next_fragP->fr_type != rs_machine_dependent) + /* Stop for all other frags. */ + break; + + /* rs_machine_dependent frags. */ + if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype) + == BRANCH_PREFIX) + { + /* Count BRANCH_PREFIX frags. */ + if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE) + { + max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE; + frag_wane (next_fragP); + } + else + max_prefix_length + += next_fragP->tc_frag_data.max_bytes; + } + else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype) + == BRANCH_PADDING) + || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype) + == FUSED_JCC_PADDING)) + { + /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */ + fragP->tc_frag_data.u.padding_fragP = next_fragP; + break; + } + else + /* Stop for other rs_machine_dependent frags. */ + break; + } + + fragP->tc_frag_data.max_prefix_length = max_prefix_length; + + /* Skip to the next frag. */ + fragP = next_fragP; + } +} + +/* Compute padding size for + + FUSED_JCC_PADDING + CMP like instruction + BRANCH_PADDING + COND_JUMP/UNCOND_JUMP + + or + + BRANCH_PADDING + COND_JUMP/UNCOND_JUMP + */ + +static int +i386_branch_padding_size (fragS *fragP, offsetT address) +{ + unsigned int offset, size, padding_size; + fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP; + + /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */ + if (!address) + address = fragP->fr_address; + address += fragP->fr_fix; + + /* CMP like instrunction size. */ + size = fragP->tc_frag_data.cmp_size; + + /* The base size of the branch frag. */ + size += branch_fragP->fr_fix; + + /* Add opcode and displacement bytes for the rs_machine_dependent + branch frag. 
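(Worked example for the boundary test below, using hypothetical numbers with align_branch_power = 5, i.e. 32-byte boundaries: a 6-byte jcc whose padding frag ends at address 0x40fd has offset = 0x40fd & 31 = 29. Since 29 + 6 >= 32, padding_size = 32 - 29 = 3, which moves the branch to start exactly on the next boundary so that it neither crosses the boundary nor ends on its last byte.)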
*/ + if (branch_fragP->fr_type == rs_machine_dependent) + size += md_relax_table[branch_fragP->fr_subtype].rlx_length; + + /* Check if branch is within boundary and doesn't end at the last + byte. */ + offset = address & ((1U << align_branch_power) - 1); + if ((offset + size) >= (1U << align_branch_power)) + /* Padding needed to avoid crossing boundary. */ + padding_size = (1U << align_branch_power) - offset; + else + /* No padding needed. */ + padding_size = 0; + + /* The return value may be saved in tc_frag_data.length which is + unsigned byte. */ + if (!fits_in_unsigned_byte (padding_size)) + abort (); + + return padding_size; +} + +/* i386_generic_table_relax_frag() + + Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to + grow/shrink padding to align branch frags. Hand others to + relax_frag(). */ + +long +i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch) +{ + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING + || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING) + { + long padding_size = i386_branch_padding_size (fragP, 0); + long grow = padding_size - fragP->tc_frag_data.length; + + /* When the BRANCH_PREFIX frag is used, the computed address + must match the actual address and there should be no padding. */ + if (fragP->tc_frag_data.padding_address + && (fragP->tc_frag_data.padding_address != fragP->fr_address + || padding_size)) + abort (); + + /* Update the padding size. */ + if (grow) + fragP->tc_frag_data.length = padding_size; + + return grow; + } + else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX) + { + fragS *padding_fragP, *next_fragP; + long padding_size, left_size, last_size; + + padding_fragP = fragP->tc_frag_data.u.padding_fragP; + if (!padding_fragP) + /* Use the padding set by the leading BRANCH_PREFIX frag. */ + return (fragP->tc_frag_data.length + - fragP->tc_frag_data.last_length); + + /* Compute the relative address of the padding frag in the very + first time where the BRANCH_PREFIX frag sizes are zero. */ + if (!fragP->tc_frag_data.padding_address) + fragP->tc_frag_data.padding_address + = padding_fragP->fr_address - (fragP->fr_address - stretch); + + /* First update the last length from the previous interation. */ + left_size = fragP->tc_frag_data.prefix_length; + for (next_fragP = fragP; + next_fragP != padding_fragP; + next_fragP = next_fragP->fr_next) + if (next_fragP->fr_type == rs_machine_dependent + && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype) + == BRANCH_PREFIX)) + { + if (left_size) + { + int max = next_fragP->tc_frag_data.max_bytes; + if (max) + { + int size; + if (max > left_size) + size = left_size; + else + size = max; + left_size -= size; + next_fragP->tc_frag_data.last_length = size; + } + } + else + next_fragP->tc_frag_data.last_length = 0; + } + + /* Check the padding size for the padding frag. */ + padding_size = i386_branch_padding_size + (padding_fragP, (fragP->fr_address + + fragP->tc_frag_data.padding_address)); + + last_size = fragP->tc_frag_data.prefix_length; + /* Check if there is change from the last interation. */ + if (padding_size == last_size) + { + /* Update the expected address of the padding frag. */ + padding_fragP->tc_frag_data.padding_address + = (fragP->fr_address + padding_size + + fragP->tc_frag_data.padding_address); + return 0; + } + + if (padding_size > fragP->tc_frag_data.max_prefix_length) + { + /* No padding if there is no sufficient room. Clear the + expected address of the padding frag. 
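(For instance, with hypothetical numbers: prefix_length = 7 distributed over three leading BRANCH_PREFIX frags whose max_bytes are 5, 5 and 5, the loop above assigns 5, 2 and 0 bytes in that order; left_size is simply consumed front to back.)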
*/ + padding_fragP->tc_frag_data.padding_address = 0; + padding_size = 0; + } + else + /* Store the expected address of the padding frag. */ + padding_fragP->tc_frag_data.padding_address + = (fragP->fr_address + padding_size + + fragP->tc_frag_data.padding_address); + + fragP->tc_frag_data.prefix_length = padding_size; + + /* Update the length for the current interation. */ + left_size = padding_size; + for (next_fragP = fragP; + next_fragP != padding_fragP; + next_fragP = next_fragP->fr_next) + if (next_fragP->fr_type == rs_machine_dependent + && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype) + == BRANCH_PREFIX)) + { + if (left_size) + { + int max = next_fragP->tc_frag_data.max_bytes; + if (max) + { + int size; + if (max > left_size) + size = left_size; + else + size = max; + left_size -= size; + next_fragP->tc_frag_data.length = size; + } + } + else + next_fragP->tc_frag_data.length = 0; + } + + return (fragP->tc_frag_data.length + - fragP->tc_frag_data.last_length); + } + return relax_frag (segment, fragP, stretch); +} + /* md_estimate_size_before_relax() Called just before relax() for rs_machine_dependent frags. The x86 @@ -9790,6 +11234,14 @@ elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var) int md_estimate_size_before_relax (fragS *fragP, segT segment) { + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING + || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX + || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING) + { + i386_classify_machine_dependent_frag (fragP); + return fragP->tc_frag_data.length; + } + /* We've already got fragP->fr_subtype right; all we have to do is check for un-relaxable symbols. On an ELF system, we can't relax an externally visible symbol, because it may be overridden by a @@ -9923,6 +11375,106 @@ md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED, unsigned int extension = 0; offsetT displacement_from_opcode_start; + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING + || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING + || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX) + { + /* Generate nop padding. 
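(To make the two padding forms below concrete: a BRANCH_PREFIX frag given 3 bytes with a ds default prefix emits 3e 3e 3e in front of the following instruction, while a BRANCH_PADDING or FUSED_JCC_PADDING frag of the same size emits one 3-byte nop, 0f 1f 00, via i386_generate_nops.)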
*/ + unsigned int size = fragP->tc_frag_data.length; + if (size) + { + if (size > fragP->tc_frag_data.max_bytes) + abort (); + + if (flag_debug) + { + const char *msg; + const char *branch = "branch"; + const char *prefix = ""; + fragS *padding_fragP; + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) + == BRANCH_PREFIX) + { + padding_fragP = fragP->tc_frag_data.u.padding_fragP; + switch (fragP->tc_frag_data.default_prefix) + { + default: + abort (); + break; + case CS_PREFIX_OPCODE: + prefix = " cs"; + break; + case DS_PREFIX_OPCODE: + prefix = " ds"; + break; + case ES_PREFIX_OPCODE: + prefix = " es"; + break; + case FS_PREFIX_OPCODE: + prefix = " fs"; + break; + case GS_PREFIX_OPCODE: + prefix = " gs"; + break; + case SS_PREFIX_OPCODE: + prefix = " ss"; + break; + } + if (padding_fragP) + msg = _("%s:%u: add %d%s at 0x%llx to align " + "%s within %d-byte boundary\n"); + else + msg = _("%s:%u: add additional %d%s at 0x%llx to " + "align %s within %d-byte boundary\n"); + } + else + { + padding_fragP = fragP; + msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align " + "%s within %d-byte boundary\n"); + } + + if (padding_fragP) + switch (padding_fragP->tc_frag_data.branch_type) + { + case align_branch_jcc: + branch = "jcc"; + break; + case align_branch_fused: + branch = "fused jcc"; + break; + case align_branch_jmp: + branch = "jmp"; + break; + case align_branch_call: + branch = "call"; + break; + case align_branch_indirect: + branch = "indiret branch"; + break; + case align_branch_ret: + branch = "ret"; + break; + default: + break; + } + + fprintf (stdout, msg, + fragP->fr_file, fragP->fr_line, size, prefix, + (long long) fragP->fr_address, branch, + 1 << align_branch_power); + } + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX) + memset (fragP->fr_opcode, + fragP->tc_frag_data.default_prefix, size); + else + i386_generate_nops (fragP, (char *) fragP->fr_opcode, + size, 0); + fragP->fr_fix += size; + } + return; + } + opcode = (unsigned char *) fragP->fr_opcode; /* Address we want to reach in file space. */ @@ -10123,9 +11675,11 @@ md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED) { case BFD_RELOC_386_PLT32: case BFD_RELOC_X86_64_PLT32: - /* Make the jump instruction point to the address of the operand. At - runtime we merely add the offset to the actual PLT entry. */ - value = -4; + /* Make the jump instruction point to the address of the operand. + At runtime we merely add the offset to the actual PLT entry. + NB: Subtract the offset size only for jump instructions. 
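(Example for the fx_pcrel test below: `call foo@PLT` (e8 plus rel32) keeps value = -4 because the call displacement is relative to the end of the instruction, 4 bytes past the start of the fixup, while a data use such as `.long foo@PLT` is no longer biased, so branch tables built from PLT32 entries resolve correctly.)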
*/ + if (fixP->fx_pcrel) + value = -4; break; case BFD_RELOC_386_TLS_GD: @@ -10291,19 +11845,20 @@ parse_real_register (char *reg_string, char **end_op) return (const reg_entry *) NULL; if ((r->reg_type.bitfield.dword - || r->reg_type.bitfield.sreg3 - || r->reg_type.bitfield.control - || r->reg_type.bitfield.debug - || r->reg_type.bitfield.test) + || (r->reg_type.bitfield.class == SReg && r->reg_num > 3) + || r->reg_type.bitfield.class == RegCR + || r->reg_type.bitfield.class == RegDR + || r->reg_type.bitfield.class == RegTR) && !cpu_arch_flags.bitfield.cpui386) return (const reg_entry *) NULL; - if (r->reg_type.bitfield.regmmx && !cpu_arch_flags.bitfield.cpummx) + if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx) return (const reg_entry *) NULL; if (!cpu_arch_flags.bitfield.cpuavx512f) { - if (r->reg_type.bitfield.zmmword || r->reg_type.bitfield.regmask) + if (r->reg_type.bitfield.zmmword + || r->reg_type.bitfield.class == RegMask) return (const reg_entry *) NULL; if (!cpu_arch_flags.bitfield.cpuavx) @@ -10316,19 +11871,18 @@ parse_real_register (char *reg_string, char **end_op) } } - if (r->reg_type.bitfield.regbnd && !cpu_arch_flags.bitfield.cpumpx) + if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx) return (const reg_entry *) NULL; /* Don't allow fake index register unless allow_index_reg isn't 0. */ - if (!allow_index_reg - && (r->reg_num == RegEiz || r->reg_num == RegRiz)) + if (!allow_index_reg && r->reg_num == RegIZ) return (const reg_entry *) NULL; /* Upper 16 vector registers are only available with VREX in 64bit mode, and require EVEX encoding. */ if (r->reg_flags & RegVRex) { - if (!cpu_arch_flags.bitfield.cpuvrex + if (!cpu_arch_flags.bitfield.cpuavx512f || flag_code != CODE_64BIT) return (const reg_entry *) NULL; @@ -10336,11 +11890,12 @@ parse_real_register (char *reg_string, char **end_op) } if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword) - && (!cpu_arch_flags.bitfield.cpulm || !r->reg_type.bitfield.control) + && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR) && flag_code != CODE_64BIT) return (const reg_entry *) NULL; - if (r->reg_type.bitfield.sreg3 && r->reg_num == RegFlat && !intel_syntax) + if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat + && !intel_syntax) return (const reg_entry *) NULL; return r; @@ -10476,6 +12031,12 @@ const char *md_shortopts = "qnO::"; #define OPTION_MAMD64 (OPTION_MD_BASE + 22) #define OPTION_MINTEL64 (OPTION_MD_BASE + 23) #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24) +#define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25) +#define OPTION_MVEXWIG (OPTION_MD_BASE + 26) +#define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27) +#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28) +#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29) +#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30) struct option md_longopts[] = { @@ -10487,6 +12048,7 @@ struct option md_longopts[] = #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) {"x32", no_argument, NULL, OPTION_X32}, {"mshared", no_argument, NULL, OPTION_MSHARED}, + {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE}, #endif {"divide", no_argument, NULL, OPTION_DIVIDE}, {"march", required_argument, NULL, OPTION_MARCH}, @@ -10499,6 +12061,7 @@ struct option md_longopts[] = {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK}, {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK}, {"mavxscalar", required_argument, NULL, 
OPTION_MAVXSCALAR}, + {"mvexwig", required_argument, NULL, OPTION_MVEXWIG}, {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX}, {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG}, {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG}, @@ -10509,6 +12072,10 @@ struct option md_longopts[] = {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD}, {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS}, {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG}, + {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY}, + {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE}, + {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH}, + {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES}, {"mamd64", no_argument, NULL, OPTION_MAMD64}, {"mintel64", no_argument, NULL, OPTION_MINTEL64}, {NULL, no_argument, NULL, 0} @@ -10519,7 +12086,7 @@ int md_parse_option (int c, const char *arg) { unsigned int j; - char *arch, *next, *saved; + char *arch, *next, *saved, *type; switch (c) { @@ -10535,6 +12102,8 @@ md_parse_option (int c, const char *arg) /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section should be emitted or not. FIXME: Not implemented. */ case 'Q': + if ((arg[0] != 'y' && arg[0] != 'n') || arg[1]) + return 0; break; /* -V: SVR4 argument to print version ID. */ @@ -10554,6 +12123,17 @@ md_parse_option (int c, const char *arg) case OPTION_MSHARED: shared = 1; break; + + case OPTION_X86_USED_NOTE: + if (strcasecmp (arg, "yes") == 0) + x86_used_note = 1; + else if (strcasecmp (arg, "no") == 0) + x86_used_note = 0; + else + as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg); + break; + + #endif #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \ || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)) @@ -10805,6 +12385,15 @@ md_parse_option (int c, const char *arg) as_fatal (_("invalid -mavxscalar= option: `%s'"), arg); break; + case OPTION_MVEXWIG: + if (strcmp (arg, "0") == 0) + vexwig = vexw0; + else if (strcmp (arg, "1") == 0) + vexwig = vexw1; + else + as_fatal (_("invalid -mvexwig= option: `%s'"), arg); + break; + case OPTION_MADD_BND_PREFIX: add_bnd_prefix = 1; break; @@ -10875,6 +12464,88 @@ md_parse_option (int c, const char *arg) as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg); break; + case OPTION_MALIGN_BRANCH_BOUNDARY: + { + char *end; + long int align = strtoul (arg, &end, 0); + if (*end == '\0') + { + if (align == 0) + { + align_branch_power = 0; + break; + } + else if (align >= 16) + { + int align_power; + for (align_power = 0; + (align & 1) == 0; + align >>= 1, align_power++) + continue; + /* Limit alignment power to 31. */ + if (align == 1 && align_power < 32) + { + align_branch_power = align_power; + break; + } + } + } + as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg); + } + break; + + case OPTION_MALIGN_BRANCH_PREFIX_SIZE: + { + char *end; + int align = strtoul (arg, &end, 0); + /* Some processors only support 5 prefixes. 
*/ + if (*end == '\0' && align >= 0 && align < 6) + { + align_branch_prefix_size = align; + break; + } + as_fatal (_("invalid -malign-branch-prefix-size= value: %s"), + arg); + } + break; + + case OPTION_MALIGN_BRANCH: + align_branch = 0; + saved = xstrdup (arg); + type = saved; + do + { + next = strchr (type, '+'); + if (next) + *next++ = '\0'; + if (strcasecmp (type, "jcc") == 0) + align_branch |= align_branch_jcc_bit; + else if (strcasecmp (type, "fused") == 0) + align_branch |= align_branch_fused_bit; + else if (strcasecmp (type, "jmp") == 0) + align_branch |= align_branch_jmp_bit; + else if (strcasecmp (type, "call") == 0) + align_branch |= align_branch_call_bit; + else if (strcasecmp (type, "ret") == 0) + align_branch |= align_branch_ret_bit; + else if (strcasecmp (type, "indirect") == 0) + align_branch |= align_branch_indirect_bit; + else + as_fatal (_("invalid -malign-branch= option: `%s'"), arg); + type = next; + } + while (next != NULL); + free (saved); + break; + + case OPTION_MBRANCHES_WITH_32B_BOUNDARIES: + align_branch_power = 5; + align_branch_prefix_size = 5; + align_branch = (align_branch_jcc_bit + | align_branch_fused_bit + | align_branch_jmp_bit); + break; + case OPTION_MAMD64: intel64 = 0; break; @@ -10894,7 +12565,7 @@ md_parse_option (int c, const char *arg) { optimize_for_space = 1; /* Turn on all encoding optimizations. */ - optimize = -1; + optimize = INT_MAX; } else { @@ -11017,7 +12688,7 @@ md_show_usage (FILE *stream) { #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) fprintf (stream, _("\ - -Q ignored\n\ + -Qy, -Qn ignored\n\ -V print assembler version number\n\ -k ignored\n")); #endif @@ -11028,8 +12699,8 @@ md_show_usage (FILE *stream) fprintf (stream, _("\ -s ignored\n")); #endif -#if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \ - || defined (TE_PE) || defined (TE_PEP)) +#if defined BFD64 && (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \ + || defined (TE_PE) || defined (TE_PEP)) fprintf (stream, _("\ --32/--64/--x32 generate 32bit/64bit/x32 code\n")); #endif @@ -11053,52 +12724,95 @@ md_show_usage (FILE *stream) fprintf (stream, _("\ -msse2avx encode SSE instructions with VEX prefix\n")); fprintf (stream, _("\ - -msse-check=[none|error|warning]\n\ + -msse-check=[none|error|warning] (default: warning)\n\ check SSE instructions\n")); fprintf (stream, _("\ - -moperand-check=[none|error|warning]\n\ + -moperand-check=[none|error|warning] (default: warning)\n\ check operand combinations for validity\n")); fprintf (stream, _("\ - -mavxscalar=[128|256] encode scalar AVX instructions with specific vector\n\ + -mavxscalar=[128|256] (default: 128)\n\ + encode scalar AVX instructions with specific vector\n\ length\n")); fprintf (stream, _("\ - -mevexlig=[128|256|512] encode scalar EVEX instructions with specific vector\n\ + -mvexwig=[0|1] (default: 0)\n\ + encode VEX instructions with specific VEX.W value\n\ + for VEX.W bit ignored instructions\n")); + fprintf (stream, _("\ + -mevexlig=[128|256|512] (default: 128)\n\ + encode scalar EVEX instructions with specific vector\n\ length\n")); fprintf (stream, _("\ - -mevexwig=[0|1] encode EVEX instructions with specific EVEX.W value\n\ + -mevexwig=[0|1] (default: 0)\n\ + encode EVEX instructions with specific EVEX.W value\n\ for EVEX.W bit ignored instructions\n")); fprintf (stream, _("\ - -mevexrcig=[rne|rd|ru|rz]\n\ + -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\ encode EVEX instructions with specific EVEX.RC value\n\ for SAE-only ignored instructions\n")); fprintf (stream, _("\ - -mmnemonic=[att|intel] use AT&T/Intel 
mnemonic\n")); + -mmnemonic=[att|intel] ")); + if (SYSV386_COMPAT) + fprintf (stream, _("(default: att)\n")); + else + fprintf (stream, _("(default: intel)\n")); + fprintf (stream, _("\ + use AT&T/Intel mnemonic\n")); fprintf (stream, _("\ - -msyntax=[att|intel] use AT&T/Intel syntax\n")); + -msyntax=[att|intel] (default: att)\n\ + use AT&T/Intel syntax\n")); fprintf (stream, _("\ -mindex-reg support pseudo index registers\n")); fprintf (stream, _("\ -mnaked-reg don't require `%%' prefix for registers\n")); fprintf (stream, _("\ -madd-bnd-prefix add BND prefix for all valid branches\n")); +#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) fprintf (stream, _("\ -mshared disable branch optimization for shared code\n")); -# if defined (TE_PE) || defined (TE_PEP) + fprintf (stream, _("\ + -mx86-used-note=[no|yes] ")); + if (DEFAULT_X86_USED_NOTE) + fprintf (stream, _("(default: yes)\n")); + else + fprintf (stream, _("(default: no)\n")); + fprintf (stream, _("\ + generate x86 used ISA and feature properties\n")); +#endif +#if defined (TE_PE) || defined (TE_PEP) fprintf (stream, _("\ -mbig-obj generate big object files\n")); #endif fprintf (stream, _("\ - -momit-lock-prefix=[no|yes]\n\ + -momit-lock-prefix=[no|yes] (default: no)\n\ strip all lock prefixes\n")); fprintf (stream, _("\ - -mfence-as-lock-add=[no|yes]\n\ + -mfence-as-lock-add=[no|yes] (default: no)\n\ encode lfence, mfence and sfence as\n\ lock addl $0x0, (%%{re}sp)\n")); fprintf (stream, _("\ - -mrelax-relocations=[no|yes]\n\ + -mrelax-relocations=[no|yes] ")); + if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS) + fprintf (stream, _("(default: yes)\n")); + else + fprintf (stream, _("(default: no)\n")); + fprintf (stream, _("\ generate relax relocations\n")); fprintf (stream, _("\ - -mamd64 accept only AMD64 ISA\n")); + -malign-branch-boundary=NUM (default: 0)\n\ + align branches within NUM byte boundary\n")); + fprintf (stream, _("\ + -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\ + TYPE is combination of jcc, fused, jmp, call, ret,\n\ + indirect\n\ + specify types of branches to align\n")); + fprintf (stream, _("\ + -malign-branch-prefix-size=NUM (default: 5)\n\ + align branches with NUM prefixes per instruction\n")); + fprintf (stream, _("\ + -mbranches-within-32B-boundaries\n\ + align branches within 32 byte boundary\n")); + fprintf (stream, _("\ + -mamd64 accept only AMD64 ISA [default]\n")); fprintf (stream, _("\ -mintel64 accept only Intel64 ISA\n")); } @@ -11181,15 +12895,24 @@ i386_target_format (void) { default: format = ELF_TARGET_FORMAT; +#ifndef TE_SOLARIS + tls_get_addr = "___tls_get_addr"; +#endif break; case X86_64_ABI: use_rela_relocations = 1; object_64bit = 1; +#ifndef TE_SOLARIS + tls_get_addr = "__tls_get_addr"; +#endif format = ELF_TARGET_FORMAT64; break; case X86_64_X32_ABI: use_rela_relocations = 1; object_64bit = 1; +#ifndef TE_SOLARIS + tls_get_addr = "__tls_get_addr"; +#endif disallow_64bit_reloc = 1; format = ELF_TARGET_FORMAT32; break; @@ -11270,7 +12993,7 @@ md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size) work. */ int align; - align = bfd_get_section_alignment (stdoutput, segment); + align = bfd_section_alignment (segment); size = ((size + (1 << align) - 1) & (-((valueT) 1 << align))); } #endif @@ -11306,6 +13029,21 @@ s_bss (int ignore ATTRIBUTE_UNUSED) #endif +/* Remember constant directive. 
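(Usage sketch for the new alignment options documented above:

	as -malign-branch-boundary=32 -malign-branch=jcc+fused+jmp \
	   -malign-branch-prefix-size=5 foo.s

is what -mbranches-within-32B-boundaries expands to. The boundary must be 0 or a power of two >= 16, and the prefix size is capped at 5.)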
 */
+
+void
+i386_cons_align (int ignore ATTRIBUTE_UNUSED)
+{
+  if (last_insn.kind != last_insn_directive
+      && (bfd_section_flags (now_seg) & SEC_CODE))
+    {
+      last_insn.seg = now_seg;
+      last_insn.kind = last_insn_directive;
+      last_insn.name = "constant directive";
+      last_insn.file = as_where (&last_insn.line);
+    }
+}
+
 void
 i386_validate_fix (fixS *fixp)
 {
@@ -11713,8 +13451,7 @@ handle_large_common (int small ATTRIBUTE_UNUSED)
 	/* The .lbss section is for local .largecomm symbols.  */
 	lbss_section = subseg_new (".lbss", 0);
 	applicable = bfd_applicable_section_flags (stdoutput);
-	bfd_set_section_flags (stdoutput, lbss_section,
-			       applicable & SEC_ALLOC);
+	bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
 	seg_info (lbss_section)->bss = 1;
 	subseg_set (seg, subseg);
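(A short example of what i386_cons_align above records, a sketch rather than output from the patch:

	.text
	foo:	jmp	bar
		.byte	0x90

The .byte is noted as a "constant directive", so padding for a branch that follows such data in a code section is skipped: the assembler cannot know how the data is reached and so cannot safely insert prefixes or nops around it.)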