x86: further refine SSE check (SSE4a, SHA, GFNI)

[deliverable/binutils-gdb.git] / gas / config / tc-i386.c
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c

index 2bff48a778af7ce1e1baeb4bb0ca51d2e8e9fe89..accb6342038a5de8ea520fd19451fa96dce5bf77 100644 (file)
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1,5 +1,5 @@
  /* tc-i386.c -- Assemble code for the Intel 80386
-   Copyright (C) 1989-2018 Free Software Foundation, Inc.
+   Copyright (C) 1989-2019 Free Software Foundation, Inc.
  
     This file is part of GAS, the GNU Assembler.
  
@@ -33,6 +33,17 @@
  #include "elf/x86-64.h"
  #include "opcodes/i386-init.h"
  
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#else
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#ifndef INT_MAX
+#define INT_MAX (int) (((unsigned) (-1)) >> 1)
+#endif
+#endif
+
  #ifndef REGISTER_WARNINGS
  #define REGISTER_WARNINGS 1
  #endif
@@ -87,6 +98,9 @@
  
  #define END_OF_INSN '\0'
  
+/* This matches the C -> StaticRounding alias in the opcode table.  */
+#define commutative staticrounding
+
  /*
    'templates' is for grouping together 'template' structures for opcodes
    of the same name.  This is only used for storing the insns in the grand
@@ -339,6 +353,9 @@ struct _i386_insn
      unsigned int prefixes;
      unsigned char prefix[MAX_PREFIXES];
  
+    /* The operand to a branch insn indicates an absolute branch.  */
+    bfd_boolean jumpabsolute;
+
      /* Has MMX register operands.  */
      bfd_boolean has_regmmx;
  
@@ -684,6 +701,13 @@ static enum
      vex256
    } avxscalar;
  
+/* Encode VEX WIG instructions with specific vex.w.  */
+static enum
+  {
+    vexw0 = 0,
+    vexw1
+  } vexwig;
+
  /* Encode scalar EVEX LIG instructions with specific vector length.  */
  static enum
    {
@@ -1062,6 +1086,16 @@ static const arch_entry cpu_arch[] =
      CPU_MOVDIRI_FLAGS, 0 },
    { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
      CPU_MOVDIR64B_FLAGS, 0 },
+  { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
+    CPU_AVX512_BF16_FLAGS, 0 },
+  { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
+    CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
+  { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
+    CPU_ENQCMD_FLAGS, 0 },
+  { STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
+    CPU_RDPRU_FLAGS, 0 },
+  { STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
+    CPU_MCOMMIT_FLAGS, 0 },
  };
  
  static const noarch_entry cpu_noarch[] =
@@ -1101,6 +1135,9 @@ static const noarch_entry cpu_noarch[] =
    { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
    { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
    { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
+  { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
+  { STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_AVX512_VP2INTERSECT_FLAGS },
+  { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
  };
  
  #ifdef I386COFF
@@ -1278,7 +1315,16 @@ i386_output_nops (char *where, const unsigned char *const *patt,
    /* Place the longer NOP first.  */
    int last;
    int offset;
-  const unsigned char *nops =  patt[max_single_nop_size - 1];
+  const unsigned char *nops;
+
+  if (max_single_nop_size < 1)
+    {
+      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
+               max_single_nop_size);
+      return;
+    }
+
+  nops = patt[max_single_nop_size - 1];
  
    /* Use the smaller one if the requsted one isn't available.  */
    if (nops == NULL)
@@ -1568,6 +1614,9 @@ operand_type_set (union i386_operand_type *x, unsigned int v)
      default:
        abort ();
      }
+
+  x->bitfield.class = ClassNone;
+  x->bitfield.instance = InstanceNone;
  }
  
  static INLINE int
@@ -1782,6 +1831,11 @@ cpu_flags_match (const insn_template *t)
  static INLINE i386_operand_type
  operand_type_and (i386_operand_type x, i386_operand_type y)
  {
+  if (x.bitfield.class != y.bitfield.class)
+    x.bitfield.class = ClassNone;
+  if (x.bitfield.instance != y.bitfield.instance)
+    x.bitfield.instance = InstanceNone;
+
    switch (ARRAY_SIZE (x.array))
      {
      case 3:
@@ -1802,6 +1856,9 @@ operand_type_and (i386_operand_type x, i386_operand_type y)
  static INLINE i386_operand_type
  operand_type_and_not (i386_operand_type x, i386_operand_type y)
  {
+  gas_assert (y.bitfield.class == ClassNone);
+  gas_assert (y.bitfield.instance == InstanceNone);
+
    switch (ARRAY_SIZE (x.array))
      {
      case 3:
@@ -1822,6 +1879,13 @@ operand_type_and_not (i386_operand_type x, i386_operand_type y)
  static INLINE i386_operand_type
  operand_type_or (i386_operand_type x, i386_operand_type y)
  {
+  gas_assert (x.bitfield.class == ClassNone ||
+              y.bitfield.class == ClassNone ||
+              x.bitfield.class == y.bitfield.class);
+  gas_assert (x.bitfield.instance == InstanceNone ||
+              y.bitfield.instance == InstanceNone ||
+              x.bitfield.instance == y.bitfield.instance);
+
    switch (ARRAY_SIZE (x.array))
      {
      case 3:
@@ -1842,6 +1906,9 @@ operand_type_or (i386_operand_type x, i386_operand_type y)
  static INLINE i386_operand_type
  operand_type_xor (i386_operand_type x, i386_operand_type y)
  {
+  gas_assert (y.bitfield.class == ClassNone);
+  gas_assert (y.bitfield.instance == InstanceNone);
+
    switch (ARRAY_SIZE (x.array))
      {
      case 3:
@@ -1859,14 +1926,12 @@ operand_type_xor (i386_operand_type x, i386_operand_type y)
    return x;
  }
  
-static const i386_operand_type acc32 = OPERAND_TYPE_ACC32;
-static const i386_operand_type acc64 = OPERAND_TYPE_ACC64;
  static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
  static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
  static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
  static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
-static const i386_operand_type anydisp
-  = OPERAND_TYPE_ANYDISP;
+static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP;
+static const i386_operand_type anyimm = OPERAND_TYPE_ANYIMM;
  static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
  static const i386_operand_type regmask = OPERAND_TYPE_REGMASK;
  static const i386_operand_type imm8 = OPERAND_TYPE_IMM8;
@@ -1878,7 +1943,6 @@ static const i386_operand_type imm64 = OPERAND_TYPE_IMM64;
  static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
  static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
  static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
-static const i386_operand_type vec_imm4 = OPERAND_TYPE_VEC_IMM4;
  
  enum operand_type
  {
@@ -1894,7 +1958,7 @@ operand_type_check (i386_operand_type t, enum operand_type c)
    switch (c)
      {
      case reg:
-      return t.bitfield.reg;
+      return t.bitfield.class == Reg;
  
      case imm:
        return (t.bitfield.imm8
@@ -1977,7 +2041,7 @@ match_mem_size (const insn_template *t, unsigned int wanted,
                   operands at the same time, some special casing is needed
                   here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
                   down-conversion vpmov*.  */
-              || ((t->operand_types[wanted].bitfield.regsimd
+              || ((t->operand_types[wanted].bitfield.class == RegSIMD
                     && !t->opcode_modifier.broadcast
                     && (t->operand_types[wanted].bitfield.byte
                         || t->operand_types[wanted].bitfield.word
@@ -2002,35 +2066,34 @@ operand_size_match (const insn_template *t)
  {
    unsigned int j, match = MATCH_STRAIGHT;
  
-  /* Don't check jump instructions.  */
+  /* Don't check non-absolute jump instructions.  */
    if (t->opcode_modifier.jump
-      || t->opcode_modifier.jumpbyte
-      || t->opcode_modifier.jumpdword
-      || t->opcode_modifier.jumpintersegment)
+      && t->opcode_modifier.jump != JUMP_ABSOLUTE)
      return match;
  
    /* Check memory and accumulator operand size.  */
    for (j = 0; j < i.operands; j++)
      {
-      if (!i.types[j].bitfield.reg && !i.types[j].bitfield.regsimd
-         && t->operand_types[j].bitfield.anysize)
+      if (i.types[j].bitfield.class != Reg
+         && i.types[j].bitfield.class != RegSIMD
+         && t->opcode_modifier.anysize)
         continue;
  
-      if (t->operand_types[j].bitfield.reg
+      if (t->operand_types[j].bitfield.class == Reg
           && !match_operand_size (t, j, j))
         {
           match = 0;
           break;
         }
  
-      if (t->operand_types[j].bitfield.regsimd
+      if (t->operand_types[j].bitfield.class == RegSIMD
           && !match_simd_size (t, j, j))
         {
           match = 0;
           break;
         }
  
-      if (t->operand_types[j].bitfield.acc
+      if (t->operand_types[j].bitfield.instance == Accum
           && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
         {
           match = 0;
@@ -2059,15 +2122,15 @@ mismatch:
      {
        unsigned int given = i.operands - j - 1;
  
-      if (t->operand_types[j].bitfield.reg
+      if (t->operand_types[j].bitfield.class == Reg
           && !match_operand_size (t, j, given))
         goto mismatch;
  
-      if (t->operand_types[j].bitfield.regsimd
+      if (t->operand_types[j].bitfield.class == RegSIMD
           && !match_simd_size (t, j, given))
         goto mismatch;
  
-      if (t->operand_types[j].bitfield.acc
+      if (t->operand_types[j].bitfield.instance == Accum
           && (!match_operand_size (t, j, given)
               || !match_simd_size (t, j, given)))
         goto mismatch;
@@ -2085,7 +2148,6 @@ operand_type_match (i386_operand_type overlap,
  {
    i386_operand_type temp = overlap;
  
-  temp.bitfield.jumpabsolute = 0;
    temp.bitfield.unspecified = 0;
    temp.bitfield.byte = 0;
    temp.bitfield.word = 0;
@@ -2099,8 +2161,7 @@ operand_type_match (i386_operand_type overlap,
    if (operand_type_all_zero (&temp))
      goto mismatch;
  
-  if (given.bitfield.baseindex == overlap.bitfield.baseindex
-      && given.bitfield.jumpabsolute == overlap.bitfield.jumpabsolute)
+  if (given.bitfield.baseindex == overlap.bitfield.baseindex)
      return 1;
  
  mismatch:
@@ -2119,18 +2180,18 @@ operand_type_register_match (i386_operand_type g0,
                              i386_operand_type g1,
                              i386_operand_type t1)
  {
-  if (!g0.bitfield.reg
-      && !g0.bitfield.regsimd
+  if (g0.bitfield.class != Reg
+      && g0.bitfield.class != RegSIMD
        && (!operand_type_check (g0, anymem)
           || g0.bitfield.unspecified
-         || !t0.bitfield.regsimd))
+         || t0.bitfield.class != RegSIMD))
      return 1;
  
-  if (!g1.bitfield.reg
-      && !g1.bitfield.regsimd
+  if (g1.bitfield.class != Reg
+      && g1.bitfield.class != RegSIMD
        && (!operand_type_check (g1, anymem)
           || g1.bitfield.unspecified
-         || !t1.bitfield.regsimd))
+         || t1.bitfield.class != RegSIMD))
      return 1;
  
    if (g0.bitfield.byte == g1.bitfield.byte
@@ -2968,7 +3029,7 @@ static void pe (expressionS *);
  static void ps (symbolS *);
  
  static void
-pi (char *line, i386_insn *x)
+pi (const char *line, i386_insn *x)
  {
    unsigned int j;
  
@@ -2992,14 +3053,13 @@ pi (char *line, i386_insn *x)
        fprintf (stdout, "    #%d:  ", j + 1);
        pt (x->types[j]);
        fprintf (stdout, "\n");
-      if (x->types[j].bitfield.reg
-         || x->types[j].bitfield.regmmx
-         || x->types[j].bitfield.regsimd
-         || x->types[j].bitfield.sreg2
-         || x->types[j].bitfield.sreg3
-         || x->types[j].bitfield.control
-         || x->types[j].bitfield.debug
-         || x->types[j].bitfield.test)
+      if (x->types[j].bitfield.class == Reg
+         || x->types[j].bitfield.class == RegMMX
+         || x->types[j].bitfield.class == RegSIMD
+         || x->types[j].bitfield.class == SReg
+         || x->types[j].bitfield.class == RegCR
+         || x->types[j].bitfield.class == RegDR
+         || x->types[j].bitfield.class == RegTR)
         fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
        if (operand_type_check (x->types[j], imm))
         pe (x->op[j].imms);
@@ -3069,6 +3129,10 @@ const type_names[] =
    { OPERAND_TYPE_REG16, "r16" },
    { OPERAND_TYPE_REG32, "r32" },
    { OPERAND_TYPE_REG64, "r64" },
+  { OPERAND_TYPE_ACC8, "acc8" },
+  { OPERAND_TYPE_ACC16, "acc16" },
+  { OPERAND_TYPE_ACC32, "acc32" },
+  { OPERAND_TYPE_ACC64, "acc64" },
    { OPERAND_TYPE_IMM8, "i8" },
    { OPERAND_TYPE_IMM8, "i8s" },
    { OPERAND_TYPE_IMM16, "i16" },
@@ -3089,16 +3153,12 @@ const type_names[] =
    { OPERAND_TYPE_DEBUG, "debug reg" },
    { OPERAND_TYPE_FLOATREG, "FReg" },
    { OPERAND_TYPE_FLOATACC, "FAcc" },
-  { OPERAND_TYPE_SREG2, "SReg2" },
-  { OPERAND_TYPE_SREG3, "SReg3" },
-  { OPERAND_TYPE_ACC, "Acc" },
-  { OPERAND_TYPE_JUMPABSOLUTE, "Jump Absolute" },
+  { OPERAND_TYPE_SREG, "SReg" },
    { OPERAND_TYPE_REGMMX, "rMMX" },
    { OPERAND_TYPE_REGXMM, "rXMM" },
    { OPERAND_TYPE_REGYMM, "rYMM" },
    { OPERAND_TYPE_REGZMM, "rZMM" },
    { OPERAND_TYPE_REGMASK, "Mask reg" },
-  { OPERAND_TYPE_ESSEG, "es" },
  };
  
  static void
@@ -3110,7 +3170,7 @@ pt (i386_operand_type t)
    for (j = 0; j < ARRAY_SIZE (type_names); j++)
      {
        a = operand_type_and (t, type_names[j].mask);
-      if (!operand_type_all_zero (&a))
+      if (operand_type_equal (&a, &type_names[j].mask))
         fprintf (stdout, "%s, ",  type_names[j].name);
      }
    fflush (stdout);
@@ -3353,6 +3413,7 @@ build_vex_prefix (const insn_template *t)
    unsigned int register_specifier;
    unsigned int implied_prefix;
    unsigned int vector_length;
+  unsigned int w;
  
    /* Check register specifier.  */
    if (i.vex.register_specifier)
@@ -3364,9 +3425,10 @@ build_vex_prefix (const insn_template *t)
    else
      register_specifier = 0xf;
  
-  /* Use 2-byte VEX prefix by swapping destination and source
-     operand.  */
-  if (i.vec_encoding != vex_encoding_vex3
+  /* Use 2-byte VEX prefix by swapping destination and source operand
+     if there are more than 1 register operand.  */
+  if (i.reg_operands > 1
+      && i.vec_encoding != vex_encoding_vex3
        && i.dir_encoding == dir_encoding_default
        && i.operands == i.reg_operands
        && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
@@ -3399,6 +3461,43 @@ build_vex_prefix (const insn_template *t)
         i.tm = t[1];
      }
  
+  /* Use 2-byte VEX prefix by swapping commutative source operands if there
+     are no memory operands and at least 3 register ones.  */
+  if (i.reg_operands >= 3
+      && i.vec_encoding != vex_encoding_vex3
+      && i.reg_operands == i.operands - i.imm_operands
+      && i.tm.opcode_modifier.vex
+      && i.tm.opcode_modifier.commutative
+      && (i.tm.opcode_modifier.sse2avx || optimize > 1)
+      && i.rex == REX_B
+      && i.vex.register_specifier
+      && !(i.vex.register_specifier->reg_flags & RegRex))
+    {
+      unsigned int xchg = i.operands - i.reg_operands;
+      union i386_op temp_op;
+      i386_operand_type temp_type;
+
+      gas_assert (i.tm.opcode_modifier.vexopcode == VEX0F);
+      gas_assert (!i.tm.opcode_modifier.sae);
+      gas_assert (operand_type_equal (&i.types[i.operands - 2],
+                                      &i.types[i.operands - 3]));
+      gas_assert (i.rm.mode == 3);
+
+      temp_type = i.types[xchg];
+      i.types[xchg] = i.types[xchg + 1];
+      i.types[xchg + 1] = temp_type;
+      temp_op = i.op[xchg];
+      i.op[xchg] = i.op[xchg + 1];
+      i.op[xchg + 1] = temp_op;
+
+      i.rex = 0;
+      xchg = i.rm.regmem | 8;
+      i.rm.regmem = ~register_specifier & 0xf;
+      gas_assert (!(i.rm.regmem & 8));
+      i.vex.register_specifier += xchg - i.rm.regmem;
+      register_specifier = ~xchg & 0xf;
+    }
+
    if (i.tm.opcode_modifier.vex == VEXScalar)
      vector_length = avxscalar;
    else if (i.tm.opcode_modifier.vex == VEX256)
@@ -3438,10 +3537,18 @@ build_vex_prefix (const insn_template *t)
        abort ();
      }
  
+  /* Check the REX.W bit and VEXW.  */
+  if (i.tm.opcode_modifier.vexw == VEXWIG)
+    w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
+  else if (i.tm.opcode_modifier.vexw)
+    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
+  else
+    w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
+
    /* Use 2-byte VEX prefix if possible.  */
-  if (i.vec_encoding != vex_encoding_vex3
+  if (w == 0
+      && i.vec_encoding != vex_encoding_vex3
        && i.tm.opcode_modifier.vexopcode == VEX0F
-      && i.tm.opcode_modifier.vexw != VEXW1
        && (i.rex & (REX_W | REX_X | REX_B)) == 0)
      {
        /* 2-byte VEX prefix.  */
@@ -3460,7 +3567,7 @@ build_vex_prefix (const insn_template *t)
    else
      {
        /* 3-byte VEX prefix.  */
-      unsigned int m, w;
+      unsigned int m;
  
        i.vex.length = 3;
  
@@ -3498,11 +3605,6 @@ build_vex_prefix (const insn_template *t)
          of RXB bits from REX.  */
        i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;
  
-      /* Check the REX.W bit.  */
-      w = (i.rex & REX_W) ? 1 : 0;
-      if (i.tm.opcode_modifier.vexw == VEXW1)
-       w = 1;
-
        i.vex.bytes[2] = (w << 7
                         | register_specifier << 3
                         | vector_length << 2
@@ -3515,7 +3617,7 @@ is_evex_encoding (const insn_template *t)
  {
    return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
          || t->opcode_modifier.broadcast || t->opcode_modifier.masking
-        || t->opcode_modifier.staticrounding || t->opcode_modifier.sae;
+        || t->opcode_modifier.sae;
  }
  
  static INLINE bfd_boolean
@@ -3628,19 +3730,13 @@ build_evex_prefix (void)
    i.vrex &= ~vrex_used;
    gas_assert (i.vrex == 0);
  
-  /* Check the REX.W bit.  */
-  w = (i.rex & REX_W) ? 1 : 0;
-  if (i.tm.opcode_modifier.vexw)
-    {
-      if (i.tm.opcode_modifier.vexw == VEXW1)
-       w = 1;
-    }
-  /* If w is not set it means we are dealing with WIG instruction.  */
-  else if (!w)
-    {
-      if (evexwig == evexw1)
-        w = 1;
-    }
+  /* Check the REX.W bit and VEXW.  */
+  if (i.tm.opcode_modifier.vexw == VEXWIG)
+    w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
+  else if (i.tm.opcode_modifier.vexw)
+    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
+  else
+    w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
  
    /* Encode the U bit.  */
    implied_prefix |= 0x4;
@@ -3751,52 +3847,6 @@ process_immext (void)
  {
    expressionS *exp;
  
-  if ((i.tm.cpu_flags.bitfield.cpusse3 || i.tm.cpu_flags.bitfield.cpusvme)
-      && i.operands > 0)
-    {
-      /* MONITOR/MWAIT as well as SVME instructions have fixed operands
-        with an opcode suffix which is coded in the same place as an
-        8-bit immediate field would be.
-        Here we check those operands and remove them afterwards.  */
-      unsigned int x;
-
-      for (x = 0; x < i.operands; x++)
-       if (register_number (i.op[x].regs) != x)
-         as_bad (_("can't use register '%s%s' as operand %d in '%s'."),
-                 register_prefix, i.op[x].regs->reg_name, x + 1,
-                 i.tm.name);
-
-      i.operands = 0;
-    }
-
-  if (i.tm.cpu_flags.bitfield.cpumwaitx && i.operands > 0)
-    {
-      /* MONITORX/MWAITX instructions have fixed operands with an opcode
-        suffix which is coded in the same place as an 8-bit immediate
-        field would be.
-        Here we check those operands and remove them afterwards.  */
-      unsigned int x;
-
-      if (i.operands != 3)
-       abort();
-
-      for (x = 0; x < 2; x++)
-       if (register_number (i.op[x].regs) != x)
-         goto bad_register_operand;
-
-      /* Check for third operand for mwaitx/monitorx insn.  */
-      if (register_number (i.op[x].regs)
-         != (x + (i.tm.extension_opcode == 0xfb)))
-       {
-bad_register_operand:
-         as_bad (_("can't use register '%s%s' as operand %d in '%s'."),
-                 register_prefix, i.op[x].regs->reg_name, x+1,
-                 i.tm.name);
-       }
-
-      i.operands = 0;
-    }
-
    /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
       which is coded in the same place as an 8-bit immediate field
       would be.  Here we fake an 8-bit immediate operand from the
@@ -3845,8 +3895,7 @@ check_hle (void)
                   i.tm.name);
           return 0;
         }
-      if (i.mem_operands == 0
-         || !operand_type_check (i.types[i.operands - 1], anymem))
+      if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
         {
           as_bad (_("memory destination needed for instruction `%s'"
                     " after `xrelease'"), i.tm.name);
@@ -3861,7 +3910,7 @@ check_hle (void)
  static void
  optimize_encoding (void)
  {
-  int j;
+  unsigned int j;
  
    if (optimize_for_space
        && i.reg_operands == 1
@@ -3902,7 +3951,7 @@ optimize_encoding (void)
                 && i.reg_operands == 1
                 && i.imm_operands == 1
                 && i.op[0].imms->X_op == O_constant
-               && ((i.tm.base_opcode == 0xb0
+               && ((i.tm.base_opcode == 0xb8
                      && i.tm.extension_opcode == None
                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))
                     || (fits_in_imm31 (i.op[0].imms->X_add_number)
@@ -3912,8 +3961,11 @@ optimize_encoding (void)
                             || (i.tm.base_opcode == 0x80
                                 && i.tm.extension_opcode == 0x4)
                             || ((i.tm.base_opcode == 0xf6
-                                || i.tm.base_opcode == 0xc6)
-                               && i.tm.extension_opcode == 0x0)))))
+                                || (i.tm.base_opcode | 1) == 0xc7)
+                               && i.tm.extension_opcode == 0x0)))
+                   || (fits_in_imm7 (i.op[0].imms->X_add_number)
+                       && i.tm.base_opcode == 0x83
+                       && i.tm.extension_opcode == 0x4)))
                || (i.types[0].bitfield.qword
                    && ((i.reg_operands == 2
                         && i.op[0].regs == i.op[1].regs
@@ -3927,6 +3979,7 @@ optimize_encoding (void)
      {
        /* Optimize: -O:
            andq $imm31, %r64   -> andl $imm31, %r32
+          andq $imm7, %r64    -> andl $imm7, %r32
            testq $imm31, %r64  -> testl $imm31, %r32
            xorq %r64, %r64     -> xorl %r32, %r32
            subq %r64, %r64     -> subl %r32, %r32
@@ -3934,7 +3987,7 @@ optimize_encoding (void)
            movq $imm32, %r64   -> movl $imm32, %r32
          */
        i.tm.opcode_modifier.norex64 = 1;
-      if (i.tm.base_opcode == 0xb0 || i.tm.base_opcode == 0xc6)
+      if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
         {
           /* Handle
                movq $imm31, %r64   -> movl $imm31, %r32
@@ -3948,20 +4001,43 @@ optimize_encoding (void)
           i.types[0].bitfield.imm64 = 0;
           i.types[1].bitfield.dword = 1;
           i.types[1].bitfield.qword = 0;
-         if (i.tm.base_opcode == 0xc6)
+         if ((i.tm.base_opcode | 1) == 0xc7)
             {
               /* Handle
                    movq $imm31, %r64   -> movl $imm31, %r32
                */
-             i.tm.base_opcode = 0xb0;
+             i.tm.base_opcode = 0xb8;
               i.tm.extension_opcode = None;
+             i.tm.opcode_modifier.w = 0;
               i.tm.opcode_modifier.shortform = 1;
               i.tm.opcode_modifier.modrm = 0;
             }
         }
      }
    else if (optimize > 1
-          && i.reg_operands == 3
+          && !optimize_for_space
+          && i.reg_operands == 2
+          && i.op[0].regs == i.op[1].regs
+          && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
+              || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
+          && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
+    {
+      /* Optimize: -O2:
+          andb %rN, %rN  -> testb %rN, %rN
+          andw %rN, %rN  -> testw %rN, %rN
+          andq %rN, %rN  -> testq %rN, %rN
+          orb %rN, %rN   -> testb %rN, %rN
+          orw %rN, %rN   -> testw %rN, %rN
+          orq %rN, %rN   -> testq %rN, %rN
+
+          and outside of 64-bit mode
+
+          andl %rN, %rN  -> testl %rN, %rN
+          orl %rN, %rN   -> testl %rN, %rN
+       */
+      i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
+    }
+  else if (i.reg_operands == 3
            && i.op[0].regs == i.op[1].regs
            && !i.types[2].bitfield.xmmword
            && (i.tm.opcode_modifier.vex
@@ -3969,10 +4045,10 @@ optimize_encoding (void)
                    && !i.rounding
                    && is_evex_encoding (&i.tm)
                    && (i.vec_encoding != vex_encoding_evex
+                      || cpu_arch_isa_flags.bitfield.cpuavx512vl
                        || i.tm.cpu_flags.bitfield.cpuavx512vl
                        || (i.tm.operand_types[2].bitfield.zmmword
-                          && i.types[2].bitfield.ymmword)
-                      || cpu_arch_isa_flags.bitfield.cpuavx512vl)))
+                          && i.types[2].bitfield.ymmword))))
            && ((i.tm.base_opcode == 0x55
                 || i.tm.base_opcode == 0x6655
                 || i.tm.base_opcode == 0x66df
@@ -3989,15 +4065,15 @@ optimize_encoding (void)
                 || i.tm.base_opcode == 0x6647)
                && i.tm.extension_opcode == None))
      {
-      /* Optimize: -O2:
+      /* Optimize: -O1:
            VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
            vpsubq and vpsubw:
              EVEX VOP %zmmM, %zmmM, %zmmN
                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
-              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
              EVEX VOP %ymmM, %ymmM, %ymmN
                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
-              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
              VEX VOP %ymmM, %ymmM, %ymmN
                -> VEX VOP %xmmM, %xmmM, %xmmN
            VOP, one of vpandn and vpxor:
@@ -4006,17 +4082,17 @@ optimize_encoding (void)
            VOP, one of vpandnd and vpandnq:
              EVEX VOP %zmmM, %zmmM, %zmmN
                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
-              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
              EVEX VOP %ymmM, %ymmM, %ymmN
                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
-              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
            VOP, one of vpxord and vpxorq:
              EVEX VOP %zmmM, %zmmM, %zmmN
                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
-              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
              EVEX VOP %ymmM, %ymmM, %ymmN
                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
-              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
            VOP, one of kxord and kxorq:
              VEX VOP %kM, %kM, %kN
                -> VEX kxorw %kM, %kM, %kN
@@ -4026,16 +4102,18 @@ optimize_encoding (void)
         */
        if (is_evex_encoding (&i.tm))
         {
-         if (i.vec_encoding == vex_encoding_evex)
-           i.tm.opcode_modifier.evex = EVEX128;
-         else
+         if (i.vec_encoding != vex_encoding_evex)
             {
               i.tm.opcode_modifier.vex = VEX128;
               i.tm.opcode_modifier.vexw = VEXW0;
               i.tm.opcode_modifier.evex = 0;
             }
+         else if (optimize > 1)
+           i.tm.opcode_modifier.evex = EVEX128;
+         else
+           return;
         }
-      else if (i.tm.operand_types[0].bitfield.regmask)
+      else if (i.tm.operand_types[0].bitfield.class == RegMask)
         {
           i.tm.base_opcode &= 0xff;
           i.tm.opcode_modifier.vexw = VEXW0;
@@ -4050,6 +4128,84 @@ optimize_encoding (void)
             i.types[j].bitfield.ymmword = 0;
           }
      }
+  else if (i.vec_encoding != vex_encoding_evex
+          && !i.types[0].bitfield.zmmword
+          && !i.types[1].bitfield.zmmword
+          && !i.mask
+          && !i.broadcast
+          && is_evex_encoding (&i.tm)
+          && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
+              || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
+              || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f
+              || (i.tm.base_opcode & ~4) == 0x66db
+              || (i.tm.base_opcode & ~4) == 0x66eb)
+          && i.tm.extension_opcode == None)
+    {
+      /* Optimize: -O1:
+          VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
+          vmovdqu32 and vmovdqu64:
+            EVEX VOP %xmmM, %xmmN
+              -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
+            EVEX VOP %ymmM, %ymmN
+              -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
+            EVEX VOP %xmmM, mem
+              -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
+            EVEX VOP %ymmM, mem
+              -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
+            EVEX VOP mem, %xmmN
+              -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
+            EVEX VOP mem, %ymmN
+              -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
+          VOP, one of vpand, vpandn, vpor, vpxor:
+            EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
+              -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
+            EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
+              -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
+            EVEX VOP{d,q} mem, %xmmM, %xmmN
+              -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
+            EVEX VOP{d,q} mem, %ymmM, %ymmN
+              -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
+       */
+      for (j = 0; j < i.operands; j++)
+       if (operand_type_check (i.types[j], disp)
+           && i.op[j].disps->X_op == O_constant)
+         {
+           /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
+              has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
+              bytes, we choose EVEX Disp8 over VEX Disp32.  */
+           int evex_disp8, vex_disp8;
+           unsigned int memshift = i.memshift;
+           offsetT n = i.op[j].disps->X_add_number;
+
+           evex_disp8 = fits_in_disp8 (n);
+           i.memshift = 0;
+           vex_disp8 = fits_in_disp8 (n);
+           if (evex_disp8 != vex_disp8)
+             {
+               i.memshift = memshift;
+               return;
+             }
+
+           i.types[j].bitfield.disp8 = vex_disp8;
+           break;
+         }
+      if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
+       i.tm.base_opcode ^= 0xf36f ^ 0xf26f;
+      i.tm.opcode_modifier.vex
+       = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
+      i.tm.opcode_modifier.vexw = VEXW0;
+      /* VPAND, VPOR, and VPXOR are commutative.  */
+      if (i.reg_operands == 3 && i.tm.base_opcode != 0x66df)
+       i.tm.opcode_modifier.commutative = 1;
+      i.tm.opcode_modifier.evex = 0;
+      i.tm.opcode_modifier.masking = 0;
+      i.tm.opcode_modifier.broadcast = 0;
+      i.tm.opcode_modifier.disp8memshift = 0;
+      i.memshift = 0;
+      if (j < i.operands)
+       i.types[j].bitfield.disp8
+         = fits_in_disp8 (i.op[j].disps->X_add_number);
+    }
  }
  
  /* This is the guts of the machine-dependent assembler.  LINE points to a
@@ -4130,14 +4286,17 @@ md_assemble (char *line)
    if (sse_check != check_none
        && !i.tm.opcode_modifier.noavx
        && !i.tm.cpu_flags.bitfield.cpuavx
+      && !i.tm.cpu_flags.bitfield.cpuavx512f
        && (i.tm.cpu_flags.bitfield.cpusse
           || i.tm.cpu_flags.bitfield.cpusse2
           || i.tm.cpu_flags.bitfield.cpusse3
           || i.tm.cpu_flags.bitfield.cpussse3
           || i.tm.cpu_flags.bitfield.cpusse4_1
           || i.tm.cpu_flags.bitfield.cpusse4_2
+         || i.tm.cpu_flags.bitfield.cpusse4a
           || i.tm.cpu_flags.bitfield.cpupclmul
           || i.tm.cpu_flags.bitfield.cpuaes
+         || i.tm.cpu_flags.bitfield.cpusha
           || i.tm.cpu_flags.bitfield.cpugfni))
      {
        (sse_check == check_warning
@@ -4179,7 +4338,7 @@ md_assemble (char *line)
        && (!i.tm.opcode_modifier.islockable
           || i.mem_operands == 0
           || (i.tm.base_opcode != 0x86
-             && !operand_type_check (i.types[i.operands - 1], anymem))))
+             && !(i.flags[i.operands - 1] & Operand_Mem))))
      {
        as_bad (_("expecting lockable instruction after `lock'"));
        return;
@@ -4227,8 +4386,9 @@ md_assemble (char *line)
      }
  
    /* Check string instruction segment overrides.  */
-  if (i.tm.opcode_modifier.isstring && i.mem_operands != 0)
+  if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
      {
+      gas_assert (i.mem_operands);
        if (!check_string ())
         return;
        i.disp_operands = 0;
@@ -4256,9 +4416,8 @@ md_assemble (char *line)
       with 3 operands or less.  */
    if (i.operands <= 3)
      for (j = 0; j < i.operands; j++)
-      if (i.types[j].bitfield.inoutportreg
-         || i.types[j].bitfield.shiftcount
-         || (i.types[j].bitfield.acc && !i.types[j].bitfield.xmmword))
+      if (i.types[j].bitfield.instance != InstanceNone
+         && !i.types[j].bitfield.xmmword)
         i.reg_operands--;
  
    /* ImmExt should be processed after SSE2AVX.  */
@@ -4280,9 +4439,9 @@ md_assemble (char *line)
  
    if (is_any_vex_encoding (&i.tm))
      {
-      if (flag_code == CODE_16BIT)
+      if (!cpu_arch_flags.bitfield.cpui286)
         {
-         as_bad (_("instruction `%s' isn't supported in 16-bit mode."),
+         as_bad (_("instruction `%s' isn't supported outside of protected mode."),
                   i.tm.name);
           return;
         }
@@ -4304,9 +4463,9 @@ md_assemble (char *line)
        i.imm_operands = 0;
      }
  
-  if ((i.tm.opcode_modifier.jump
-       || i.tm.opcode_modifier.jumpbyte
-       || i.tm.opcode_modifier.jumpdword)
+  if ((i.tm.opcode_modifier.jump == JUMP
+       || i.tm.opcode_modifier.jump == JUMP_BYTE
+       || i.tm.opcode_modifier.jump == JUMP_DWORD)
        && i.op[0].disps->X_op == O_constant)
      {
        /* Convert "jmp constant" (and "call constant") to a jump (call) to
@@ -4323,12 +4482,12 @@ md_assemble (char *line)
       instruction already has a prefix, we need to convert old
       registers to new ones.  */
  
-  if ((i.types[0].bitfield.reg && i.types[0].bitfield.byte
+  if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
         && (i.op[0].regs->reg_flags & RegRex64) != 0)
-      || (i.types[1].bitfield.reg && i.types[1].bitfield.byte
+      || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
           && (i.op[1].regs->reg_flags & RegRex64) != 0)
-      || (((i.types[0].bitfield.reg && i.types[0].bitfield.byte)
-          || (i.types[1].bitfield.reg && i.types[1].bitfield.byte))
+      || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
+          || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
           && i.rex != 0))
      {
        int x;
@@ -4337,7 +4496,7 @@ md_assemble (char *line)
        for (x = 0; x < 2; x++)
         {
           /* Look for 8 bit operand that uses old registers.  */
-         if (i.types[x].bitfield.reg && i.types[x].bitfield.byte
+         if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
               && (i.op[x].regs->reg_flags & RegRex64) == 0)
             {
               /* In case it is "hi" register, give up.  */
@@ -4362,7 +4521,7 @@ md_assemble (char *line)
          the REX_OPCODE byte.  */
        int x;
        for (x = 0; x < 2; x++)
-       if (i.types[x].bitfield.reg
+       if (i.types[x].bitfield.class == Reg
             && i.types[x].bitfield.byte
             && (i.op[x].regs->reg_flags & RegRex64) == 0
             && i.op[x].regs->reg_num > 3)
@@ -4444,10 +4603,10 @@ parse_insn (char *line, char *mnemonic)
             }
           /* If we are in 16-bit mode, do not allow addr16 or data16.
              Similarly, in 32-bit mode, do not allow addr32 or data32.  */
-         if ((current_templates->start->opcode_modifier.size16
-              || current_templates->start->opcode_modifier.size32)
+         if ((current_templates->start->opcode_modifier.size == SIZE16
+              || current_templates->start->opcode_modifier.size == SIZE32)
               && flag_code != CODE_64BIT
-             && (current_templates->start->opcode_modifier.size32
+             && ((current_templates->start->opcode_modifier.size == SIZE32)
                   ^ (flag_code == CODE_16BIT)))
             {
               as_bad (_("redundant %s prefix"),
@@ -4555,46 +4714,50 @@ parse_insn (char *line, char *mnemonic)
    if (!current_templates)
      {
  check_suffix:
-      /* See if we can get a match by trimming off a suffix.  */
-      switch (mnem_p[-1])
+      if (mnem_p > mnemonic)
         {
-       case WORD_MNEM_SUFFIX:
-         if (intel_syntax && (intel_float_operand (mnemonic) & 2))
-           i.suffix = SHORT_MNEM_SUFFIX;
-         else
-           /* Fall through.  */
-       case BYTE_MNEM_SUFFIX:
-       case QWORD_MNEM_SUFFIX:
-         i.suffix = mnem_p[-1];
-         mnem_p[-1] = '\0';
-         current_templates = (const templates *) hash_find (op_hash,
-                                                             mnemonic);
-         break;
-       case SHORT_MNEM_SUFFIX:
-       case LONG_MNEM_SUFFIX:
-         if (!intel_syntax)
-           {
-             i.suffix = mnem_p[-1];
-             mnem_p[-1] = '\0';
-             current_templates = (const templates *) hash_find (op_hash,
-                                                                 mnemonic);
-           }
-         break;
-
-         /* Intel Syntax.  */
-       case 'd':
-         if (intel_syntax)
+         /* See if we can get a match by trimming off a suffix.  */
+         switch (mnem_p[-1])
             {
-             if (intel_float_operand (mnemonic) == 1)
+           case WORD_MNEM_SUFFIX:
+             if (intel_syntax && (intel_float_operand (mnemonic) & 2))
                 i.suffix = SHORT_MNEM_SUFFIX;
               else
-               i.suffix = LONG_MNEM_SUFFIX;
+               /* Fall through.  */
+             case BYTE_MNEM_SUFFIX:
+             case QWORD_MNEM_SUFFIX:
+               i.suffix = mnem_p[-1];
               mnem_p[-1] = '\0';
               current_templates = (const templates *) hash_find (op_hash,
-                                                                 mnemonic);
+                                                                mnemonic);
+             break;
+           case SHORT_MNEM_SUFFIX:
+           case LONG_MNEM_SUFFIX:
+             if (!intel_syntax)
+               {
+                 i.suffix = mnem_p[-1];
+                 mnem_p[-1] = '\0';
+                 current_templates = (const templates *) hash_find (op_hash,
+                                                                    mnemonic);
+               }
+             break;
+
+             /* Intel Syntax.  */
+           case 'd':
+             if (intel_syntax)
+               {
+                 if (intel_float_operand (mnemonic) == 1)
+                   i.suffix = SHORT_MNEM_SUFFIX;
+                 else
+                   i.suffix = LONG_MNEM_SUFFIX;
+                 mnem_p[-1] = '\0';
+                 current_templates = (const templates *) hash_find (op_hash,
+                                                                    mnemonic);
+               }
+             break;
             }
-         break;
         }
+
        if (!current_templates)
         {
           as_bad (_("no such instruction: `%s'"), token_start);
@@ -4602,8 +4765,8 @@ check_suffix:
         }
      }
  
-  if (current_templates->start->opcode_modifier.jump
-      || current_templates->start->opcode_modifier.jumpbyte)
+  if (current_templates->start->opcode_modifier.jump == JUMP
+      || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
      {
        /* Check for a branch hint.  We allow ",pt" and ",pn" for
          predict taken and predict not taken respectively.
@@ -4875,26 +5038,28 @@ optimize_imm (void)
    else if (i.reg_operands)
      {
        /* Figure out a suffix from the last register operand specified.
-        We can't do this properly yet, ie. excluding InOutPortReg,
-        but the following works for instructions with immediates.
-        In any case, we can't set i.suffix yet.  */
+        We can't do this properly yet, i.e. excluding special register
+        instances, but the following works for instructions with
+        immediates.  In any case, we can't set i.suffix yet.  */
        for (op = i.operands; --op >= 0;)
-       if (i.types[op].bitfield.reg && i.types[op].bitfield.byte)
+       if (i.types[op].bitfield.class != Reg)
+         continue;
+       else if (i.types[op].bitfield.byte)
           {
             guess_suffix = BYTE_MNEM_SUFFIX;
             break;
           }
-       else if (i.types[op].bitfield.reg && i.types[op].bitfield.word)
+       else if (i.types[op].bitfield.word)
           {
             guess_suffix = WORD_MNEM_SUFFIX;
             break;
           }
-       else if (i.types[op].bitfield.reg && i.types[op].bitfield.dword)
+       else if (i.types[op].bitfield.dword)
           {
             guess_suffix = LONG_MNEM_SUFFIX;
             break;
           }
-       else if (i.types[op].bitfield.reg && i.types[op].bitfield.qword)
+       else if (i.types[op].bitfield.qword)
           {
             guess_suffix = QWORD_MNEM_SUFFIX;
             break;
@@ -4983,8 +5148,10 @@ optimize_imm (void)
               for (t = current_templates->start;
                    t < current_templates->end;
                    ++t)
-               allowed = operand_type_or (allowed,
-                                          t->operand_types[op]);
+               {
+                 allowed = operand_type_or (allowed, t->operand_types[op]);
+                 allowed = operand_type_and (allowed, anyimm);
+               }
               switch (guess_suffix)
                 {
                 case QWORD_MNEM_SUFFIX:
@@ -5176,10 +5343,10 @@ check_VecOperands (const insn_template *t)
        gas_assert (i.reg_operands == 2 || i.mask);
        if (i.reg_operands == 2 && !i.mask)
         {
-         gas_assert (i.types[0].bitfield.regsimd);
+         gas_assert (i.types[0].bitfield.class == RegSIMD);
           gas_assert (i.types[0].bitfield.xmmword
                       || i.types[0].bitfield.ymmword);
-         gas_assert (i.types[2].bitfield.regsimd);
+         gas_assert (i.types[2].bitfield.class == RegSIMD);
           gas_assert (i.types[2].bitfield.xmmword
                       || i.types[2].bitfield.ymmword);
           if (operand_check == check_none)
@@ -5200,7 +5367,7 @@ check_VecOperands (const insn_template *t)
         }
        else if (i.reg_operands == 1 && i.mask)
         {
-         if (i.types[1].bitfield.regsimd
+         if (i.types[1].bitfield.class == RegSIMD
               && (i.types[1].bitfield.xmmword
                   || i.types[1].bitfield.ymmword
                   || i.types[1].bitfield.zmmword)
@@ -5290,7 +5457,7 @@ check_VecOperands (const insn_template *t)
      {
        /* Find memory operand.  */
        for (op = 0; op < i.operands; op++)
-       if (operand_type_check (i.types[op], anymem))
+       if (i.flags[op] & Operand_Mem)
           break;
        gas_assert (op < i.operands);
        /* Check size of the memory operand.  */
@@ -5349,11 +5516,8 @@ check_VecOperands (const insn_template *t)
    /* Check RC/SAE.  */
    if (i.rounding)
      {
-      if ((i.rounding->type != saeonly
-          && !t->opcode_modifier.staticrounding)
-         || (i.rounding->type == saeonly
-             && (t->opcode_modifier.staticrounding
-                 || !t->opcode_modifier.sae)))
+      if (!t->opcode_modifier.sae
+         || (i.rounding->type != saeonly && !t->opcode_modifier.staticrounding))
         {
           i.error = unsupported_rc_sae;
           return 1;
@@ -5383,7 +5547,7 @@ check_VecOperands (const insn_template *t)
  
           i.memshift = 0;
           for (op = 0; op < i.operands; op++)
-           if (operand_type_check (i.types[op], anymem))
+           if (i.flags[op] & Operand_Mem)
               {
                 if (t->opcode_modifier.evex == EVEXLIG)
                   i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
@@ -5394,7 +5558,7 @@ check_VecOperands (const insn_template *t)
                 else if (!i.types[op].bitfield.unspecified)
                   type = &i.types[op];
               }
-           else if (i.types[op].bitfield.regsimd
+           else if (i.types[op].bitfield.class == RegSIMD
                      && t->opcode_modifier.evex != EVEXLIG)
               {
                 if (i.types[op].bitfield.zmmword)
@@ -5466,8 +5630,8 @@ VEX_check_operands (const insn_template *t)
        return 0;
      }
  
-  /* Only check VEX_Imm4, which must be the first operand.  */
-  if (t->operand_types[0].bitfield.vec_imm4)
+  /* Check the special Imm4 cases; must be the first operand.  */
+  if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
      {
        if (i.op[0].imms->X_op != O_constant
           || !fits_in_imm4 (i.op[0].imms->X_add_number))
@@ -5476,8 +5640,8 @@ VEX_check_operands (const insn_template *t)
           return 1;
         }
  
-      /* Turn off Imm8 so that update_imm won't complain.  */
-      i.types[0] = vec_imm4;
+      /* Turn off Imm<N> so that update_imm won't complain.  */
+      operand_type_set (&i.types[0], 0);
      }
  
    return 0;
@@ -5491,7 +5655,7 @@ match_template (char mnem_suffix)
    i386_operand_type overlap0, overlap1, overlap2, overlap3;
    i386_operand_type overlap4;
    unsigned int found_reverse_match;
-  i386_opcode_modifier suffix_check, mnemsuf_check;
+  i386_opcode_modifier suffix_check;
    i386_operand_type operand_types [MAX_OPERANDS];
    int addr_prefix_disp;
    unsigned int j;
@@ -5506,33 +5670,33 @@ match_template (char mnem_suffix)
    found_reverse_match = 0;
    addr_prefix_disp = -1;
  
+  /* Prepare for mnemonic suffix check.  */
    memset (&suffix_check, 0, sizeof (suffix_check));
-  if (intel_syntax && i.broadcast)
-    /* nothing */;
-  else if (i.suffix == BYTE_MNEM_SUFFIX)
-    suffix_check.no_bsuf = 1;
-  else if (i.suffix == WORD_MNEM_SUFFIX)
-    suffix_check.no_wsuf = 1;
-  else if (i.suffix == SHORT_MNEM_SUFFIX)
-    suffix_check.no_ssuf = 1;
-  else if (i.suffix == LONG_MNEM_SUFFIX)
-    suffix_check.no_lsuf = 1;
-  else if (i.suffix == QWORD_MNEM_SUFFIX)
-    suffix_check.no_qsuf = 1;
-  else if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
-    suffix_check.no_ldsuf = 1;
-
-  memset (&mnemsuf_check, 0, sizeof (mnemsuf_check));
-  if (intel_syntax)
+  switch (mnem_suffix)
      {
-      switch (mnem_suffix)
-       {
-       case BYTE_MNEM_SUFFIX:  mnemsuf_check.no_bsuf = 1; break;
-       case WORD_MNEM_SUFFIX:  mnemsuf_check.no_wsuf = 1; break;
-       case SHORT_MNEM_SUFFIX: mnemsuf_check.no_ssuf = 1; break;
-       case LONG_MNEM_SUFFIX:  mnemsuf_check.no_lsuf = 1; break;
-       case QWORD_MNEM_SUFFIX: mnemsuf_check.no_qsuf = 1; break;
-       }
+    case BYTE_MNEM_SUFFIX:
+      suffix_check.no_bsuf = 1;
+      break;
+    case WORD_MNEM_SUFFIX:
+      suffix_check.no_wsuf = 1;
+      break;
+    case SHORT_MNEM_SUFFIX:
+      suffix_check.no_ssuf = 1;
+      break;
+    case LONG_MNEM_SUFFIX:
+      suffix_check.no_lsuf = 1;
+      break;
+    case QWORD_MNEM_SUFFIX:
+      suffix_check.no_qsuf = 1;
+      break;
+    default:
+      /* NB: In Intel syntax, normally we can check for memory operand
+        size when there is no mnemonic suffix.  But jmp and call have
+        2 different encodings with Dword memory operand size, one with
+        No_ldSuf and the other without.  i.suffix is set to
+        LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf.  */
+      if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
+       suffix_check.no_ldsuf = 1;
      }
  
    /* Must have right number of operands.  */
@@ -5566,29 +5730,32 @@ match_template (char mnem_suffix)
           || (!intel64 && t->opcode_modifier.intel64))
         continue;
  
-      /* Check the suffix, except for some instructions in intel mode.  */
+      /* Check the suffix.  */
        i.error = invalid_instruction_suffix;
-      if ((!intel_syntax || !t->opcode_modifier.ignoresize)
-         && ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
-             || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
-             || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
-             || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf)
-             || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf)
-             || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf)))
-       continue;
-      /* In Intel mode all mnemonic suffixes must be explicitly allowed.  */
-      if ((t->opcode_modifier.no_bsuf && mnemsuf_check.no_bsuf)
-         || (t->opcode_modifier.no_wsuf && mnemsuf_check.no_wsuf)
-         || (t->opcode_modifier.no_lsuf && mnemsuf_check.no_lsuf)
-         || (t->opcode_modifier.no_ssuf && mnemsuf_check.no_ssuf)
-         || (t->opcode_modifier.no_qsuf && mnemsuf_check.no_qsuf)
-         || (t->opcode_modifier.no_ldsuf && mnemsuf_check.no_ldsuf))
+      if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
+         || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
+         || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
+         || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf)
+         || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf)
+         || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))
         continue;
  
        size_match = operand_size_match (t);
        if (!size_match)
         continue;
  
+      /* This is intentionally not
+
+        if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
+
+        as the case of a missing * on the operand is accepted (perhaps with
+        a warning, issued further down).  */
+      if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
+       {
+         i.error = operand_type_mismatch;
+         continue;
+       }
+
        for (j = 0; j < MAX_OPERANDS; j++)
         operand_types[j] = t->operand_types[j];
  
@@ -5600,10 +5767,10 @@ match_template (char mnem_suffix)
                  && !t->opcode_modifier.broadcast
                  && !intel_float_operand (t->name))
               : intel_float_operand (t->name) != 2)
-         && ((!operand_types[0].bitfield.regmmx
-              && !operand_types[0].bitfield.regsimd)
-             || (!operand_types[t->operands > 1].bitfield.regmmx
-                 && !operand_types[t->operands > 1].bitfield.regsimd))
+         && ((operand_types[0].bitfield.class != RegMMX
+              && operand_types[0].bitfield.class != RegSIMD)
+             || (operand_types[t->operands > 1].bitfield.class != RegMMX
+                 && operand_types[t->operands > 1].bitfield.class != RegSIMD))
           && (t->base_opcode != 0x0fc7
               || t->extension_opcode != 1 /* cmpxchg8b */))
         continue;
@@ -5615,10 +5782,11 @@ match_template (char mnem_suffix)
                    ? (!t->opcode_modifier.ignoresize
                       && !intel_float_operand (t->name))
                    : intel_float_operand (t->name) != 2)
-              && ((!operand_types[0].bitfield.regmmx
-                   && !operand_types[0].bitfield.regsimd)
-                  || (!operand_types[t->operands > 1].bitfield.regmmx
-                      && !operand_types[t->operands > 1].bitfield.regsimd)))
+              && ((operand_types[0].bitfield.class != RegMMX
+                   && operand_types[0].bitfield.class != RegSIMD)
+                  || (operand_types[t->operands > 1].bitfield.class != RegMMX
+                      && operand_types[t->operands > 1].bitfield.class
+                         != RegSIMD)))
         continue;
  
        /* Do not verify operands when there are none.  */
@@ -5703,16 +5871,18 @@ match_template (char mnem_suffix)
              zero-extend %eax to %rax.  */
           if (flag_code == CODE_64BIT
               && t->base_opcode == 0x90
-             && operand_type_equal (&i.types [0], &acc32)
-             && operand_type_equal (&i.types [1], &acc32))
+             && i.types[0].bitfield.instance == Accum
+             && i.types[0].bitfield.dword
+             && i.types[1].bitfield.instance == Accum
+             && i.types[1].bitfield.dword)
             continue;
           /* xrelease mov %eax, <disp> is another special case. It must not
              match the accumulator-only encoding of mov.  */
           if (flag_code != CODE_64BIT
               && i.hle_prefix
               && t->base_opcode == 0xa0
-             && i.types[0].bitfield.acc
-             && operand_type_check (i.types[1], anymem))
+             && i.types[0].bitfield.instance == Accum
+             && (i.flags[1] & Operand_Mem))
             continue;
           /* Fall through.  */
  
@@ -5731,13 +5901,13 @@ match_template (char mnem_suffix)
               {
               case dir_encoding_load:
                 if (operand_type_check (operand_types[i.operands - 1], anymem)
-                   || operand_types[i.operands - 1].bitfield.regmem)
+                   || t->opcode_modifier.regmem)
                   goto check_reverse;
                 break;
  
               case dir_encoding_store:
                 if (!operand_type_check (operand_types[i.operands - 1], anymem)
-                   && !operand_types[i.operands - 1].bitfield.regmem)
+                   && !t->opcode_modifier.regmem)
                   goto check_reverse;
                 break;
  
@@ -5794,8 +5964,8 @@ check_reverse:
                 found_reverse_match = Opcode_FloatD;
               else if (operand_types[0].bitfield.xmmword
                        || operand_types[i.operands - 1].bitfield.xmmword
-                      || operand_types[0].bitfield.regmmx
-                      || operand_types[i.operands - 1].bitfield.regmmx
+                      || operand_types[0].bitfield.class == RegMMX
+                      || operand_types[i.operands - 1].bitfield.class == RegMMX
                        || is_any_vex_encoding(t))
                 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
                                       ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
@@ -5963,11 +6133,8 @@ check_reverse:
    if (!quiet_warnings)
      {
        if (!intel_syntax
-         && (i.types[0].bitfield.jumpabsolute
-             != operand_types[0].bitfield.jumpabsolute))
-       {
-         as_warn (_("indirect %s without `*'"), t->name);
-       }
+         && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
+       as_warn (_("indirect %s without `*'"), t->name);
  
        if (t->opcode_modifier.isprefix
           && t->opcode_modifier.ignoresize)
@@ -5987,14 +6154,22 @@ check_reverse:
  
    if (found_reverse_match)
      {
-      /* If we found a reverse match we must alter the opcode
-        direction bit.  found_reverse_match holds bits to change
-        (different for int & float insns).  */
+      /* If we found a reverse match we must alter the opcode direction
+        bit and clear/flip the regmem modifier bit.  found_reverse_match
+        holds bits to change (different for int & float insns).  */
  
        i.tm.base_opcode ^= found_reverse_match;
  
        i.tm.operand_types[0] = operand_types[i.operands - 1];
        i.tm.operand_types[i.operands - 1] = operand_types[0];
+
+      /* Certain SIMD insns have their load forms specified in the opcode
+        table, and hence we need to _set_ RegMem instead of clearing it.
+        We need to avoid setting the bit though on insns like KMOVW.  */
+      i.tm.opcode_modifier.regmem
+       = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
+         && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
+         && !i.tm.opcode_modifier.regmem;
      }
  
    return t;
@@ -6003,34 +6178,24 @@ check_reverse:
  static int
  check_string (void)
  {
-  int mem_op = operand_type_check (i.types[0], anymem) ? 0 : 1;
-  if (i.tm.operand_types[mem_op].bitfield.esseg)
-    {
-      if (i.seg[0] != NULL && i.seg[0] != &es)
-       {
-         as_bad (_("`%s' operand %d must use `%ses' segment"),
-                 i.tm.name,
-                 mem_op + 1,
-                 register_prefix);
-         return 0;
-       }
-      /* There's only ever one segment override allowed per instruction.
-        This instruction possibly has a legal segment override on the
-        second operand, so copy the segment to where non-string
-        instructions store it, allowing common code.  */
-      i.seg[0] = i.seg[1];
-    }
-  else if (i.tm.operand_types[mem_op + 1].bitfield.esseg)
+  unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
+  unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
+
+  if (i.seg[op] != NULL && i.seg[op] != &es)
      {
-      if (i.seg[1] != NULL && i.seg[1] != &es)
-       {
-         as_bad (_("`%s' operand %d must use `%ses' segment"),
-                 i.tm.name,
-                 mem_op + 2,
-                 register_prefix);
-         return 0;
-       }
+      as_bad (_("`%s' operand %u must use `%ses' segment"),
+             i.tm.name,
+             intel_syntax ? i.tm.operands - es_op : es_op + 1,
+             register_prefix);
+      return 0;
      }
+
+  /* There's only ever one segment override allowed per instruction.
+     This instruction possibly has a legal segment override on the
+     second operand, so copy the segment to where non-string
+     instructions store it, allowing common code.  */
+  i.seg[op] = i.seg[1];
+
    return 1;
  }
  
@@ -6039,43 +6204,41 @@ process_suffix (void)
  {
    /* If matched instruction specifies an explicit instruction mnemonic
       suffix, use it.  */
-  if (i.tm.opcode_modifier.size16)
+  if (i.tm.opcode_modifier.size == SIZE16)
      i.suffix = WORD_MNEM_SUFFIX;
-  else if (i.tm.opcode_modifier.size32)
+  else if (i.tm.opcode_modifier.size == SIZE32)
      i.suffix = LONG_MNEM_SUFFIX;
-  else if (i.tm.opcode_modifier.size64)
+  else if (i.tm.opcode_modifier.size == SIZE64)
      i.suffix = QWORD_MNEM_SUFFIX;
-  else if (i.reg_operands)
+  else if (i.reg_operands
+          && (i.operands > 1 || i.types[0].bitfield.class == Reg))
      {
        /* If there's no instruction mnemonic suffix we try to invent one
-        based on register operands.  */
+        based on GPR operands.  */
        if (!i.suffix)
         {
           /* We take i.suffix from the last register operand specified,
              Destination register type is more significant than source
              register type.  crc32 in SSE4.2 prefers source register
              type. */
-         if (i.tm.base_opcode == 0xf20f38f1)
+         if (i.tm.base_opcode == 0xf20f38f0
+             && i.types[0].bitfield.class == Reg)
             {
-             if (i.types[0].bitfield.reg && i.types[0].bitfield.word)
+             if (i.types[0].bitfield.byte)
+               i.suffix = BYTE_MNEM_SUFFIX;
+             else if (i.types[0].bitfield.word)
                 i.suffix = WORD_MNEM_SUFFIX;
-             else if (i.types[0].bitfield.reg && i.types[0].bitfield.dword)
+             else if (i.types[0].bitfield.dword)
                 i.suffix = LONG_MNEM_SUFFIX;
-             else if (i.types[0].bitfield.reg && i.types[0].bitfield.qword)
+             else if (i.types[0].bitfield.qword)
                 i.suffix = QWORD_MNEM_SUFFIX;
             }
-         else if (i.tm.base_opcode == 0xf20f38f0)
-           {
-             if (i.types[0].bitfield.reg && i.types[0].bitfield.byte)
-               i.suffix = BYTE_MNEM_SUFFIX;
-           }
  
           if (!i.suffix)
             {
               int op;
  
-             if (i.tm.base_opcode == 0xf20f38f1
-                 || i.tm.base_opcode == 0xf20f38f0)
+             if (i.tm.base_opcode == 0xf20f38f0)
                 {
                   /* We have to know the operand size for crc32.  */
                   as_bad (_("ambiguous memory operand size for `%s`"),
@@ -6084,10 +6247,10 @@ process_suffix (void)
                 }
  
               for (op = i.operands; --op >= 0;)
-               if (!i.tm.operand_types[op].bitfield.inoutportreg
-                   && !i.tm.operand_types[op].bitfield.shiftcount)
+               if (i.tm.operand_types[op].bitfield.instance == InstanceNone
+                   || i.tm.operand_types[op].bitfield.instance == Accum)
                   {
-                   if (!i.types[op].bitfield.reg)
+                   if (i.types[op].bitfield.class != Reg)
                       continue;
                     if (i.types[op].bitfield.byte)
                       i.suffix = BYTE_MNEM_SUFFIX;
@@ -6152,15 +6315,34 @@ process_suffix (void)
    else if (i.tm.opcode_modifier.defaultsize
            && !i.suffix
            /* exclude fldenv/frstor/fsave/fstenv */
-          && i.tm.opcode_modifier.no_ssuf)
+          && i.tm.opcode_modifier.no_ssuf
+          /* exclude sysret */
+          && i.tm.base_opcode != 0x0f07)
      {
        i.suffix = stackop_size;
+      if (stackop_size == LONG_MNEM_SUFFIX)
+       {
+         /* stackop_size is set to LONG_MNEM_SUFFIX for the
+            .code16gcc directive to support 16-bit mode with
+            32-bit address.  For IRET without a suffix, generate
+            16-bit IRET (opcode 0xcf) to return from an interrupt
+            handler.  */
+         if (i.tm.base_opcode == 0xcf)
+           {
+             i.suffix = WORD_MNEM_SUFFIX;
+             as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
+           }
+         /* Warn about changed behavior for segment register push/pop.  */
+         else if ((i.tm.base_opcode | 1) == 0x07)
+           as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
+                    i.tm.name);
+       }
      }
    else if (intel_syntax
            && !i.suffix
-          && (i.tm.operand_types[0].bitfield.jumpabsolute
-              || i.tm.opcode_modifier.jumpbyte
-              || i.tm.opcode_modifier.jumpintersegment
+          && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
+              || i.tm.opcode_modifier.jump == JUMP_BYTE
+              || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
                || (i.tm.base_opcode == 0x0f01 /* [ls][gi]dt */
                    && i.tm.extension_opcode <= 3)))
      {
@@ -6250,9 +6432,9 @@ process_suffix (void)
          size prefix, except for instructions that will ignore this
          prefix anyway.  */
        if (i.reg_operands > 0
-         && i.types[0].bitfield.reg
+         && i.types[0].bitfield.class == Reg
           && i.tm.opcode_modifier.addrprefixopreg
-         && (i.tm.opcode_modifier.immext
+         && (i.tm.operand_types[0].bitfield.instance == Accum
               || i.operands == 1))
         {
           /* The address size override prefix changes the size of the
@@ -6267,16 +6449,14 @@ process_suffix (void)
        else if (i.suffix != QWORD_MNEM_SUFFIX
                && !i.tm.opcode_modifier.ignoresize
                && !i.tm.opcode_modifier.floatmf
-              && !i.tm.opcode_modifier.vex
-              && !i.tm.opcode_modifier.vexopcode
-              && !is_evex_encoding (&i.tm)
+              && !is_any_vex_encoding (&i.tm)
                && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
                    || (flag_code == CODE_64BIT
-                      && i.tm.opcode_modifier.jumpbyte)))
+                      && i.tm.opcode_modifier.jump == JUMP_BYTE)))
         {
           unsigned int prefix = DATA_PREFIX_OPCODE;
  
-         if (i.tm.opcode_modifier.jumpbyte) /* jcxz, loop */
+         if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
             prefix = ADDR_PREFIX_OPCODE;
  
           if (!add_prefix (prefix))
@@ -6292,8 +6472,10 @@ process_suffix (void)
           && ! (i.operands == 2
                 && i.tm.base_opcode == 0x90
                 && i.tm.extension_opcode == None
-               && operand_type_equal (&i.types [0], &acc64)
-               && operand_type_equal (&i.types [1], &acc64)))
+               && i.types[0].bitfield.instance == Accum
+               && i.types[0].bitfield.qword
+               && i.types[1].bitfield.instance == Accum
+               && i.types[1].bitfield.qword))
         i.rex |= REX_W;
  
        break;
@@ -6302,7 +6484,7 @@ process_suffix (void)
    if (i.reg_operands != 0
        && i.operands > 1
        && i.tm.opcode_modifier.addrprefixopreg
-      && !i.tm.opcode_modifier.immext)
+      && i.tm.operand_types[0].bitfield.instance != Accum)
      {
        /* Check invalid register operand when the address size override
          prefix changes the size of register operands.  */
@@ -6320,7 +6502,7 @@ process_suffix (void)
         }
  
        for (op = 0; op < i.operands; op++)
-       if (i.types[op].bitfield.reg
+       if (i.types[op].bitfield.class == Reg
             && ((need == need_word
                  && !i.op[op].regs->reg_type.bitfield.word)
                 || (need == need_dword
@@ -6345,7 +6527,7 @@ check_byte_reg (void)
    for (op = i.operands; --op >= 0;)
      {
        /* Skip non-register operands. */
-      if (!i.types[op].bitfield.reg)
+      if (i.types[op].bitfield.class != Reg)
         continue;
  
        /* If this is an eight bit register, it's OK.  If it's the 16 or
@@ -6355,7 +6537,8 @@ check_byte_reg (void)
         continue;
  
        /* I/O port address operands are OK too.  */
-      if (i.tm.operand_types[op].bitfield.inoutportreg)
+      if (i.tm.operand_types[op].bitfield.instance == RegD
+         && i.tm.operand_types[op].bitfield.word)
         continue;
  
        /* crc32 doesn't generate this warning.  */
@@ -6384,14 +6567,13 @@ check_byte_reg (void)
           continue;
         }
        /* Any other register is bad.  */
-      if (i.types[op].bitfield.reg
-         || i.types[op].bitfield.regmmx
-         || i.types[op].bitfield.regsimd
-         || i.types[op].bitfield.sreg2
-         || i.types[op].bitfield.sreg3
-         || i.types[op].bitfield.control
-         || i.types[op].bitfield.debug
-         || i.types[op].bitfield.test)
+      if (i.types[op].bitfield.class == Reg
+         || i.types[op].bitfield.class == RegMMX
+         || i.types[op].bitfield.class == RegSIMD
+         || i.types[op].bitfield.class == SReg
+         || i.types[op].bitfield.class == RegCR
+         || i.types[op].bitfield.class == RegDR
+         || i.types[op].bitfield.class == RegTR)
         {
           as_bad (_("`%s%s' not allowed with `%s%c'"),
                   register_prefix,
@@ -6411,13 +6593,13 @@ check_long_reg (void)
  
    for (op = i.operands; --op >= 0;)
      /* Skip non-register operands. */
-    if (!i.types[op].bitfield.reg)
+    if (i.types[op].bitfield.class != Reg)
        continue;
      /* Reject eight bit registers, except where the template requires
         them. (eg. movzb)  */
      else if (i.types[op].bitfield.byte
-            && (i.tm.operand_types[op].bitfield.reg
-                || i.tm.operand_types[op].bitfield.acc)
+            && (i.tm.operand_types[op].bitfield.class == Reg
+                || i.tm.operand_types[op].bitfield.instance == Accum)
              && (i.tm.operand_types[op].bitfield.word
                  || i.tm.operand_types[op].bitfield.dword))
        {
@@ -6431,8 +6613,8 @@ check_long_reg (void)
      /* Warn if the e prefix on a general reg is missing.  */
      else if ((!quiet_warnings || flag_code == CODE_64BIT)
              && i.types[op].bitfield.word
-            && (i.tm.operand_types[op].bitfield.reg
-                || i.tm.operand_types[op].bitfield.acc)
+            && (i.tm.operand_types[op].bitfield.class == Reg
+                || i.tm.operand_types[op].bitfield.instance == Accum)
              && i.tm.operand_types[op].bitfield.dword)
        {
         /* Prohibit these changes in the 64bit mode, since the
@@ -6453,13 +6635,13 @@ check_long_reg (void)
        }
      /* Warn if the r prefix on a general reg is present.  */
      else if (i.types[op].bitfield.qword
-            && (i.tm.operand_types[op].bitfield.reg
-                || i.tm.operand_types[op].bitfield.acc)
+            && (i.tm.operand_types[op].bitfield.class == Reg
+                || i.tm.operand_types[op].bitfield.instance == Accum)
              && i.tm.operand_types[op].bitfield.dword)
        {
         if (intel_syntax
             && i.tm.opcode_modifier.toqword
-           && !i.types[0].bitfield.regsimd)
+           && i.types[0].bitfield.class != RegSIMD)
           {
             /* Convert to QWORD.  We want REX byte. */
             i.suffix = QWORD_MNEM_SUFFIX;
@@ -6482,13 +6664,13 @@ check_qword_reg (void)
  
    for (op = i.operands; --op >= 0; )
      /* Skip non-register operands. */
-    if (!i.types[op].bitfield.reg)
+    if (i.types[op].bitfield.class != Reg)
        continue;
      /* Reject eight bit registers, except where the template requires
         them. (eg. movzb)  */
      else if (i.types[op].bitfield.byte
-            && (i.tm.operand_types[op].bitfield.reg
-                || i.tm.operand_types[op].bitfield.acc)
+            && (i.tm.operand_types[op].bitfield.class == Reg
+                || i.tm.operand_types[op].bitfield.instance == Accum)
              && (i.tm.operand_types[op].bitfield.word
                  || i.tm.operand_types[op].bitfield.dword))
        {
@@ -6502,15 +6684,15 @@ check_qword_reg (void)
      /* Warn if the r prefix on a general reg is missing.  */
      else if ((i.types[op].bitfield.word
               || i.types[op].bitfield.dword)
-            && (i.tm.operand_types[op].bitfield.reg
-                || i.tm.operand_types[op].bitfield.acc)
+            && (i.tm.operand_types[op].bitfield.class == Reg
+                || i.tm.operand_types[op].bitfield.instance == Accum)
              && i.tm.operand_types[op].bitfield.qword)
        {
         /* Prohibit these changes in the 64bit mode, since the
            lowering is more complicated.  */
         if (intel_syntax
             && i.tm.opcode_modifier.todword
-           && !i.types[0].bitfield.regsimd)
+           && i.types[0].bitfield.class != RegSIMD)
           {
             /* Convert to DWORD.  We don't want REX byte. */
             i.suffix = LONG_MNEM_SUFFIX;
@@ -6532,13 +6714,13 @@ check_word_reg (void)
    int op;
    for (op = i.operands; --op >= 0;)
      /* Skip non-register operands. */
-    if (!i.types[op].bitfield.reg)
+    if (i.types[op].bitfield.class != Reg)
        continue;
      /* Reject eight bit registers, except where the template requires
         them. (eg. movzb)  */
      else if (i.types[op].bitfield.byte
-            && (i.tm.operand_types[op].bitfield.reg
-                || i.tm.operand_types[op].bitfield.acc)
+            && (i.tm.operand_types[op].bitfield.class == Reg
+                || i.tm.operand_types[op].bitfield.instance == Accum)
              && (i.tm.operand_types[op].bitfield.word
                  || i.tm.operand_types[op].bitfield.dword))
        {
@@ -6553,8 +6735,8 @@ check_word_reg (void)
      else if ((!quiet_warnings || flag_code == CODE_64BIT)
              && (i.types[op].bitfield.dword
                  || i.types[op].bitfield.qword)
-            && (i.tm.operand_types[op].bitfield.reg
-                || i.tm.operand_types[op].bitfield.acc)
+            && (i.tm.operand_types[op].bitfield.class == Reg
+                || i.tm.operand_types[op].bitfield.instance == Accum)
              && i.tm.operand_types[op].bitfield.word)
        {
         /* Prohibit these changes in the 64bit mode, since the
@@ -6679,15 +6861,15 @@ process_operands (void)
                   && MAX_OPERANDS > dupl
                   && operand_type_equal (&i.types[dest], &regxmm));
  
-      if (i.tm.operand_types[0].bitfield.acc
+      if (i.tm.operand_types[0].bitfield.instance == Accum
           && i.tm.operand_types[0].bitfield.xmmword)
         {
           if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
             {
               /* Keep xmm0 for instructions with VEX prefix and 3
                  sources.  */
-             i.tm.operand_types[0].bitfield.acc = 0;
-             i.tm.operand_types[0].bitfield.regsimd = 1;
+             i.tm.operand_types[0].bitfield.instance = InstanceNone;
+             i.tm.operand_types[0].bitfield.class = RegSIMD;
               goto duplicate;
             }
           else
@@ -6700,6 +6882,7 @@ process_operands (void)
                   i.op[j - 1] = i.op[j];
                   i.types[j - 1] = i.types[j];
                   i.tm.operand_types[j - 1] = i.tm.operand_types[j];
+                 i.flags[j - 1] = i.flags[j];
                 }
             }
         }
@@ -6716,6 +6899,7 @@ process_operands (void)
               i.op[j] = i.op[j - 1];
               i.types[j] = i.types[j - 1];
               i.tm.operand_types[j] = i.tm.operand_types[j - 1];
+             i.flags[j] = i.flags[j - 1];
             }
           i.op[0].regs
             = (const reg_entry *) hash_find (reg_hash, "xmm0");
@@ -6731,6 +6915,7 @@ process_operands (void)
           i.op[dupl] = i.op[dest];
           i.types[dupl] = i.types[dest];
           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
+         i.flags[dupl] = i.flags[dest];
         }
        else
         {
@@ -6742,12 +6927,13 @@ duplicate:
           i.op[dupl] = i.op[dest];
           i.types[dupl] = i.types[dest];
           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
+         i.flags[dupl] = i.flags[dest];
         }
  
         if (i.tm.opcode_modifier.immext)
          process_immext ();
      }
-  else if (i.tm.operand_types[0].bitfield.acc
+  else if (i.tm.operand_types[0].bitfield.instance == Accum
            && i.tm.operand_types[0].bitfield.xmmword)
      {
        unsigned int j;
@@ -6760,6 +6946,8 @@ duplicate:
           /* We need to adjust fields in i.tm since they are used by
              build_modrm_byte.  */
           i.tm.operand_types [j - 1] = i.tm.operand_types [j];
+
+         i.flags[j - 1] = i.flags[j];
         }
  
        i.operands--;
@@ -6771,7 +6959,7 @@ duplicate:
        unsigned int regnum, first_reg_in_group, last_reg_in_group;
  
        /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
-      gas_assert (i.operands >= 2 && i.types[1].bitfield.regsimd);
+      gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
        regnum = register_number (i.op[1].regs);
        first_reg_in_group = regnum & ~3;
        last_reg_in_group = first_reg_in_group + 3;
@@ -6804,57 +6992,7 @@ duplicate:
        i.reg_operands++;
      }
  
-  if (i.tm.opcode_modifier.shortform)
-    {
-      if (i.types[0].bitfield.sreg2
-         || i.types[0].bitfield.sreg3)
-       {
-         if (i.tm.base_opcode == POP_SEG_SHORT
-             && i.op[0].regs->reg_num == 1)
-           {
-             as_bad (_("you can't `pop %scs'"), register_prefix);
-             return 0;
-           }
-         i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
-         if ((i.op[0].regs->reg_flags & RegRex) != 0)
-           i.rex |= REX_B;
-       }
-      else
-       {
-         /* The register or float register operand is in operand
-            0 or 1.  */
-         unsigned int op;
-
-         if ((i.types[0].bitfield.reg && i.types[0].bitfield.tbyte)
-             || operand_type_check (i.types[0], reg))
-           op = 0;
-         else
-           op = 1;
-         /* Register goes in low 3 bits of opcode.  */
-         i.tm.base_opcode |= i.op[op].regs->reg_num;
-         if ((i.op[op].regs->reg_flags & RegRex) != 0)
-           i.rex |= REX_B;
-         if (!quiet_warnings && i.tm.opcode_modifier.ugh)
-           {
-             /* Warn about some common errors, but press on regardless.
-                The first case can be generated by gcc (<= 2.8.1).  */
-             if (i.operands == 2)
-               {
-                 /* Reversed arguments on faddp, fsubp, etc.  */
-                 as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
-                          register_prefix, i.op[!intel_syntax].regs->reg_name,
-                          register_prefix, i.op[intel_syntax].regs->reg_name);
-               }
-             else
-               {
-                 /* Extraneous `l' suffix on fp insn.  */
-                 as_warn (_("translating to `%s %s%s'"), i.tm.name,
-                          register_prefix, i.op[0].regs->reg_name);
-               }
-           }
-       }
-    }
-  else if (i.tm.opcode_modifier.modrm)
+  if (i.tm.opcode_modifier.modrm)
      {
        /* The opcode is completed (modulo i.tm.extension_opcode which
          must be put into the modrm byte).  Now, we make the modrm and
@@ -6862,6 +7000,25 @@ duplicate:
  
        default_seg = build_modrm_byte ();
      }
+  else if (i.types[0].bitfield.class == SReg)
+    {
+      if (flag_code != CODE_64BIT
+         ? i.tm.base_opcode == POP_SEG_SHORT
+           && i.op[0].regs->reg_num == 1
+         : (i.tm.base_opcode | 1) == POP_SEG386_SHORT
+           && i.op[0].regs->reg_num < 4)
+       {
+         as_bad (_("you can't `%s %s%s'"),
+                 i.tm.name, register_prefix, i.op[0].regs->reg_name);
+         return 0;
+       }
+      if ( i.op[0].regs->reg_num > 3 && i.tm.opcode_length == 1 )
+       {
+         i.tm.base_opcode ^= POP_SEG_SHORT ^ POP_SEG386_SHORT;
+         i.tm.opcode_length = 2;
+       }
+      i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
+    }
    else if ((i.tm.base_opcode & ~0x3) == MOV_AX_DISP32)
      {
        default_seg = &ds;
@@ -6872,6 +7029,35 @@ duplicate:
          on one of their operands, the default segment is ds.  */
        default_seg = &ds;
      }
+  else if (i.tm.opcode_modifier.shortform)
+    {
+      /* The register or float register operand is in operand
+        0 or 1.  */
+      unsigned int op = i.tm.operand_types[0].bitfield.class != Reg;
+
+      /* Register goes in low 3 bits of opcode.  */
+      i.tm.base_opcode |= i.op[op].regs->reg_num;
+      if ((i.op[op].regs->reg_flags & RegRex) != 0)
+       i.rex |= REX_B;
+      if (!quiet_warnings && i.tm.opcode_modifier.ugh)
+       {
+         /* Warn about some common errors, but press on regardless.
+            The first case can be generated by gcc (<= 2.8.1).  */
+         if (i.operands == 2)
+           {
+             /* Reversed arguments on faddp, fsubp, etc.  */
+             as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
+                      register_prefix, i.op[!intel_syntax].regs->reg_name,
+                      register_prefix, i.op[intel_syntax].regs->reg_name);
+           }
+         else
+           {
+             /* Extraneous `l' suffix on fp insn.  */
+             as_warn (_("translating to `%s %s%s'"), i.tm.name,
+                      register_prefix, i.op[0].regs->reg_name);
+           }
+       }
+    }
  
    if (i.tm.base_opcode == 0x8d /* lea */
        && i.seg[0]
@@ -6909,7 +7095,7 @@ build_modrm_byte (void)
  
        /* There are 2 kinds of instructions:
          1. 5 operands: 4 register operands or 3 register operands
-        plus 1 memory operand plus one Vec_Imm4 operand, VexXDS, and
+        plus 1 memory operand plus one Imm4 operand, VexXDS, and
          VexW0 or VexW1.  The destination must be either XMM, YMM or
          ZMM register.
          2. 4 operands: 4 register operands or 3 register operands
@@ -6918,7 +7104,7 @@ build_modrm_byte (void)
                    || (i.reg_operands == 3 && i.mem_operands == 1))
                   && i.tm.opcode_modifier.vexvvvv == VEXXDS
                   && i.tm.opcode_modifier.vexw
-                 && i.tm.operand_types[dest].bitfield.regsimd);
+                 && i.tm.operand_types[dest].bitfield.class == RegSIMD);
  
        /* If VexW1 is set, the first non-immediate operand is the source and
          the second non-immediate one is encoded in the immediate operand.  */
@@ -6942,40 +7128,27 @@ build_modrm_byte (void)
           i.types[i.operands] = imm8;
           i.operands++;
  
-         gas_assert (i.tm.operand_types[reg_slot].bitfield.regsimd);
+         gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
           exp->X_op = O_constant;
           exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
         }
        else
         {
-         unsigned int imm_slot;
-
-         gas_assert (i.imm_operands == 1 && i.types[0].bitfield.vec_imm4);
-
-         if (i.tm.opcode_modifier.immext)
-           {
-             /* When ImmExt is set, the immediate byte is the last
-                operand.  */
-             imm_slot = i.operands - 1;
-             source--;
-             reg_slot--;
-           }
-         else
-           {
-             imm_slot = 0;
+         gas_assert (i.imm_operands == 1);
+         gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
+         gas_assert (!i.tm.opcode_modifier.immext);
  
-             /* Turn on Imm8 so that output_imm will generate it.  */
-             i.types[imm_slot].bitfield.imm8 = 1;
-           }
+         /* Turn on Imm8 again so that output_imm will generate it.  */
+         i.types[0].bitfield.imm8 = 1;
  
-         gas_assert (i.tm.operand_types[reg_slot].bitfield.regsimd);
-         i.op[imm_slot].imms->X_add_number
+         gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
+         i.op[0].imms->X_add_number
               |= register_number (i.op[reg_slot].regs) << 4;
           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
         }
  
-      gas_assert (i.tm.operand_types[nds].bitfield.regsimd);
+      gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
        i.vex.register_specifier = i.op[nds].regs;
      }
    else
@@ -7007,9 +7180,11 @@ build_modrm_byte (void)
           gas_assert (i.imm_operands == 1
                       || (i.imm_operands == 0
                           && (i.tm.opcode_modifier.vexvvvv == VEXXDS
-                             || i.types[0].bitfield.shiftcount)));
+                             || (i.types[0].bitfield.instance == RegC
+                                 && i.types[0].bitfield.byte))));
           if (operand_type_check (i.types[0], imm)
-             || i.types[0].bitfield.shiftcount)
+             || (i.types[0].bitfield.instance == RegC
+                 && i.types[0].bitfield.byte))
             source = 1;
           else
             source = 0;
@@ -7077,8 +7252,7 @@ build_modrm_byte (void)
             {
               /* For instructions with VexNDS, the register-only source
                  operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
-                register.  It is encoded in VEX prefix.  We need to
-                clear RegMem bit before calling operand_type_equal.  */
+                register.  It is encoded in VEX prefix.  */
  
               i386_operand_type op;
               unsigned int vvvv;
@@ -7095,11 +7269,10 @@ build_modrm_byte (void)
                 vvvv = dest;
  
               op = i.tm.operand_types[vvvv];
-             op.bitfield.regmem = 0;
               if ((dest + 1) >= i.operands
-                 || ((!op.bitfield.reg
+                 || ((op.bitfield.class != Reg
                        || (!op.bitfield.dword && !op.bitfield.qword))
-                     && !op.bitfield.regsimd
+                     && op.bitfield.class != RegSIMD
                       && !operand_type_equal (&op, &regmask)))
                 abort ();
               i.vex.register_specifier = i.op[vvvv].regs;
@@ -7108,22 +7281,22 @@ build_modrm_byte (void)
         }
  
        i.rm.mode = 3;
-      /* One of the register operands will be encoded in the i.tm.reg
-        field, the other in the combined i.tm.mode and i.tm.regmem
+      /* One of the register operands will be encoded in the i.rm.reg
+        field, the other in the combined i.rm.mode and i.rm.regmem
          fields.  If no form of this instruction supports a memory
          destination operand, then we assume the source operand may
          sometimes be a memory operand and so we need to store the
          destination in the i.rm.reg field.  */
-      if (!i.tm.operand_types[dest].bitfield.regmem
+      if (!i.tm.opcode_modifier.regmem
           && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
         {
           i.rm.reg = i.op[dest].regs->reg_num;
           i.rm.regmem = i.op[source].regs->reg_num;
-         if (i.op[dest].regs->reg_type.bitfield.regmmx
-              || i.op[source].regs->reg_type.bitfield.regmmx)
+         if (i.op[dest].regs->reg_type.bitfield.class == RegMMX
+              || i.op[source].regs->reg_type.bitfield.class == RegMMX)
             i.has_regmmx = TRUE;
-         else if (i.op[dest].regs->reg_type.bitfield.regsimd
-                  || i.op[source].regs->reg_type.bitfield.regsimd)
+         else if (i.op[dest].regs->reg_type.bitfield.class == RegSIMD
+                  || i.op[source].regs->reg_type.bitfield.class == RegSIMD)
             {
               if (i.types[dest].bitfield.zmmword
                   || i.types[source].bitfield.zmmword)
@@ -7158,7 +7331,7 @@ build_modrm_byte (void)
         }
        if (flag_code != CODE_64BIT && (i.rex & REX_R))
         {
-         if (!i.types[i.tm.operand_types[0].bitfield.regmem].bitfield.control)
+         if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
             abort ();
           i.rex &= ~REX_R;
           add_prefix (LOCK_PREFIX_OPCODE);
@@ -7174,7 +7347,7 @@ build_modrm_byte (void)
           unsigned int op;
  
           for (op = 0; op < i.operands; op++)
-           if (operand_type_check (i.types[op], anymem))
+           if (i.flags[op] & Operand_Mem)
               break;
           gas_assert (op < i.operands);
  
@@ -7474,16 +7647,15 @@ build_modrm_byte (void)
  
           for (op = 0; op < i.operands; op++)
             {
-             if (i.types[op].bitfield.reg
-                 || i.types[op].bitfield.regbnd
-                 || i.types[op].bitfield.regmask
-                 || i.types[op].bitfield.sreg2
-                 || i.types[op].bitfield.sreg3
-                 || i.types[op].bitfield.control
-                 || i.types[op].bitfield.debug
-                 || i.types[op].bitfield.test)
+             if (i.types[op].bitfield.class == Reg
+                 || i.types[op].bitfield.class == RegBND
+                 || i.types[op].bitfield.class == RegMask
+                 || i.types[op].bitfield.class == SReg
+                 || i.types[op].bitfield.class == RegCR
+                 || i.types[op].bitfield.class == RegDR
+                 || i.types[op].bitfield.class == RegTR)
                 break;
-             if (i.types[op].bitfield.regsimd)
+             if (i.types[op].bitfield.class == RegSIMD)
                 {
                   if (i.types[op].bitfield.zmmword)
                     i.has_regzmm = TRUE;
@@ -7493,7 +7665,7 @@ build_modrm_byte (void)
                     i.has_regxmm = TRUE;
                   break;
                 }
-             if (i.types[op].bitfield.regmmx)
+             if (i.types[op].bitfield.class == RegMMX)
                 {
                   i.has_regmmx = TRUE;
                   break;
@@ -7557,9 +7729,9 @@ build_modrm_byte (void)
             {
               i386_operand_type *type = &i.tm.operand_types[vex_reg];
  
-             if ((!type->bitfield.reg
+             if ((type->bitfield.class != Reg
                    || (!type->bitfield.dword && !type->bitfield.qword))
-                 && !type->bitfield.regsimd
+                 && type->bitfield.class != RegSIMD
                   && !operand_type_equal (type, &regmask))
                 abort ();
  
@@ -7699,6 +7871,12 @@ need_plt32_p (symbolS *s)
    if (!IS_ELF)
      return FALSE;
  
+#ifdef TE_SOLARIS
+  /* Don't emit PLT32 relocations on Solaris: neither the native linker
+     nor krtld supports them.  */
+  return FALSE;
+#endif
+
    /* Since there is no need to prepare for PLT branch on x86-64, we
       can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
       be used as a marker for 32-bit PC-relative branches.  */
@@ -7729,7 +7907,7 @@ output_jump (void)
    fixS *fixP;
    bfd_reloc_code_real_type jump_reloc = i.reloc[0];
  
-  if (i.tm.opcode_modifier.jumpbyte)
+  if (i.tm.opcode_modifier.jump == JUMP_BYTE)
      {
        /* This is a loop or jecxz type instruction.  */
        size = 1;
@@ -7894,7 +8072,6 @@ x86_cleanup (void)
    if (!IS_ELF || !x86_used_note)
      return;
  
-  x86_isa_1_used |= GNU_PROPERTY_X86_UINT32_VALID;
    x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
  
    /* The .note.gnu.property section layout:
@@ -7911,7 +8088,7 @@ x86_cleanup (void)
  
    /* Create the .note.gnu.property section.  */
    sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
-  bfd_set_section_flags (stdoutput, sec,
+  bfd_set_section_flags (sec,
                          (SEC_ALLOC
                           | SEC_LOAD
                           | SEC_DATA
@@ -7929,7 +8106,7 @@ x86_cleanup (void)
        alignment = 2;
      }
  
-  bfd_set_section_alignment (stdoutput, sec, alignment);
+  bfd_set_section_alignment (sec, alignment);
    elf_section_type (sec) = SHT_NOTE;
  
    /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
@@ -8000,6 +8177,25 @@ x86_cleanup (void)
  }
  #endif
  
+/* Return the byte count from offset START_OFF in START_FRAG up to
+   FRAG_NOW_PTR, a position inside the current frag (frag_now).  */
+static unsigned int
+encoding_length (const fragS *start_frag, offsetT start_off,
+                const char *frag_now_ptr)
+{
+  const fragS *frag = start_frag;
+  unsigned int total = 0;
+
+  while (frag != frag_now)
+    {
+      total += frag->fr_fix;
+      frag = frag->fr_next;
+      if (frag == NULL)
+       break;
+    }
+  return total - start_off + (frag_now_ptr - frag_now->fr_literal);
+}
+
  static void
  output_insn (void)
  {
@@ -8057,6 +8253,8 @@ output_insn (void)
         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2;
        if (i.tm.cpu_flags.bitfield.cpuavx512_vnni)
         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VNNI;
+      if (i.tm.cpu_flags.bitfield.cpuavx512_bf16)
+       x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BF16;
  
        if (i.tm.cpu_flags.bitfield.cpu8087
           || i.tm.cpu_flags.bitfield.cpu287
@@ -8064,15 +8262,9 @@ output_insn (void)
           || i.tm.cpu_flags.bitfield.cpu687
           || i.tm.cpu_flags.bitfield.cpufisttp)
         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
-      /* Don't set GNU_PROPERTY_X86_FEATURE_2_MMX for prefetchtXXX nor
-        Xfence instructions.  */
-      if (i.tm.base_opcode != 0xf18
-         && i.tm.base_opcode != 0xf0d
-         && i.tm.base_opcode != 0xfae
-         && (i.has_regmmx
-             || i.tm.cpu_flags.bitfield.cpummx
-             || i.tm.cpu_flags.bitfield.cpua3dnow
-             || i.tm.cpu_flags.bitfield.cpua3dnowa))
+      if (i.has_regmmx
+         || i.tm.base_opcode == 0xf77 /* emms */
+         || i.tm.base_opcode == 0xf0e /* femms */)
         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
        if (i.has_regxmm)
         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
@@ -8100,12 +8292,12 @@ output_insn (void)
    insn_start_off = frag_now_fix ();
  
    /* Output jumps.  */
-  if (i.tm.opcode_modifier.jump)
+  if (i.tm.opcode_modifier.jump == JUMP)
      output_branch ();
-  else if (i.tm.opcode_modifier.jumpbyte
-          || i.tm.opcode_modifier.jumpdword)
+  else if (i.tm.opcode_modifier.jump == JUMP_BYTE
+          || i.tm.opcode_modifier.jump == JUMP_DWORD)
      output_jump ();
-  else if (i.tm.opcode_modifier.jumpintersegment)
+  else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
      output_interseg_jump ();
    else
      {
@@ -8116,12 +8308,9 @@ output_insn (void)
        unsigned int prefix;
  
        if (avoid_fence
-         && i.tm.base_opcode == 0xfae
-         && i.operands == 1
-         && i.imm_operands == 1
-         && (i.op[0].imms->X_add_number == 0xe8
-             || i.op[0].imms->X_add_number == 0xf0
-             || i.op[0].imms->X_add_number == 0xf8))
+         && (i.tm.base_opcode == 0xfaee8
+             || i.tm.base_opcode == 0xfaef0
+             || i.tm.base_opcode == 0xfaef8))
          {
            /* Encode lfence, mfence, and sfence as
               f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
@@ -8150,17 +8339,17 @@ output_insn (void)
               if (i.tm.base_opcode & 0xff000000)
                 {
                   prefix = (i.tm.base_opcode >> 24) & 0xff;
-                 add_prefix (prefix);
+                 if (!i.tm.cpu_flags.bitfield.cpupadlock
+                     || prefix != REPE_PREFIX_OPCODE
+                     || (i.prefix[REP_PREFIX] != REPE_PREFIX_OPCODE))
+                   add_prefix (prefix);
                 }
               break;
             case 2:
               if ((i.tm.base_opcode & 0xff0000) != 0)
                 {
                   prefix = (i.tm.base_opcode >> 16) & 0xff;
-                 if (!i.tm.cpu_flags.bitfield.cpupadlock
-                     || prefix != REPE_PREFIX_OPCODE
-                     || (i.prefix[REP_PREFIX] != REPE_PREFIX_OPCODE))
-                   add_prefix (prefix);
+                 add_prefix (prefix);
                 }
               break;
             case 1:
@@ -8275,6 +8464,19 @@ output_insn (void)
  
        if (i.imm_operands)
         output_imm (insn_start_frag, insn_start_off);
+
+      /*
+       * In the absolute section frag_now_fix () returns plain
+       * abs_section_offset, which does not get updated as data gets added
+       * to the frag; that would break the length check below, so skip it.
+       */
+      if (now_seg != absolute_section)
+       {
+         j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
+         if (j > 15)
+           as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
+                    j);
+       }
      }
  
  #ifdef DEBUG386
@@ -8383,25 +8585,11 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off)
                                    == O_subtract))))
                       || reloc_type == BFD_RELOC_32_PCREL))
                 {
-                 offsetT add;
-
-                 if (insn_start_frag == frag_now)
-                   add = (p - frag_now->fr_literal) - insn_start_off;
-                 else
-                   {
-                     fragS *fr;
-
-                     add = insn_start_frag->fr_fix - insn_start_off;
-                     for (fr = insn_start_frag->fr_next;
-                          fr && fr != frag_now; fr = fr->fr_next)
-                       add += fr->fr_fix;
-                     add += p - frag_now->fr_literal;
-                   }
-
                   if (!object_64bit)
                     {
                       reloc_type = BFD_RELOC_386_GOTPC;
-                     i.op[n].imms->X_add_number += add;
+                     i.op[n].imms->X_add_number +=
+                       encoding_length (insn_start_frag, insn_start_off, p);
                     }
                   else if (reloc_type == BFD_RELOC_64)
                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
@@ -8417,12 +8605,13 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off)
               /* Check for "call/jmp *mem", "mov mem, %reg",
                  "test %reg, mem" and "binop mem, %reg" where binop
                  is one of adc, add, and, cmp, or, sbb, sub, xor
-                instructions.  Always generate R_386_GOT32X for
-                "sym*GOT" operand in 32-bit mode.  */
-             if ((generate_relax_relocations
-                  || (!object_64bit
-                      && i.rm.mode == 0
-                      && i.rm.regmem == 5))
+                instructions without data prefix.  Always generate
+                R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
+             if (i.prefix[DATA_PREFIX] == 0
+                 && (generate_relax_relocations
+                     || (!object_64bit
+                         && i.rm.mode == 0
+                         && i.rm.regmem == 5))
                   && (i.rm.mode == 2
                       || (i.rm.mode == 0 && i.rm.regmem == 5))
                   && ((i.operands == 1
@@ -8545,28 +8734,14 @@ output_imm (fragS *insn_start_frag, offsetT insn_start_off)
                                (i.op[n].imms->X_op_symbol)->X_op)
                               == O_subtract))))
                 {
-                 offsetT add;
-
-                 if (insn_start_frag == frag_now)
-                   add = (p - frag_now->fr_literal) - insn_start_off;
-                 else
-                   {
-                     fragS *fr;
-
-                     add = insn_start_frag->fr_fix - insn_start_off;
-                     for (fr = insn_start_frag->fr_next;
-                          fr && fr != frag_now; fr = fr->fr_next)
-                       add += fr->fr_fix;
-                     add += p - frag_now->fr_literal;
-                   }
-
                   if (!object_64bit)
                     reloc_type = BFD_RELOC_386_GOTPC;
                   else if (size == 4)
                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
                   else if (size == 8)
                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
-                 i.op[n].imms->X_add_number += add;
+                 i.op[n].imms->X_add_number +=
+                   encoding_length (insn_start_frag, insn_start_off, p);
                 }
               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
                            i.op[n].imms, 0, reloc_type);
@@ -8917,6 +9092,15 @@ x86_cons (expressionS *exp, int size)
               as_bad (_("missing or invalid expression `%s'"), save);
               *input_line_pointer = c;
             }
+         else if ((got_reloc == BFD_RELOC_386_PLT32
+                   || got_reloc == BFD_RELOC_X86_64_PLT32)
+                  && exp->X_op != O_symbol)
+           {
+             char c = *input_line_pointer;
+             *input_line_pointer = 0;
+             as_bad (_("invalid PLT expression `%s'"), save);
+             *input_line_pointer = c;
+           }
         }
      }
    else
@@ -9014,7 +9198,7 @@ check_VecOperations (char *op_string, char *op_end)
           else if ((mask = parse_register (op_string, &end_op)) != NULL)
             {
               /* k0 can't be used for write mask.  */
-             if (!mask->reg_type.bitfield.regmask || mask->reg_num == 0)
+             if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
                 {
                   as_bad (_("`%s%s' can't be used for write mask"),
                           register_prefix, mask->reg_name);
@@ -9292,9 +9476,9 @@ i386_displacement (char *disp_start, char *disp_end)
      }
  
    operand_type_set (&bigdisp, 0);
-  if ((i.types[this_operand].bitfield.jumpabsolute)
-      || (!current_templates->start->opcode_modifier.jump
-         && !current_templates->start->opcode_modifier.jumpdword))
+  if (i.jumpabsolute
+      || (current_templates->start->opcode_modifier.jump != JUMP
+         && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
      {
        bigdisp.bitfield.disp32 = 1;
        override = (i.prefix[ADDR_PREFIX] != 0);
@@ -9578,7 +9762,7 @@ i386_index_check (const char *operand_string)
    enum flag_code addr_mode = i386_addressing_mode ();
  
    if (current_templates->start->opcode_modifier.isstring
-      && !current_templates->start->opcode_modifier.immext
+      && !current_templates->start->cpu_flags.bitfield.cpupadlock
        && (current_templates->end[-1].opcode_modifier.isstring
           || i.mem_operands))
      {
@@ -9597,16 +9781,16 @@ i386_index_check (const char *operand_string)
  
        if (current_templates->start->opcode_modifier.repprefixok)
         {
-         i386_operand_type type = current_templates->end[-1].operand_types[0];
+         int es_op = current_templates->end[-1].opcode_modifier.isstring
+                     - IS_STRING_ES_OP0;
+         int op = 0;
  
-         if (!type.bitfield.baseindex
+         if (!current_templates->end[-1].operand_types[0].bitfield.baseindex
               || ((!i.mem_operands != !intel_syntax)
                   && current_templates->end[-1].operand_types[1]
                      .bitfield.baseindex))
-           type = current_templates->end[-1].operand_types[1];
-         expected_reg = hash_find (reg_hash,
-                                   di_si[addr_mode][type.bitfield.esseg]);
-
+           op = 1;
+         expected_reg = hash_find (reg_hash, di_si[addr_mode][op == es_op]);
         }
        else
         expected_reg = hash_find (reg_hash, bx[addr_mode]);
@@ -9823,7 +10007,7 @@ i386_att_operand (char *operand_string)
        ++op_string;
        if (is_space_char (*op_string))
         ++op_string;
-      i.types[this_operand].bitfield.jumpabsolute = 1;
+      i.jumpabsolute = TRUE;
      }
  
    /* Check if operand is a register.  */
@@ -9836,9 +10020,7 @@ i386_att_operand (char *operand_string)
        op_string = end_op;
        if (is_space_char (*op_string))
         ++op_string;
-      if (*op_string == ':'
-         && (r->reg_type.bitfield.sreg2
-             || r->reg_type.bitfield.sreg3))
+      if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
         {
           switch (r->reg_num)
             {
@@ -9881,7 +10063,7 @@ i386_att_operand (char *operand_string)
               ++op_string;
               if (is_space_char (*op_string))
                 ++op_string;
-             i.types[this_operand].bitfield.jumpabsolute = 1;
+             i.jumpabsolute = TRUE;
             }
           goto do_memory_reference;
         }
@@ -9915,7 +10097,7 @@ i386_att_operand (char *operand_string)
    else if (*op_string == IMMEDIATE_PREFIX)
      {
        ++op_string;
-      if (i.types[this_operand].bitfield.jumpabsolute)
+      if (i.jumpabsolute)
         {
           as_bad (_("immediate operand illegal with absolute jump"));
           return 0;
@@ -10107,7 +10289,8 @@ i386_att_operand (char *operand_string)
  
        /* Special case for (%dx) while doing input/output op.  */
        if (i.base_reg
-         && i.base_reg->reg_type.bitfield.inoutportreg
+         && i.base_reg->reg_type.bitfield.instance == RegD
+         && i.base_reg->reg_type.bitfield.word
           && i.index_reg == 0
           && i.log2_scale_factor == 0
           && i.seg[i.mem_operands] == 0
@@ -10531,9 +10714,11 @@ md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
        {
        case BFD_RELOC_386_PLT32:
        case BFD_RELOC_X86_64_PLT32:
-       /* Make the jump instruction point to the address of the operand.  At
-          runtime we merely add the offset to the actual PLT entry.  */
-       value = -4;
+       /* Make the jump instruction point to the address of the operand.
+          At runtime we merely add the offset to the actual PLT entry.
+          NB: Subtract the offset size only for jump instructions.  */
+       if (fixP->fx_pcrel)
+         value = -4;
         break;
  
        case BFD_RELOC_386_TLS_GD:
@@ -10699,19 +10884,20 @@ parse_real_register (char *reg_string, char **end_op)
      return (const reg_entry *) NULL;
  
    if ((r->reg_type.bitfield.dword
-       || r->reg_type.bitfield.sreg3
-       || r->reg_type.bitfield.control
-       || r->reg_type.bitfield.debug
-       || r->reg_type.bitfield.test)
+       || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
+       || r->reg_type.bitfield.class == RegCR
+       || r->reg_type.bitfield.class == RegDR
+       || r->reg_type.bitfield.class == RegTR)
        && !cpu_arch_flags.bitfield.cpui386)
      return (const reg_entry *) NULL;
  
-  if (r->reg_type.bitfield.regmmx && !cpu_arch_flags.bitfield.cpummx)
+  if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
      return (const reg_entry *) NULL;
  
    if (!cpu_arch_flags.bitfield.cpuavx512f)
      {
-      if (r->reg_type.bitfield.zmmword || r->reg_type.bitfield.regmask)
+      if (r->reg_type.bitfield.zmmword
+         || r->reg_type.bitfield.class == RegMask)
         return (const reg_entry *) NULL;
  
        if (!cpu_arch_flags.bitfield.cpuavx)
@@ -10724,7 +10910,7 @@ parse_real_register (char *reg_string, char **end_op)
         }
      }
  
-  if (r->reg_type.bitfield.regbnd && !cpu_arch_flags.bitfield.cpumpx)
+  if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
      return (const reg_entry *) NULL;
  
    /* Don't allow fake index register unless allow_index_reg isn't 0. */
@@ -10743,11 +10929,12 @@ parse_real_register (char *reg_string, char **end_op)
      }
  
    if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
-      && (!cpu_arch_flags.bitfield.cpulm || !r->reg_type.bitfield.control)
+      && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
        && flag_code != CODE_64BIT)
      return (const reg_entry *) NULL;
  
-  if (r->reg_type.bitfield.sreg3 && r->reg_num == RegFlat && !intel_syntax)
+  if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
+      && !intel_syntax)
      return (const reg_entry *) NULL;
  
    return r;
@@ -10884,6 +11071,7 @@ const char *md_shortopts = "qnO::";
  #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
  #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
  #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
+#define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
  
  struct option md_longopts[] =
  {
@@ -10908,6 +11096,7 @@ struct option md_longopts[] =
    {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
    {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
    {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
+  {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
    {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
    {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
    {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
@@ -10944,6 +11133,8 @@ md_parse_option (int c, const char *arg)
        /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
          should be emitted or not.  FIXME: Not implemented.  */
      case 'Q':
+      if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
+       return 0;
        break;
  
        /* -V: SVR4 argument to print version ID.  */
@@ -11225,6 +11416,15 @@ md_parse_option (int c, const char *arg)
         as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
        break;
  
+    case OPTION_MVEXWIG:
+      if (strcmp (arg, "0") == 0)
+       vexwig = vexw0;
+      else if (strcmp (arg, "1") == 0)
+       vexwig = vexw1;
+      else
+       as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
+      break;
+
      case OPTION_MADD_BND_PREFIX:
        add_bnd_prefix = 1;
        break;
@@ -11314,7 +11514,7 @@ md_parse_option (int c, const char *arg)
         {
           optimize_for_space = 1;
           /* Turn on all encoding optimizations.  */
-         optimize = -1;
+         optimize = INT_MAX;
         }
        else
         {
@@ -11437,7 +11637,7 @@ md_show_usage (FILE *stream)
  {
  #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    fprintf (stream, _("\
-  -Q                      ignored\n\
+  -Qy, -Qn                ignored\n\
    -V                      print assembler version number\n\
    -k                      ignored\n"));
  #endif
@@ -11483,6 +11683,10 @@ md_show_usage (FILE *stream)
                            encode scalar AVX instructions with specific vector\n\
                             length\n"));
    fprintf (stream, _("\
+  -mvexwig=[0|1] (default: 0)\n\
+                          encode VEX instructions with specific VEX.W value\n\
+                           for VEX.W bit ignored instructions\n"));
+  fprintf (stream, _("\
    -mevexlig=[128|256|512] (default: 128)\n\
                            encode scalar EVEX instructions with specific vector\n\
                             length\n"));
@@ -11715,7 +11919,7 @@ md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
          work.  */
        int align;
  
-      align = bfd_get_section_alignment (stdoutput, segment);
+      align = bfd_section_alignment (segment);
        size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
      }
  #endif
@@ -12158,8 +12362,7 @@ handle_large_common (int small ATTRIBUTE_UNUSED)
           /* The .lbss section is for local .largecomm symbols.  */
           lbss_section = subseg_new (".lbss", 0);
           applicable = bfd_applicable_section_flags (stdoutput);
-         bfd_set_section_flags (stdoutput, lbss_section,
-                                applicable & SEC_ALLOC);
+         bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
           seg_info (lbss_section)->bss = 1;
  
           subseg_set (seg, subseg);