x86: StaticRounding implies SAE

[deliverable/binutils-gdb.git] / gas / config / tc-i386.c
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c

index ed8cfe1ad31afd8c218a042c045645b911223d98..fe50566d86b2b2fe45c56da3fce616ff0b150994 100644 (file)
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1082,6 +1082,10 @@ static const arch_entry cpu_arch[] =
      CPU_MOVDIR64B_FLAGS, 0 },
    { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
      CPU_AVX512_BF16_FLAGS, 0 },
+  { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
+    CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
+  { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
+    CPU_ENQCMD_FLAGS, 0 },
  };
  
  static const noarch_entry cpu_noarch[] =
@@ -1122,6 +1126,8 @@ static const noarch_entry cpu_noarch[] =
    { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
    { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
    { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
+  { STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_SHSTK_FLAGS },
+  { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
  };
  
  #ifdef I386COFF
@@ -1299,7 +1305,16 @@ i386_output_nops (char *where, const unsigned char *const *patt,
    /* Place the longer NOP first.  */
    int last;
    int offset;
-  const unsigned char *nops =  patt[max_single_nop_size - 1];
+  const unsigned char *nops;
+
+  if (max_single_nop_size < 1)
+    {
+      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
+               max_single_nop_size);
+      return;
+    }
+
+  nops = patt[max_single_nop_size - 1];
  
    /* Use the smaller one if the requested one isn't available.  */
    if (nops == NULL)
@@ -1880,8 +1895,6 @@ operand_type_xor (i386_operand_type x, i386_operand_type y)
    return x;
  }
  
-static const i386_operand_type acc32 = OPERAND_TYPE_ACC32;
-static const i386_operand_type acc64 = OPERAND_TYPE_ACC64;
  static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
  static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
  static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
@@ -2989,7 +3002,7 @@ static void pe (expressionS *);
  static void ps (symbolS *);
  
  static void
-pi (char *line, i386_insn *x)
+pi (const char *line, i386_insn *x)
  {
    unsigned int j;
  
@@ -3090,6 +3103,10 @@ const type_names[] =
    { OPERAND_TYPE_REG16, "r16" },
    { OPERAND_TYPE_REG32, "r32" },
    { OPERAND_TYPE_REG64, "r64" },
+  { OPERAND_TYPE_ACC8, "acc8" },
+  { OPERAND_TYPE_ACC16, "acc16" },
+  { OPERAND_TYPE_ACC32, "acc32" },
+  { OPERAND_TYPE_ACC64, "acc64" },
    { OPERAND_TYPE_IMM8, "i8" },
    { OPERAND_TYPE_IMM8, "i8s" },
    { OPERAND_TYPE_IMM16, "i16" },
@@ -3112,7 +3129,6 @@ const type_names[] =
    { OPERAND_TYPE_FLOATACC, "FAcc" },
    { OPERAND_TYPE_SREG2, "SReg2" },
    { OPERAND_TYPE_SREG3, "SReg3" },
-  { OPERAND_TYPE_ACC, "Acc" },
    { OPERAND_TYPE_JUMPABSOLUTE, "Jump Absolute" },
    { OPERAND_TYPE_REGMMX, "rMMX" },
    { OPERAND_TYPE_REGXMM, "rXMM" },
@@ -3131,7 +3147,7 @@ pt (i386_operand_type t)
    for (j = 0; j < ARRAY_SIZE (type_names); j++)
      {
        a = operand_type_and (t, type_names[j].mask);
-      if (!operand_type_all_zero (&a))
+      if (operand_type_equal (&a, &type_names[j].mask))
         fprintf (stdout, "%s, ",  type_names[j].name);
      }
    fflush (stdout);
@@ -3541,7 +3557,7 @@ is_evex_encoding (const insn_template *t)
  {
    return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
          || t->opcode_modifier.broadcast || t->opcode_modifier.masking
-        || t->opcode_modifier.staticrounding || t->opcode_modifier.sae;
+        || t->opcode_modifier.sae;
  }
  
  static INLINE bfd_boolean
@@ -3881,7 +3897,7 @@ check_hle (void)
  static void
  optimize_encoding (void)
  {
-  int j;
+  unsigned int j;
  
    if (optimize_for_space
        && i.reg_operands == 1
@@ -3933,7 +3949,10 @@ optimize_encoding (void)
                                 && i.tm.extension_opcode == 0x4)
                             || ((i.tm.base_opcode == 0xf6
                                  || i.tm.base_opcode == 0xc6)
-                               && i.tm.extension_opcode == 0x0)))))
+                               && i.tm.extension_opcode == 0x0)))
+                   || (fits_in_imm7 (i.op[0].imms->X_add_number)
+                       && i.tm.base_opcode == 0x83
+                       && i.tm.extension_opcode == 0x4)))
                || (i.types[0].bitfield.qword
                    && ((i.reg_operands == 2
                         && i.op[0].regs == i.op[1].regs
@@ -3947,6 +3966,7 @@ optimize_encoding (void)
      {
        /* Optimize: -O:
            andq $imm31, %r64   -> andl $imm31, %r32
+          andq $imm7, %r64    -> andl $imm7, %r32
            testq $imm31, %r64  -> testl $imm31, %r32
            xorq %r64, %r64     -> xorl %r32, %r32
            subq %r64, %r64     -> subl %r32, %r32
@@ -4075,10 +4095,13 @@ optimize_encoding (void)
            && !i.types[0].bitfield.zmmword
            && !i.types[1].bitfield.zmmword
            && !i.mask
+          && !i.broadcast
            && is_evex_encoding (&i.tm)
            && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
                || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
-              || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
+              || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f
+              || (i.tm.base_opcode & ~4) == 0x66db
+              || (i.tm.base_opcode & ~4) == 0x66eb)
            && i.tm.extension_opcode == None)
      {
        /* Optimize: -O1:
@@ -4096,8 +4119,17 @@ optimize_encoding (void)
                -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
              EVEX VOP mem, %ymmN
                -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
+          VOP, one of vpand, vpandn, vpor, vpxor:
+            EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
+              -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
+            EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
+              -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
+            EVEX VOP{d,q} mem, %xmmM, %xmmN
+              -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
+            EVEX VOP{d,q} mem, %ymmM, %ymmN
+              -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
         */
-      for (j = 0; j < 2; j++)
+      for (j = 0; j < i.operands; j++)
         if (operand_type_check (i.types[j], disp)
             && i.op[j].disps->X_op == O_constant)
           {
@@ -4127,16 +4159,12 @@ optimize_encoding (void)
        i.tm.opcode_modifier.vexw = VEXW0;
        i.tm.opcode_modifier.evex = 0;
        i.tm.opcode_modifier.masking = 0;
+      i.tm.opcode_modifier.broadcast = 0;
        i.tm.opcode_modifier.disp8memshift = 0;
        i.memshift = 0;
-      for (j = 0; j < 2; j++)
-       if (operand_type_check (i.types[j], disp)
-           && i.op[j].disps->X_op == O_constant)
-         {
-           i.types[j].bitfield.disp8
-             = fits_in_disp8 (i.op[j].disps->X_add_number);
-           break;
-         }
+      if (j < i.operands)
+       i.types[j].bitfield.disp8
+         = fits_in_disp8 (i.op[j].disps->X_add_number);
      }
  }
  
@@ -4368,9 +4396,9 @@ md_assemble (char *line)
  
    if (is_any_vex_encoding (&i.tm))
      {
-      if (flag_code == CODE_16BIT)
+      if (!cpu_arch_flags.bitfield.cpui286)
         {
-         as_bad (_("instruction `%s' isn't supported in 16-bit mode."),
+         as_bad (_("instruction `%s' isn't supported outside of protected mode."),
                   i.tm.name);
           return;
         }
@@ -5441,11 +5469,8 @@ check_VecOperands (const insn_template *t)
    /* Check RC/SAE.  */
    if (i.rounding)
      {
-      if ((i.rounding->type != saeonly
-          && !t->opcode_modifier.staticrounding)
-         || (i.rounding->type == saeonly
-             && (t->opcode_modifier.staticrounding
-                 || !t->opcode_modifier.sae)))
+      if (!t->opcode_modifier.sae
+         || (i.rounding->type != saeonly && !t->opcode_modifier.staticrounding))
         {
           i.error = unsupported_rc_sae;
           return 1;
@@ -5795,8 +5820,8 @@ match_template (char mnem_suffix)
              zero-extend %eax to %rax.  */
           if (flag_code == CODE_64BIT
               && t->base_opcode == 0x90
-             && operand_type_equal (&i.types [0], &acc32)
-             && operand_type_equal (&i.types [1], &acc32))
+             && i.types[0].bitfield.acc && i.types[0].bitfield.dword
+             && i.types[1].bitfield.acc && i.types[1].bitfield.dword)
             continue;
           /* xrelease mov %eax, <disp> is another special case. It must not
              match the accumulator-only encoding of mov.  */
@@ -6242,7 +6267,19 @@ process_suffix (void)
            /* exclude fldenv/frstor/fsave/fstenv */
            && i.tm.opcode_modifier.no_ssuf)
      {
-      i.suffix = stackop_size;
+      if (stackop_size == LONG_MNEM_SUFFIX
+         && i.tm.base_opcode == 0xcf)
+       {
+         /* stackop_size is set to LONG_MNEM_SUFFIX for the
+            .code16gcc directive to support 16-bit mode with
+            32-bit address.  For IRET without a suffix, generate
+            16-bit IRET (opcode 0xcf) to return from an interrupt
+            handler.  */
+         i.suffix = WORD_MNEM_SUFFIX;
+         as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
+       }
+      else
+       i.suffix = stackop_size;
      }
    else if (intel_syntax
            && !i.suffix
@@ -6355,9 +6392,7 @@ process_suffix (void)
        else if (i.suffix != QWORD_MNEM_SUFFIX
                && !i.tm.opcode_modifier.ignoresize
                && !i.tm.opcode_modifier.floatmf
-              && !i.tm.opcode_modifier.vex
-              && !i.tm.opcode_modifier.vexopcode
-              && !is_evex_encoding (&i.tm)
+              && !is_any_vex_encoding (&i.tm)
                && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
                    || (flag_code == CODE_64BIT
                        && i.tm.opcode_modifier.jumpbyte)))
@@ -6380,8 +6415,8 @@ process_suffix (void)
           && ! (i.operands == 2
                 && i.tm.base_opcode == 0x90
                 && i.tm.extension_opcode == None
-               && operand_type_equal (&i.types [0], &acc64)
-               && operand_type_equal (&i.types [1], &acc64)))
+               && i.types[0].bitfield.acc && i.types[0].bitfield.qword
+               && i.types[1].bitfield.acc && i.types[1].bitfield.qword))
         i.rex |= REX_W;
  
        break;
@@ -7787,6 +7822,12 @@ need_plt32_p (symbolS *s)
    if (!IS_ELF)
      return FALSE;
  
+#ifdef TE_SOLARIS
+  /* Don't emit PLT32 relocation on Solaris: neither native linker nor
+     krtld support it.  */
+  return FALSE;
+#endif
+
    /* Since there is no need to prepare for PLT branch on x86-64, we
       can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
       be used as a marker for 32-bit PC-relative branches.  */
@@ -8087,6 +8128,25 @@ x86_cleanup (void)
  }
  #endif
  
+/* Return the number of bytes lying between offset START_OFF within
+   START_FRAG and FRAG_NOW_PTR, a pointer into the literal area of
+   frag_now.  This is the sum of fr_fix over every frag from START_FRAG
+   up to, but not including, frag_now, less START_OFF, plus the
+   distance of FRAG_NOW_PTR from frag_now->fr_literal.  */
+
+static unsigned int
+encoding_length (const fragS *start_frag, offsetT start_off,
+                const char *frag_now_ptr)
+{
+  const fragS *frag;
+  unsigned int total = 0;
+
+  /* Nothing is accumulated when START_FRAG already is frag_now; the
+     walk also stops if the chain ends before frag_now is reached.  */
+  for (frag = start_frag; frag && frag != frag_now; frag = frag->fr_next)
+    total += frag->fr_fix;
+
+  return total - start_off + (frag_now_ptr - frag_now->fr_literal);
+}
+
+
  static void
  output_insn (void)
  {
@@ -8144,6 +8204,8 @@ output_insn (void)
         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2;
        if (i.tm.cpu_flags.bitfield.cpuavx512_vnni)
         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VNNI;
+      if (i.tm.cpu_flags.bitfield.cpuavx512_bf16)
+       x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BF16;
  
        if (i.tm.cpu_flags.bitfield.cpu8087
           || i.tm.cpu_flags.bitfield.cpu287
@@ -8362,6 +8424,19 @@ output_insn (void)
  
        if (i.imm_operands)
         output_imm (insn_start_frag, insn_start_off);
+
+      /*
+       * frag_now_fix () returning plain abs_section_offset when we're in the
+       * absolute section, and abs_section_offset not getting updated as data
+       * gets added to the frag breaks the logic below.
+       */
+      if (now_seg != absolute_section)
+       {
+         j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
+         if (j > 15)
+           as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
+                    j);
+       }
      }
  
  #ifdef DEBUG386
@@ -8470,25 +8545,11 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off)
                                    == O_subtract))))
                       || reloc_type == BFD_RELOC_32_PCREL))
                 {
-                 offsetT add;
-
-                 if (insn_start_frag == frag_now)
-                   add = (p - frag_now->fr_literal) - insn_start_off;
-                 else
-                   {
-                     fragS *fr;
-
-                     add = insn_start_frag->fr_fix - insn_start_off;
-                     for (fr = insn_start_frag->fr_next;
-                          fr && fr != frag_now; fr = fr->fr_next)
-                       add += fr->fr_fix;
-                     add += p - frag_now->fr_literal;
-                   }
-
                   if (!object_64bit)
                     {
                       reloc_type = BFD_RELOC_386_GOTPC;
-                     i.op[n].imms->X_add_number += add;
+                     i.op[n].imms->X_add_number +=
+                       encoding_length (insn_start_frag, insn_start_off, p);
                     }
                   else if (reloc_type == BFD_RELOC_64)
                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
@@ -8633,28 +8694,14 @@ output_imm (fragS *insn_start_frag, offsetT insn_start_off)
                                (i.op[n].imms->X_op_symbol)->X_op)
                               == O_subtract))))
                 {
-                 offsetT add;
-
-                 if (insn_start_frag == frag_now)
-                   add = (p - frag_now->fr_literal) - insn_start_off;
-                 else
-                   {
-                     fragS *fr;
-
-                     add = insn_start_frag->fr_fix - insn_start_off;
-                     for (fr = insn_start_frag->fr_next;
-                          fr && fr != frag_now; fr = fr->fr_next)
-                       add += fr->fr_fix;
-                     add += p - frag_now->fr_literal;
-                   }
-
                   if (!object_64bit)
                     reloc_type = BFD_RELOC_386_GOTPC;
                   else if (size == 4)
                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
                   else if (size == 8)
                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
-                 i.op[n].imms->X_add_number += add;
+                 i.op[n].imms->X_add_number +=
+                   encoding_length (insn_start_frag, insn_start_off, p);
                 }
               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
                            i.op[n].imms, 0, reloc_type);