x86: StaticRounding implies SAE

[deliverable/binutils-gdb.git] / gas / config / tc-i386.c
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c

index 6c5d5b46c7477b944c00b912b0371b9b64b55a46..fe50566d86b2b2fe45c56da3fce616ff0b150994 100644 (file)
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -3557,7 +3557,7 @@ is_evex_encoding (const insn_template *t)
  {
    return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
          || t->opcode_modifier.broadcast || t->opcode_modifier.masking
-        || t->opcode_modifier.staticrounding || t->opcode_modifier.sae;
+        || t->opcode_modifier.sae;
  }
  
  static INLINE bfd_boolean
@@ -3897,7 +3897,7 @@ check_hle (void)
  static void
  optimize_encoding (void)
  {
-  int j;
+  unsigned int j;
  
    if (optimize_for_space
        && i.reg_operands == 1
@@ -4095,10 +4095,13 @@ optimize_encoding (void)
            && !i.types[0].bitfield.zmmword
            && !i.types[1].bitfield.zmmword
            && !i.mask
+          && !i.broadcast
            && is_evex_encoding (&i.tm)
            && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
                || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
-              || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
+              || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f
+              || (i.tm.base_opcode & ~4) == 0x66db
+              || (i.tm.base_opcode & ~4) == 0x66eb)
            && i.tm.extension_opcode == None)
      {
        /* Optimize: -O1:
@@ -4116,8 +4119,17 @@ optimize_encoding (void)
                -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
              EVEX VOP mem, %ymmN
                -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
+          VOP, one of vpand, vpandn, vpor, vpxor:
+            EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
+              -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
+            EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
+              -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
+            EVEX VOP{d,q} mem, %xmmM, %xmmN
+              -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
+            EVEX VOP{d,q} mem, %ymmM, %ymmN
+              -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
         */
-      for (j = 0; j < 2; j++)
+      for (j = 0; j < i.operands; j++)
         if (operand_type_check (i.types[j], disp)
             && i.op[j].disps->X_op == O_constant)
           {
@@ -4147,16 +4159,12 @@ optimize_encoding (void)
        i.tm.opcode_modifier.vexw = VEXW0;
        i.tm.opcode_modifier.evex = 0;
        i.tm.opcode_modifier.masking = 0;
+      i.tm.opcode_modifier.broadcast = 0;
        i.tm.opcode_modifier.disp8memshift = 0;
        i.memshift = 0;
-      for (j = 0; j < 2; j++)
-       if (operand_type_check (i.types[j], disp)
-           && i.op[j].disps->X_op == O_constant)
-         {
-           i.types[j].bitfield.disp8
-             = fits_in_disp8 (i.op[j].disps->X_add_number);
-           break;
-         }
+      if (j < i.operands)
+       i.types[j].bitfield.disp8
+         = fits_in_disp8 (i.op[j].disps->X_add_number);
      }
  }
  
@@ -4388,9 +4396,9 @@ md_assemble (char *line)
  
    if (is_any_vex_encoding (&i.tm))
      {
-      if (flag_code == CODE_16BIT)
+      if (!cpu_arch_flags.bitfield.cpui286)
         {
-         as_bad (_("instruction `%s' isn't supported in 16-bit mode."),
+         as_bad (_("instruction `%s' isn't supported outside of protected mode."),
                   i.tm.name);
           return;
         }
@@ -5461,11 +5469,8 @@ check_VecOperands (const insn_template *t)
    /* Check RC/SAE.  */
    if (i.rounding)
      {
-      if ((i.rounding->type != saeonly
-          && !t->opcode_modifier.staticrounding)
-         || (i.rounding->type == saeonly
-             && (t->opcode_modifier.staticrounding
-                 || !t->opcode_modifier.sae)))
+      if (!t->opcode_modifier.sae
+         || (i.rounding->type != saeonly && !t->opcode_modifier.staticrounding))
         {
           i.error = unsupported_rc_sae;
           return 1;
@@ -8123,6 +8128,25 @@ x86_cleanup (void)
  }
  #endif
  
+static unsigned int
+encoding_length (const fragS *start_frag, offsetT start_off,
+                const char *frag_now_ptr)
+{
+  unsigned int len = 0;
+
+  if (start_frag != frag_now)
+    {
+      const fragS *fr = start_frag;
+
+      do {
+       len += fr->fr_fix;
+       fr = fr->fr_next;
+      } while (fr && fr != frag_now);
+    }
+
+  return len - start_off + (frag_now_ptr - frag_now->fr_literal);
+}
+
  static void
  output_insn (void)
  {
@@ -8400,6 +8424,19 @@ output_insn (void)
  
        if (i.imm_operands)
         output_imm (insn_start_frag, insn_start_off);
+
+      /*
+       * frag_now_fix () returning plain abs_section_offset when we're in the
+       * absolute section, and abs_section_offset not getting updated as data
+       * gets added to the frag breaks the logic below.
+       */
+      if (now_seg != absolute_section)
+       {
+         j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
+         if (j > 15)
+           as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
+                    j);
+       }
      }
  
  #ifdef DEBUG386
@@ -8508,25 +8545,11 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off)
                                    == O_subtract))))
                       || reloc_type == BFD_RELOC_32_PCREL))
                 {
-                 offsetT add;
-
-                 if (insn_start_frag == frag_now)
-                   add = (p - frag_now->fr_literal) - insn_start_off;
-                 else
-                   {
-                     fragS *fr;
-
-                     add = insn_start_frag->fr_fix - insn_start_off;
-                     for (fr = insn_start_frag->fr_next;
-                          fr && fr != frag_now; fr = fr->fr_next)
-                       add += fr->fr_fix;
-                     add += p - frag_now->fr_literal;
-                   }
-
                   if (!object_64bit)
                     {
                       reloc_type = BFD_RELOC_386_GOTPC;
-                     i.op[n].imms->X_add_number += add;
+                     i.op[n].imms->X_add_number +=
+                       encoding_length (insn_start_frag, insn_start_off, p);
                     }
                   else if (reloc_type == BFD_RELOC_64)
                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
@@ -8671,28 +8694,14 @@ output_imm (fragS *insn_start_frag, offsetT insn_start_off)
                                (i.op[n].imms->X_op_symbol)->X_op)
                               == O_subtract))))
                 {
-                 offsetT add;
-
-                 if (insn_start_frag == frag_now)
-                   add = (p - frag_now->fr_literal) - insn_start_off;
-                 else
-                   {
-                     fragS *fr;
-
-                     add = insn_start_frag->fr_fix - insn_start_off;
-                     for (fr = insn_start_frag->fr_next;
-                          fr && fr != frag_now; fr = fr->fr_next)
-                       add += fr->fr_fix;
-                     add += p - frag_now->fr_literal;
-                   }
-
                   if (!object_64bit)
                     reloc_type = BFD_RELOC_386_GOTPC;
                   else if (size == 4)
                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
                   else if (size == 8)
                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
-                 i.op[n].imms->X_add_number += add;
+                 i.op[n].imms->X_add_number +=
+                   encoding_length (insn_start_frag, insn_start_off, p);
                 }
               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
                            i.op[n].imms, 0, reloc_type);