tc-xgate.c: Replace R_XGATE_PCREL_X with BFD_RELOC_XGATE_PCREL_X

[deliverable/binutils-gdb.git] / gas / config / tc-arm.c
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c

index 8436b32b04fde832bc1b58b1ae506e7463abb718..a69300697f0f8b2de2d22011aee530163ab2e5bb 100644 (file)
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -1,5 +1,5 @@
  /* tc-arm.c -- Assemble for the ARM
-   Copyright (C) 1994-2019 Free Software Foundation, Inc.
+   Copyright (C) 1994-2020 Free Software Foundation, Inc.
     Contributed by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
         Modified by David Taylor (dtaylor@armltd.co.uk)
         Cirrus coprocessor mods by Aldy Hernandez (aldyh@redhat.com)
@@ -154,6 +154,7 @@ static int pic_code      = FALSE;
  static int fix_v4bx         = FALSE;
  /* Warn on using deprecated features.  */
  static int warn_on_deprecated = TRUE;
+static int warn_on_restrict_it = FALSE;
  
  /* Understand CodeComposer Studio assembly syntax.  */
  bfd_boolean codecomposer_syntax = FALSE;
@@ -229,6 +230,7 @@ static const arm_feature_set arm_ext_div = ARM_FEATURE_CORE_LOW (ARM_EXT_DIV);
  static const arm_feature_set arm_ext_v7 = ARM_FEATURE_CORE_LOW (ARM_EXT_V7);
  static const arm_feature_set arm_ext_v7a = ARM_FEATURE_CORE_LOW (ARM_EXT_V7A);
  static const arm_feature_set arm_ext_v7r = ARM_FEATURE_CORE_LOW (ARM_EXT_V7R);
+static const arm_feature_set arm_ext_v8r = ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8R);
  #ifdef OBJ_ELF
  static const arm_feature_set ATTRIBUTE_UNUSED arm_ext_v7m = ARM_FEATURE_CORE_LOW (ARM_EXT_V7M);
  #endif
@@ -277,11 +279,31 @@ static const arm_feature_set arm_ext_predres =
    ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES);
  static const arm_feature_set arm_ext_bf16 =
    ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16);
+static const arm_feature_set arm_ext_i8mm =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM);
+static const arm_feature_set arm_ext_crc =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC);
+static const arm_feature_set arm_ext_cde =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE);
+static const arm_feature_set arm_ext_cde0 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE0);
+static const arm_feature_set arm_ext_cde1 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE1);
+static const arm_feature_set arm_ext_cde2 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE2);
+static const arm_feature_set arm_ext_cde3 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE3);
+static const arm_feature_set arm_ext_cde4 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE4);
+static const arm_feature_set arm_ext_cde5 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE5);
+static const arm_feature_set arm_ext_cde6 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE6);
+static const arm_feature_set arm_ext_cde7 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE7);
  
  static const arm_feature_set arm_arch_any = ARM_ANY;
-#ifdef OBJ_ELF
  static const arm_feature_set fpu_any = FPU_ANY;
-#endif
  static const arm_feature_set arm_arch_full ATTRIBUTE_UNUSED = ARM_FEATURE (-1, -1, -1);
  static const arm_feature_set arm_arch_t2 = ARM_ARCH_THUMB2;
  static const arm_feature_set arm_arch_none = ARM_ARCH_NONE;
@@ -315,9 +337,14 @@ static const arm_feature_set fpu_neon_ext_v1 =
  static const arm_feature_set fpu_vfp_v3_or_neon_ext =
    ARM_FEATURE_COPROC (FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3);
  static const arm_feature_set mve_ext =
-  ARM_FEATURE_COPROC (FPU_MVE);
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE);
  static const arm_feature_set mve_fp_ext =
-  ARM_FEATURE_COPROC (FPU_MVE_FP);
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE_FP);
+/* Note: This has more than one bit set, which means using it with
+   mark_feature_used (which reports success if *any* of the bits are set in
+   the current cpu variant) can give surprising results.  */
+static const arm_feature_set armv8m_fp =
+  ARM_FEATURE_COPROC (FPU_VFP_V5_SP_D16);
  #ifdef OBJ_ELF
  static const arm_feature_set fpu_vfp_fp16 =
    ARM_FEATURE_COPROC (FPU_VFP_EXT_FP16);
@@ -334,8 +361,6 @@ static const arm_feature_set fpu_neon_ext_armv8 =
    ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8);
  static const arm_feature_set fpu_crypto_ext_armv8 =
    ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8);
-static const arm_feature_set crc_ext_armv8 =
-  ARM_FEATURE_COPROC (CRC_EXT_ARMV8);
  static const arm_feature_set fpu_neon_ext_v8_1 =
    ARM_FEATURE_COPROC (FPU_NEON_EXT_RDMA);
  static const arm_feature_set fpu_neon_ext_dotprod =
@@ -459,7 +484,7 @@ struct neon_type_el
    unsigned size;
  };
  
-#define NEON_MAX_TYPE_ELS 4
+#define NEON_MAX_TYPE_ELS 5
  
  struct neon_type
  {
@@ -481,7 +506,7 @@ enum pred_instruction_type
     VPT_INSN,              /* The VPT/VPST insn has been parsed.  */
     MVE_OUTSIDE_PRED_INSN , /* Instruction to indicate a MVE instruction without
                               a predication code.  */
-   MVE_UNPREDICABLE_INSN   /* MVE instruction that is non-predicable.  */
+   MVE_UNPREDICABLE_INSN,  /* MVE instruction that is non-predicable.  */
  };
  
  /* The maximum number of operands we need.  */
@@ -704,7 +729,7 @@ const char * const reg_expected_msgs[] =
    [REG_TYPE_MMXWCG] = N_("iWMMXt scalar register expected"),
    [REG_TYPE_XSCALE] = N_("XScale accumulator register expected"),
    [REG_TYPE_MQ]            = N_("MVE vector register expected"),
-  [REG_TYPE_RNB]    = N_("")
+  [REG_TYPE_RNB]    = ""
  };
  
  /* Some well known registers that we refer to directly elsewhere.  */
@@ -881,6 +906,7 @@ struct asm_opcode
  #define BAD_ADDR_MODE   _("instruction does not accept this addressing mode")
  #define BAD_BRANCH     _("branch must be last instruction in IT block")
  #define BAD_BRANCH_OFF _("branch out of range or not a multiple of 2")
+#define BAD_NO_VPT     _("instruction not allowed in VPT block")
  #define BAD_NOT_IT     _("instruction not allowed in IT block")
  #define BAD_NOT_VPT    _("instruction missing MVE vector predication code")
  #define BAD_FPU                _("selected FPU does not support instruction")
@@ -898,6 +924,8 @@ struct asm_opcode
  #define BAD_RANGE      _("branch out of range")
  #define BAD_FP16       _("selected processor does not support fp16 instruction")
  #define BAD_BF16       _("selected processor does not support bf16 instruction")
+#define BAD_CDE        _("selected processor does not support cde instruction")
+#define BAD_CDE_COPROC _("coprocessor for insn is not enabled for cde")
  #define UNPRED_REG(R)  _("using " R " results in unpredictable behaviour")
  #define THUMB1_RELOC_ONLY  _("relocation valid in thumb1 code only")
  #define MVE_NOT_IT     _("Warning: instruction is UNPREDICTABLE in an IT " \
@@ -1218,6 +1246,52 @@ md_atof (int type, char * litP, int * sizeP)
        prec = 1;
        break;
  
+    /* If this is a bfloat16, then parse it slightly differently, as it
+       does not follow the IEEE specification for floating point numbers
+       exactly.  */
+    case 'b':
+      {
+       FLONUM_TYPE generic_float;
+
+       t = atof_ieee_detail (input_line_pointer, 1, 8, words, &generic_float);
+
+       if (t)
+         input_line_pointer = t;
+       else
+         return _("invalid floating point number");
+
+       switch (generic_float.sign)
+         {
+         /* Is +Inf.  */
+         case 'P':
+           words[0] = 0x7f80;
+           break;
+
+         /* Is -Inf.  */
+         case 'N':
+           words[0] = 0xff80;
+           break;
+
+         /* Is NaN.  */
+         /* bfloat16 has two types of NaN - quiet and signalling.
+            A quiet NaN has bit[6] == 1 && fraction != 0, whereas a
+            signalling NaN has bit[6] == 0 && fraction != 0.
+            This specific encoding was chosen as it has the same form
+            as used by other IEEE 754 encodings in GAS.  */
+         case 0:
+           words[0] = 0x7fff;
+           break;
+
+         default:
+           break;
+         }
+
+       *sizeP = 2;
+
+       md_number_to_chars (litP, (valueT) words[0], sizeof (LITTLENUM_TYPE));
+
+       return NULL;
+      }
      case 'f':
      case 'F':
      case 's':
@@ -1896,7 +1970,7 @@ parse_reg_list (char ** strp, enum reg_list_els etype)
               const char apsr_str[] = "apsr";
               int apsr_str_len = strlen (apsr_str);
  
-             reg = arm_reg_parse (&str, REGLIST_RN);
+             reg = arm_reg_parse (&str, REG_TYPE_RN);
               if (etype == REGLIST_CLRM)
                 {
                   if (reg == REG_SP || reg == REG_PC)
@@ -2825,8 +2899,7 @@ s_unreq (int a ATTRIBUTE_UNUSED)
  
           hash_delete (arm_reg_hsh, name, FALSE);
           free ((char *) reg->name);
-         if (reg->neon)
-           free (reg->neon);
+         free (reg->neon);
           free (reg);
  
           /* Also locate the all upper case and all lower case versions.
@@ -2841,8 +2914,7 @@ s_unreq (int a ATTRIBUTE_UNUSED)
             {
               hash_delete (arm_reg_hsh, nbuf, FALSE);
               free ((char *) reg->name);
-             if (reg->neon)
-               free (reg->neon);
+             free (reg->neon);
               free (reg);
             }
  
@@ -2853,8 +2925,7 @@ s_unreq (int a ATTRIBUTE_UNUSED)
             {
               hash_delete (arm_reg_hsh, nbuf, FALSE);
               free ((char *) reg->name);
-             if (reg->neon)
-               free (reg->neon);
+             free (reg->neon);
               free (reg);
             }
  
@@ -4573,7 +4644,7 @@ s_arm_unwind_save_mmxwr (void)
      }
  
    return;
-error:
+ error:
    ignore_rest_of_line ();
  }
  
@@ -4641,7 +4712,7 @@ s_arm_unwind_save_mmxwcg (void)
    op = 0xc700 | mask;
    add_unwind_opcode (op, 2);
    return;
-error:
+ error:
    ignore_rest_of_line ();
  }
  
@@ -5016,7 +5087,7 @@ set_fp16_format (int dummy ATTRIBUTE_UNUSED)
         as_warn (_("float16 format cannot be set more than once, ignoring."));
      }
  
-cleanup:
+ cleanup:
    *input_line_pointer = saved_char;
    ignore_rest_of_line ();
  }
@@ -5088,6 +5159,7 @@ const pseudo_typeS md_pseudo_table[] =
    { "extend",     float_cons, 'x' },
    { "ldouble",    float_cons, 'x' },
    { "packed",     float_cons, 'p' },
+  { "bfloat16",           float_cons, 'b' },
  #ifdef TE_PE
    {"secrel32", pe_directive_secrel, 0},
  #endif
@@ -6292,7 +6364,7 @@ parse_psr (char **str, bfd_boolean lhs)
      goto unsupported_psr;
  
    p += 4;
-check_suffix:
+ check_suffix:
    if (*p == '_')
      {
        /* A suffix follows.  */
@@ -6998,6 +7070,8 @@ enum operand_parse_code
    OP_RNDMQ,     /* Neon double precision (0..31) or MVE vector register.  */
    OP_RNDMQR,    /* Neon double precision (0..31), MVE vector or ARM register.
                  */
+  OP_RNSDMQR,    /* Neon single or double precision, MVE vector or ARM register.
+                */
    OP_RNQ,      /* Neon quad precision register */
    OP_RNQMQ,    /* Neon quad or MVE vector register.  */
    OP_RVSD,     /* VFP single or double precision register */
@@ -7021,6 +7095,7 @@ enum operand_parse_code
    OP_RIWG,     /* iWMMXt wCG register */
    OP_RXA,      /* XScale accumulator register */
  
+  OP_RNSDMQ,   /* Neon single, double or MVE vector register */
    OP_RNSDQMQ,  /* Neon single, double or quad register or MVE vector register
                  */
    OP_RNSDQMQR, /* Neon single, double or quad register, MVE vector register or
@@ -7089,8 +7164,11 @@ enum operand_parse_code
    OP_I63s,     /*               -64 .. 63 */
    OP_I64,      /*                 1 .. 64 */
    OP_I64z,     /*                 0 .. 64 */
+  OP_I127,     /*                 0 .. 127 */
    OP_I255,     /*                 0 .. 255 */
-
+  OP_I511,     /*                 0 .. 511 */
+  OP_I4095,    /*                 0 .. 4095 */
+  OP_I8191,    /*                 0 .. 8191 */
    OP_I4b,      /* immediate, prefix optional, 1 .. 4 */
    OP_I7b,      /*                             0 .. 7 */
    OP_I15b,     /*                             0 .. 15 */
@@ -7147,6 +7225,8 @@ enum operand_parse_code
    OP_oRNSDQ,    /* Optional single, double or quad precision vector register */
    OP_oRNSDQMQ,  /* Optional single, double or quad register or MVE vector
                     register.  */
+  OP_oRNSDMQ,   /* Optional single, double register or MVE vector
+                   register.  */
    OP_oSHll,     /* LSL immediate */
    OP_oSHar,     /* ASR immediate */
    OP_oSHllar,   /* LSL or ASR immediate */
@@ -7343,6 +7423,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
         case OP_RVS:   po_reg_or_fail (REG_TYPE_VFS);     break;
         case OP_RVD:   po_reg_or_fail (REG_TYPE_VFD);     break;
         case OP_oRND:
+       case OP_RNSDMQR:
+         po_reg_or_goto (REG_TYPE_VFS, try_rndmqr);
+         break;
+       try_rndmqr:
         case OP_RNDMQR:
           po_reg_or_goto (REG_TYPE_RN, try_rndmq);
           break;
@@ -7408,6 +7492,13 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
         case OP_RVSD_COND:
           po_reg_or_goto (REG_TYPE_VFSD, try_cond);
           break;
+       case OP_oRNSDMQ:
+       case OP_RNSDMQ:
+         po_reg_or_goto (REG_TYPE_NSD, try_mq2);
+         break;
+         try_mq2:
+         po_reg_or_fail (REG_TYPE_MQ);
+         break;
         case OP_oRNSDQ:
         case OP_RNSDQ: po_reg_or_fail (REG_TYPE_NSDQ);    break;
         case OP_RNSDQMQR:
@@ -7604,8 +7695,11 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
         case OP_I63:     po_imm_or_fail (  0,     63, FALSE);   break;
         case OP_I64:     po_imm_or_fail (  1,     64, FALSE);   break;
         case OP_I64z:    po_imm_or_fail (  0,     64, FALSE);   break;
+       case OP_I127:    po_imm_or_fail (  0,    127, FALSE);   break;
         case OP_I255:    po_imm_or_fail (  0,    255, FALSE);   break;
-
+       case OP_I511:    po_imm_or_fail (  0,    511, FALSE);   break;
+       case OP_I4095:   po_imm_or_fail (  0,    4095, FALSE);  break;
+       case OP_I8191:   po_imm_or_fail (  0,    8191, FALSE);  break;
         case OP_I4b:     po_imm_or_fail (  1,      4, TRUE);    break;
         case OP_oI7b:
         case OP_I7b:     po_imm_or_fail (  0,      7, TRUE);    break;
@@ -14724,6 +14818,15 @@ NEON_ENC_TAB
    X(2, (Q, R), QUAD),                  \
    X(2, (D, I), DOUBLE),                        \
    X(2, (Q, I), QUAD),                  \
+  X(3, (P, F, I), SINGLE),             \
+  X(3, (P, D, I), DOUBLE),             \
+  X(3, (P, Q, I), QUAD),               \
+  X(4, (P, F, F, I), SINGLE),          \
+  X(4, (P, D, D, I), DOUBLE),          \
+  X(4, (P, Q, Q, I), QUAD),            \
+  X(5, (P, F, F, F, I), SINGLE),       \
+  X(5, (P, D, D, D, I), DOUBLE),       \
+  X(5, (P, Q, Q, Q, I), QUAD),         \
    X(3, (D, L, D), DOUBLE),             \
    X(2, (D, Q), MIXED),                 \
    X(2, (Q, D), MIXED),                 \
@@ -14772,6 +14875,7 @@ NEON_ENC_TAB
  #define S2(A,B)                NS_##A##B
  #define S3(A,B,C)      NS_##A##B##C
  #define S4(A,B,C,D)    NS_##A##B##C##D
+#define S5(A,B,C,D,E)  NS_##A##B##C##D##E
  
  #define X(N, L, C) S##N L
  
@@ -14785,6 +14889,7 @@ enum neon_shape
  #undef S2
  #undef S3
  #undef S4
+#undef S5
  
  enum neon_shape_class
  {
@@ -14813,7 +14918,8 @@ enum neon_shape_el
    SE_I,
    SE_S,
    SE_R,
-  SE_L
+  SE_L,
+  SE_P
  };
  
  /* Register widths of above.  */
@@ -14826,6 +14932,7 @@ static unsigned neon_shape_el_size[] =
    0,
    32,
    32,
+  0,
    0
  };
  
@@ -14838,6 +14945,7 @@ struct neon_shape_info
  #define S2(A,B)                { SE_##A, SE_##B }
  #define S3(A,B,C)      { SE_##A, SE_##B, SE_##C }
  #define S4(A,B,C,D)    { SE_##A, SE_##B, SE_##C, SE_##D }
+#define S5(A,B,C,D,E)  { SE_##A, SE_##B, SE_##C, SE_##D, SE_##E }
  
  #define X(N, L, C) { N, S##N L }
  
@@ -14850,6 +14958,7 @@ static struct neon_shape_info neon_shape_tab[] =
  #undef S2
  #undef S3
  #undef S4
+#undef S5
  
  /* Bit masks used in type checking given instructions.
    'N_EQK' means the type must be the same as (or based on in some way) the key
@@ -15039,6 +15148,7 @@ neon_select_shape (enum neon_shape shape, ...)
                 matches = 0;
               break;
  
+           case SE_P:
             case SE_L:
               break;
             }
@@ -15767,6 +15877,8 @@ neon_logbits (unsigned x)
  
  #define LOW4(R) ((R) & 0xf)
  #define HI1(R) (((R) >> 4) & 1)
+#define LOW1(R) ((R) & 0x1)
+#define HI4(R) (((R) >> 1) & 0xf)
  
  static unsigned
  mve_get_vcmp_vpt_cond (struct neon_type_el et)
@@ -16480,36 +16592,6 @@ nsyn_insert_sp (void)
    inst.operands[0].present = 1;
  }
  
-static void
-do_vfp_nsyn_push (void)
-{
-  nsyn_insert_sp ();
-
-  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
-             _("register list must contain at least 1 and at most 16 "
-               "registers"));
-
-  if (inst.operands[1].issingle)
-    do_vfp_nsyn_opcode ("fstmdbs");
-  else
-    do_vfp_nsyn_opcode ("fstmdbd");
-}
-
-static void
-do_vfp_nsyn_pop (void)
-{
-  nsyn_insert_sp ();
-
-  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
-             _("register list must contain at least 1 and at most 16 "
-               "registers"));
-
-  if (inst.operands[1].issingle)
-    do_vfp_nsyn_opcode ("fldmias");
-  else
-    do_vfp_nsyn_opcode ("fldmiad");
-}
-
  /* Fix up Neon data-processing instructions, ORing in the correct bits for
     ARM mode or Thumb mode and moving the encoded bit 24 to bit 28.  */
  
@@ -18189,19 +18271,13 @@ do_mve_vmull (void)
  
    enum neon_shape rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_DDS,
                                           NS_QQS, NS_QQQ, NS_QQR, NS_NULL);
-  if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
-      && inst.cond == COND_ALWAYS
+  if (inst.cond == COND_ALWAYS
        && ((unsigned)inst.instruction) == M_MNEM_vmullt)
      {
+
        if (rs == NS_QQQ)
         {
-
-         struct neon_type_el et = neon_check_type (3, rs, N_EQK , N_EQK,
-                                                   N_SUF_32 | N_F64 | N_P8
-                                                   | N_P16 | N_I_MVE | N_KEY);
-         if (((et.type == NT_poly) && et.size == 8
-              && ARM_CPU_IS_ANY (cpu_variant))
-             || (et.type == NT_integer) || (et.type == NT_float))
+         if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
             goto neon_vmul;
         }
        else
@@ -18230,7 +18306,7 @@ do_mve_vmull (void)
  
    return;
  
-neon_vmul:
+ neon_vmul:
    inst.instruction = N_MNEM_vmul;
    inst.cond = 0xb;
    if (thumb_mode)
@@ -19498,8 +19574,6 @@ do_neon_mvn (void)
    if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
      {
        constraint (!inst.operands[1].isreg && !inst.operands[0].isquad, BAD_FPU);
-      constraint ((inst.instruction & 0xd00) == 0xd00,
-                 _("immediate value out of range"));
      }
  }
  
@@ -19526,7 +19600,7 @@ neon_mixed_length (struct neon_type_el et, unsigned size)
  static void
  do_neon_dyadic_long (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_QDD, NS_QQQ, NS_QQR, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_QDD, NS_HHH, NS_FFF, NS_DDD, NS_NULL);
    if (rs == NS_QDD)
      {
        if (vfp_or_neon_is_neon (NEON_CHECK_ARCH | NEON_CHECK_CC) == FAIL)
@@ -19632,7 +19706,7 @@ neon_scalar_for_fmac_fp16_long (unsigned scalar, unsigned quad_p)
               | ((elno & 0x1) << 3));
      }
  
-bad_scalar:
+ bad_scalar:
    first_error (_("scalar out of range for multiply instruction"));
    return 0;
  }
@@ -19687,8 +19761,6 @@ do_neon_fmac_maybe_scalar_long (int subtype)
    inst.instruction &= 0x00ffffff;
    inst.instruction |= high8;
  
-#define LOW1(R) ((R) & 0x1)
-#define HI4(R) (((R) >> 1) & 0xf)
    /* Unlike usually NEON three-same, encoding for Vn and Vm will depend on
       whether the instruction is in Q form and whether Vm is a scalar indexed
       operand.  */
@@ -20593,6 +20665,9 @@ do_neon_tbl_tbx (void)
  static void
  do_neon_ldm_stm (void)
  {
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)
+             && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+             _(BAD_FPU));
    /* P, U and L bits are part of bitmask.  */
    int is_dbmode = (inst.instruction & (1 << 24)) != 0;
    unsigned offsetbits = inst.operands[1].imm * 2;
@@ -20620,6 +20695,49 @@ do_neon_ldm_stm (void)
    do_vfp_cond_or_thumb ();
  }
  
+/* Handle the "pop" form of the VFP/MVE load-multiple syntax.  After
+   nsyn_insert_sp () has adjusted the operands, re-dispatch to "vldm" when
+   MVE is available.  Otherwise require a VFP unit, check that the register
+   list in operand 1 holds between 1 and 16 registers, and re-dispatch to
+   the single- or double-precision FLDMIA mnemonic.  */
+static void
+do_vfp_nsyn_pop (void)
+{
+  nsyn_insert_sp ();
+
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      /* A void function must not return an expression: call, then leave.  */
+      do_vfp_nsyn_opcode ("vldm");
+      return;
+    }
+
+  /* BAD_FPU is already wrapped in _() where it is defined, so it is used
+     directly rather than being passed through gettext a second time.  */
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd), BAD_FPU);
+
+  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
+              _("register list must contain at least 1 and at most 16 "
+                "registers"));
+
+  if (inst.operands[1].issingle)
+    do_vfp_nsyn_opcode ("fldmias");
+  else
+    do_vfp_nsyn_opcode ("fldmiad");
+}
+
+/* Handle the "push" form of the VFP/MVE store-multiple syntax.  After
+   nsyn_insert_sp () has adjusted the operands, re-dispatch to "vstmdb" when
+   MVE is available.  Otherwise require a VFP unit, check that the register
+   list in operand 1 holds between 1 and 16 registers, and re-dispatch to
+   the single- or double-precision FSTMDB mnemonic.  */
+static void
+do_vfp_nsyn_push (void)
+{
+  nsyn_insert_sp ();
+
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      /* A void function must not return an expression: call, then leave.  */
+      do_vfp_nsyn_opcode ("vstmdb");
+      return;
+    }
+
+  /* BAD_FPU is already wrapped in _() where it is defined, so it is used
+     directly rather than being passed through gettext a second time.  */
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd), BAD_FPU);
+
+  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
+              _("register list must contain at least 1 and at most 16 "
+                "registers"));
+
+  if (inst.operands[1].issingle)
+    do_vfp_nsyn_opcode ("fstmdbs");
+  else
+    do_vfp_nsyn_opcode ("fstmdbd");
+}
+
+
  static void
  do_neon_ldr_str (void)
  {
@@ -20700,7 +20818,8 @@ do_vldr_vstr (void)
    /* VLDR/VSTR.  */
    else
      {
-      if (!mark_feature_used (&fpu_vfp_ext_v1xd))
+      if (!mark_feature_used (&fpu_vfp_ext_v1xd)
+         && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
         as_bad (_("Instruction not permitted on this architecture"));
        do_neon_ldr_str ();
      }
@@ -21436,6 +21555,526 @@ do_neon_dotproduct_u (void)
    return do_neon_dotproduct (1);
  }
  
+/* Encode VUSDOT.  The instruction is marked OUTSIDE_PRED_INSN.  When
+   operand 2 is a scalar, bit 25 is set and the lane index (0 or 1) is
+   placed at bit 5; otherwise bit 21 is set and the all-vector shapes are
+   used.  Both forms are type-checked with an S8 key type.  */
+static void
+do_vusdot (void)
+{
+  enum neon_shape shape;
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+  if (!inst.operands[2].isscalar)
+    {
+      /* All-vector form.  */
+      inst.instruction |= (1 << 21);
+      shape = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+      neon_check_type (3, shape, N_EQK, N_EQK, N_S8 | N_KEY);
+      neon_three_args (shape == NS_QQQ);
+      return;
+    }
+
+  /* By-scalar form.  */
+  shape = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+  neon_check_type (3, shape, N_EQK, N_EQK, N_S8 | N_KEY);
+  inst.instruction |= (1 << 25);
+
+  /* The low four bits of the parsed scalar hold the lane index; the
+     register number sits above them.  */
+  int lane = inst.operands[2].reg & 0xf;
+  constraint (lane != 0 && lane != 1, _("index must be 0 or 1"));
+
+  inst.operands[2].reg >>= 4;
+  constraint (inst.operands[2].reg >= 16,
+              _("indexed register must be less than 16"));
+
+  neon_three_args (shape == NS_QQS);
+  inst.instruction |= (lane << 5);
+}
+
+/* Encode VSUDOT.  The instruction is marked OUTSIDE_PRED_INSN.  Only the
+   by-scalar form is encoded here: bit 25 is set, the operands are
+   type-checked with a U8 key type and the lane index (0 or 1) is placed at
+   bit 5.  When operand 2 is not a scalar nothing further is encoded.  */
+static void
+do_vsudot (void)
+{
+  enum neon_shape shape;
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+  if (!inst.operands[2].isscalar)
+    return;
+
+  shape = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+  neon_check_type (3, shape, N_EQK, N_EQK, N_U8 | N_KEY);
+  inst.instruction |= (1 << 25);
+
+  /* The low four bits of the parsed scalar hold the lane index; the
+     register number sits above them.  */
+  int lane = inst.operands[2].reg & 0xf;
+  constraint (lane != 0 && lane != 1, _("index must be 0 or 1"));
+
+  inst.operands[2].reg >>= 4;
+  constraint (inst.operands[2].reg >= 16,
+              _("indexed register must be less than 16"));
+
+  neon_three_args (shape == NS_QQS);
+  inst.instruction |= (lane << 5);
+}
+
+/* Encode VSMMLA.  Only the QQQ shape is accepted; the operands are
+   type-checked with an S8 key type, the instruction is marked
+   OUTSIDE_PRED_INSN and neon_three_args is called with 1, the value other
+   callers pass for quad shapes.  */
+static void
+do_vsmmla (void)
+{
+  neon_check_type (3, neon_select_shape (NS_QQQ, NS_NULL),
+                   N_EQK, N_EQK, N_S8 | N_KEY);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+  neon_three_args (1);
+}
+
+/* Encode VUMMLA.  Only the QQQ shape is accepted; the operands are
+   type-checked with a U8 key type, the instruction is marked
+   OUTSIDE_PRED_INSN and neon_three_args is called with 1, the value other
+   callers pass for quad shapes.  */
+static void
+do_vummla (void)
+{
+  neon_check_type (3, neon_select_shape (NS_QQQ, NS_NULL),
+                   N_EQK, N_EQK, N_U8 | N_KEY);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+  neon_three_args (1);
+}
+
+/* Check that operand INDEX is a register a CDE instruction may use.
+   For a non-dual operand in Thumb mode the register must be r0-r14 other
+   than r13, or register number REG_PC when the operand is flagged isvec
+   (reported to the user as APSR_nzcv).  In every other case the register
+   must be an even register no higher than r10.  */
+static void
+check_cde_operand (size_t index, int is_dual)
+{
+  const unsigned reg = inst.operands[index].reg;
+  const bfd_boolean is_vec = inst.operands[index].isvec;
+
+  if (!is_dual && thumb_mode)
+    {
+      const bfd_boolean ok
+        = (reg <= 14 && reg != 13) || (reg == REG_PC && is_vec);
+      constraint (!ok,
+                  _("Register must be r0-r14 except r13, or APSR_nzcv."));
+    }
+  else
+    {
+      const bfd_boolean ok = reg <= 10 && reg % 2 == 0;
+      constraint (!ok,
+                  _("Register must be an even register between r0-r10."));
+    }
+}
+
+/* Return the result of mark_feature_used for the arm_ext_cdeN feature set
+   matching coprocessor number COPROC (0-7).  Any other number yields
+   FALSE.  */
+static bfd_boolean
+cde_coproc_enabled (unsigned coproc)
+{
+  /* Indexed by coprocessor number.  */
+  static const arm_feature_set *const cde_features[] =
+    {
+      &arm_ext_cde0, &arm_ext_cde1, &arm_ext_cde2, &arm_ext_cde3,
+      &arm_ext_cde4, &arm_ext_cde5, &arm_ext_cde6, &arm_ext_cde7
+    };
+
+  if (coproc >= sizeof (cde_features) / sizeof (cde_features[0]))
+    return FALSE;
+
+  return mark_feature_used (cde_features[coproc]);
+}
+
+/* Bit position at which the coprocessor number is placed.  */
+#define cde_coproc_pos 8
+
+/* Take the coprocessor number from operand 0, reject it if it is above 7
+   or if cde_coproc_enabled says it is not enabled, and otherwise OR it into
+   the instruction at cde_coproc_pos.  */
+static void
+cde_handle_coproc (void)
+{
+  const unsigned cp_num = inst.operands[0].reg;
+
+  constraint (cp_num > 7, _("CDE Coprocessor must be in range 0-7"));
+  constraint (!cde_coproc_enabled (cp_num), BAD_CDE_COPROC);
+
+  inst.instruction |= cp_num << cde_coproc_pos;
+}
+#undef cde_coproc_pos
+
+/* Set the predication type of a CXn instruction.  Accumulator forms
+   (IS_ACCUM) that are conditional become INSIDE_IT_INSN.  A conditional
+   non-accumulator form is a syntax error.  Everything else is
+   OUTSIDE_PRED_INSN.  */
+static void
+cxn_handle_predication (bfd_boolean is_accum)
+{
+  if (is_accum && conditional_insn ())
+    {
+      set_pred_insn_type (INSIDE_IT_INSN);
+      return;
+    }
+
+  /* conditional_insn essentially checks for a suffix, not whether the
+     instruction sits inside an IT block.  The non-accumulator versions
+     should not have suffixes.  */
+  if (conditional_insn ())
+    {
+      inst.error = BAD_SYNTAX;
+      return;
+    }
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+}
+
+/* Encode a CX1-family instruction.
+   IS_DUAL is 1 for the dual-register forms and 0 otherwise; any other value
+   aborts.  IS_ACCUM is passed on to cxn_handle_predication.
+   Operand 0 is the coprocessor and operand 1 the destination register.
+   The dual forms take a second destination in operand 2, which must be the
+   register after operand 1.  The immediate is the operand that follows the
+   destination register(s).  */
+static void
+do_custom_instruction_1 (int is_dual, bfd_boolean is_accum)
+{
+  /* BAD_CDE is already wrapped in _() where it is defined, so it is not
+     passed through gettext a second time here.  */
+  constraint (!mark_feature_used (&arm_ext_cde), BAD_CDE);
+
+  unsigned imm, Rd;
+
+  Rd = inst.operands[1].reg;
+  check_cde_operand (1, is_dual);
+
+  if (is_dual == 1)
+    {
+      constraint (inst.operands[2].reg != Rd + 1,
+                  _("cx1d requires consecutive destination registers."));
+      imm = inst.operands[3].imm;
+    }
+  else if (is_dual == 0)
+    imm = inst.operands[2].imm;
+  else
+    abort ();
+
+  inst.instruction |= Rd << 12;
+
+  /* Scatter the immediate: bits [12:7] go to bits [21:16], bit 6 goes to
+     bit 7 and bits [5:0] stay in place.  */
+  inst.instruction |= (imm & 0x1F80) << 9;
+  inst.instruction |= (imm & 0x0040) << 1;
+  inst.instruction |= (imm & 0x003f);
+
+  cde_handle_coproc ();
+  cxn_handle_predication (is_accum);
+}
+
+/* Encode a CX2-family instruction.
+   IS_DUAL is 1 for the dual-register forms and 0 otherwise; any other value
+   aborts.  IS_ACCUM is passed on to cxn_handle_predication.
+   Operand 0 is the coprocessor and operand 1 the destination register.
+   The dual forms take a second destination in operand 2, which must be the
+   register after operand 1.  The source register Rn and then the immediate
+   follow the destination register(s).  */
+static void
+do_custom_instruction_2 (int is_dual, bfd_boolean is_accum)
+{
+  /* BAD_CDE is already wrapped in _() where it is defined, so it is not
+     passed through gettext a second time here.  */
+  constraint (!mark_feature_used (&arm_ext_cde), BAD_CDE);
+
+  unsigned imm, Rd, Rn;
+
+  Rd = inst.operands[1].reg;
+
+  if (is_dual == 1)
+    {
+      constraint (inst.operands[2].reg != Rd + 1,
+                  _("cx2d requires consecutive destination registers."));
+      imm = inst.operands[4].imm;
+      Rn = inst.operands[3].reg;
+    }
+  else if (is_dual == 0)
+    {
+      imm = inst.operands[3].imm;
+      Rn = inst.operands[2].reg;
+    }
+  else
+    abort ();
+
+  /* Rn is always checked as a single register; only the destination
+     follows the dual-register rule.  */
+  check_cde_operand (2 + is_dual, /* is_dual = */0);
+  check_cde_operand (1, is_dual);
+
+  inst.instruction |= Rd << 12;
+  inst.instruction |= Rn << 16;
+
+  /* Scatter the immediate: bits [9:7] go to bits [22:20], bit 6 goes to
+     bit 7 and bits [5:0] stay in place.  */
+  inst.instruction |= (imm & 0x0380) << 13;
+  inst.instruction |= (imm & 0x0040) << 1;
+  inst.instruction |= (imm & 0x003f);
+
+  cde_handle_coproc ();
+  cxn_handle_predication (is_accum);
+}
+
+/* Encode a CX3-family instruction.
+   IS_DUAL is 1 for the dual-register forms and 0 otherwise; any other value
+   aborts.  IS_ACCUM is passed on to cxn_handle_predication.
+   Operand 0 is the coprocessor and operand 1 the destination register.
+   The dual forms take a second destination in operand 2, which must be the
+   register after operand 1.  The source registers Rn and Rm and then the
+   immediate follow the destination register(s).  */
+static void
+do_custom_instruction_3 (int is_dual, bfd_boolean is_accum)
+{
+  /* BAD_CDE is already wrapped in _() where it is defined, so it is not
+     passed through gettext a second time here.  */
+  constraint (!mark_feature_used (&arm_ext_cde), BAD_CDE);
+
+  unsigned imm, Rd, Rn, Rm;
+
+  Rd = inst.operands[1].reg;
+
+  if (is_dual == 1)
+    {
+      constraint (inst.operands[2].reg != Rd + 1,
+                  _("cx3d requires consecutive destination registers."));
+      imm = inst.operands[5].imm;
+      Rn = inst.operands[3].reg;
+      Rm = inst.operands[4].reg;
+    }
+  else if (is_dual == 0)
+    {
+      imm = inst.operands[4].imm;
+      Rn = inst.operands[2].reg;
+      Rm = inst.operands[3].reg;
+    }
+  else
+    abort ();
+
+  /* Rn and Rm are always checked as single registers; only the
+     destination follows the dual-register rule.  */
+  check_cde_operand (1, is_dual);
+  check_cde_operand (2 + is_dual, /* is_dual = */0);
+  check_cde_operand (3 + is_dual, /* is_dual = */0);
+
+  /* Unlike the CX1/CX2 encoders, Rd occupies the low bits and Rm is
+     placed at bit 12.  */
+  inst.instruction |= Rd;
+  inst.instruction |= Rn << 16;
+  inst.instruction |= Rm << 12;
+
+  /* Scatter the immediate: bits [5:3] go to bits [22:20], bit 2 goes to
+     bit 7 and bits [1:0] go to bits [5:4].  */
+  inst.instruction |= (imm & 0x0038) << 17;
+  inst.instruction |= (imm & 0x0004) << 5;
+  inst.instruction |= (imm & 0x0003) << 4;
+
+  cde_handle_coproc ();
+  cxn_handle_predication (is_accum);
+}
+
+/* Encoder entry point for cx1: non-dual, non-accumulator CX1 form.  */
+static void
+do_cx1 (void)
+{
+  /* Plain call: a void function must not return an expression.  */
+  do_custom_instruction_1 (/* is_dual = */0, /* is_accum = */FALSE);
+}
+
+/* Encoder entry point for cx1a: non-dual, accumulator CX1 form.  */
+static void
+do_cx1a (void)
+{
+  /* Plain call: a void function must not return an expression.  */
+  do_custom_instruction_1 (/* is_dual = */0, /* is_accum = */TRUE);
+}
+
+/* Encoder entry point for cx1d: dual, non-accumulator CX1 form.  */
+static void
+do_cx1d (void)
+{
+  /* Plain call: a void function must not return an expression.  */
+  do_custom_instruction_1 (/* is_dual = */1, /* is_accum = */FALSE);
+}
+
+/* Encoder entry point for cx1da: dual, accumulator CX1 form.  */
+static void
+do_cx1da (void)
+{
+  /* Plain call: a void function must not return an expression.  */
+  do_custom_instruction_1 (/* is_dual = */1, /* is_accum = */TRUE);
+}
+
+/* Encoder entry point for cx2: non-dual, non-accumulator CX2 form.  */
+static void
+do_cx2 (void)
+{
+  /* Plain call: a void function must not return an expression.  */
+  do_custom_instruction_2 (/* is_dual = */0, /* is_accum = */FALSE);
+}
+
+/* Encoder entry point for cx2a: non-dual, accumulator CX2 form.  */
+static void
+do_cx2a (void)
+{
+  /* Plain call: a void function must not return an expression.  */
+  do_custom_instruction_2 (/* is_dual = */0, /* is_accum = */TRUE);
+}
+
+/* Encoder entry point for cx2d: dual, non-accumulator CX2 form.  */
+static void
+do_cx2d (void)
+{
+  /* Plain call: a void function must not return an expression.  */
+  do_custom_instruction_2 (/* is_dual = */1, /* is_accum = */FALSE);
+}
+
+/* Encoder entry point for cx2da: dual, accumulator CX2 form.  */
+static void
+do_cx2da (void)
+{
+  /* Plain call: a void function must not return an expression.  */
+  do_custom_instruction_2 (/* is_dual = */1, /* is_accum = */TRUE);
+}
+
+/* Encoder entry point for cx3: non-dual, non-accumulator CX3 form.  */
+static void
+do_cx3 (void)
+{
+  /* Plain call: a void function must not return an expression.  */
+  do_custom_instruction_3 (/* is_dual = */0, /* is_accum = */FALSE);
+}
+
+/* Encoder entry point for cx3a: non-dual, accumulator CX3 form.  */
+static void
+do_cx3a (void)
+{
+  /* Plain call: a void function must not return an expression.  */
+  do_custom_instruction_3 (/* is_dual = */0, /* is_accum = */TRUE);
+}
+
+/* Encoder entry point for cx3d: dual, non-accumulator CX3 form.  */
+static void
+do_cx3d (void)
+{
+  /* Plain call: a void function must not return an expression.  */
+  do_custom_instruction_3 (/* is_dual = */1, /* is_accum = */FALSE);
+}
+
+/* Encoder entry point for cx3da: dual, accumulator CX3 form.  */
+static void
+do_cx3da (void)
+{
+  /* Plain call: a void function must not return an expression.  */
+  do_custom_instruction_3 (/* is_dual = */1, /* is_accum = */TRUE);
+}
+
+/* OR REGNUM into the destination vector field: bits [4:1] of REGNUM go to
+   instruction bits [15:12] and bit 0 of REGNUM goes to bit 22.  */
+static void
+vcx_assign_vec_d (unsigned regnum)
+{
+  inst.instruction |= (HI4 (regnum) << 12) | (LOW1 (regnum) << 22);
+}
+
+/* OR REGNUM into the "m" vector field: bits [4:1] of REGNUM go to
+   instruction bits [3:0] and bit 0 of REGNUM goes to bit 5.  */
+static void
+vcx_assign_vec_m (unsigned regnum)
+{
+  inst.instruction |= HI4 (regnum) | (LOW1 (regnum) << 5);
+}
+
+/* OR REGNUM into the "n" vector field: bits [4:1] of REGNUM go to
+   instruction bits [19:16] and bit 0 of REGNUM goes to bit 7.  */
+static void
+vcx_assign_vec_n (unsigned regnum)
+{
+  inst.instruction |= (HI4 (regnum) << 16) | (LOW1 (regnum) << 7);
+}
+
+enum vcx_reg_type {  /* Register class used by the operands of a VCX insn.  */
+    q_reg,  /* 'q' registers (shapes NS_PQI, NS_PQQI, NS_PQQQI).  */
+    d_reg,  /* 'd' registers (shapes NS_PDI, NS_PDDI, NS_PDDDI).  */
+    s_reg   /* 's' registers (shapes NS_PFI, NS_PFFI, NS_PFFFI).  */
+};
+
+/* Map a VCX operand shape NS to the register class it uses.  NS must be
+   one of the nine P-prefixed shapes asserted below.  */
+static enum vcx_reg_type
+vcx_get_reg_type (enum neon_shape ns)
+{
+  gas_assert (ns == NS_PQI || ns == NS_PQQI || ns == NS_PQQQI
+              || ns == NS_PDI || ns == NS_PDDI || ns == NS_PDDDI
+              || ns == NS_PFI || ns == NS_PFFI || ns == NS_PFFFI);
+
+  switch (ns)
+    {
+    case NS_PQI:
+    case NS_PQQI:
+    case NS_PQQQI:
+      return q_reg;
+
+    case NS_PDI:
+    case NS_PDDI:
+    case NS_PDDDI:
+      return d_reg;
+
+    default:
+      return s_reg;
+    }
+}
+
+#define vcx_size_pos 24
+#define vcx_vec_pos 6
+/* Set the shape bit for REG_TYPE in the instruction and return the factor
+   by which register numbers must be multiplied before being encoded.
+
+   The documentation says Q registers are encoded as 2*N in the D:Vd bits
+   (or the equivalent bits for the N and M registers), D registers as N in
+   the D:Vd bits and S registers as N in the Vd:D bits.  The maximum
+   register numbers are Q -> 7, D -> 15 and S -> 31, so if everything is
+   treated as being encoded in the Vd:D bits then Q is encoded as 4*N and
+   D as 2*N.  The resulting bits are the same and the calculation becomes:
+     1. Multiply by a number determined by the register letter.
+     2. Encode the result in the Vd:D bits.
+
+   'Q' registers are handled automatically elsewhere, so their register
+   number is already 2*N (N being the number the user wrote after the
+   register letter); hence the factor returned for them is 2, not 4.  */
+static unsigned
+vcx_handle_shape (enum vcx_reg_type reg_type)
+{
+  switch (reg_type)
+    {
+    case q_reg:
+      inst.instruction |= 1 << vcx_vec_pos;
+      return 2;
+
+    case d_reg:
+      inst.instruction |= 1 << vcx_size_pos;
+      return 2;
+
+    default:
+      return 1;
+    }
+}
+#undef vcx_vec_pos
+#undef vcx_size_pos
+
+/* Report an error if register number R is out of range for REG_TYPE.
+   For 'q' registers R is expected to be even (it is asserted) and is
+   checked against 16, as it is for 'd' registers; 's' registers are
+   checked against 32.  */
+static void
+vcx_ensure_register_in_range (unsigned R, enum vcx_reg_type reg_type)
+{
+  switch (reg_type)
+    {
+    case q_reg:
+      gas_assert (R % 2 == 0);
+      constraint (R >= 16, _("'q' register must be in range 0-7"));
+      break;
+
+    case d_reg:
+      constraint (R >= 16, _("'d' register must be in range 0-15"));
+      break;
+
+    default:
+      constraint (R >= 32, _("'s' register must be in range 0-31"));
+      break;
+    }
+}
+
+static void (*vcx_assign_vec[3]) (unsigned) = {  /* Field encoders, by slot.  */
+    vcx_assign_vec_d,  /* Slot 0.  */
+    vcx_assign_vec_m,  /* Slot 1.  */
+    vcx_assign_vec_n   /* Slot 2.  */
+};
+
+/* Range-check and encode the NUM_REGISTERS vector register operands of a
+   VCX instruction, which start at operand 1.  Operand i + 1 normally goes
+   through vcx_assign_vec[i]; with three registers the second and third
+   operands swap slots, so the second uses slot 2 and the third slot 1.  */
+static void
+vcx_handle_register_arguments (unsigned num_registers,
+                               enum vcx_reg_type reg_type)
+{
+  const unsigned scale = vcx_handle_shape (reg_type);
+  unsigned pos;
+
+  for (pos = 0; pos < num_registers; pos++)
+    {
+      unsigned regno = inst.operands[pos + 1].reg;
+      unsigned slot = pos;
+
+      vcx_ensure_register_in_range (regno, reg_type);
+
+      if (num_registers == 3 && pos > 0)
+        slot = (pos == 2) ? 1 : 2;
+
+      vcx_assign_vec[slot] (regno * scale);
+    }
+}
+
+/* Set the predication type of a VCX instruction.  With 'q' registers the
+   type is INSIDE_VPT_INSN when inst.cond is above COND_ALWAYS and
+   MVE_OUTSIDE_PRED_INSN otherwise.  With 's' or 'd' registers the
+   instruction must have inst.cond == COND_ALWAYS, giving
+   OUTSIDE_PRED_INSN; anything else is reported as BAD_NOT_IT.  */
+static void
+vcx_handle_insn_block (enum vcx_reg_type reg_type)
+{
+  if (reg_type != q_reg)
+    {
+      if (inst.cond == COND_ALWAYS)
+        inst.pred_insn_type = OUTSIDE_PRED_INSN;
+      else
+        inst.error = BAD_NOT_IT;
+      return;
+    }
+
+  inst.pred_insn_type = (inst.cond > COND_ALWAYS
+                         ? INSIDE_VPT_INSN
+                         : MVE_OUTSIDE_PRED_INSN);
+}
+
+/* Checks and encoding steps shared by the vcx1/vcx2/vcx3 encoders.
+   NUM_ARGS is the number of vector register operands and RS the operand
+   shape chosen by the caller.  Requires the CDE extension, encodes the
+   coprocessor and the registers, sets the predication type, and finally
+   checks that the selected CPU provides the unit needed for the register
+   class: MVE for 'q' registers, MVE or armv8m_fp for 's'/'d'.  */
+static void
+vcx_handle_common_checks (unsigned num_args, enum neon_shape rs)
+{
+  /* BAD_CDE is already wrapped in _() where it is defined, so it is not
+     passed through gettext a second time here.  */
+  constraint (!mark_feature_used (&arm_ext_cde), BAD_CDE);
+  cde_handle_coproc ();
+  enum vcx_reg_type reg_type = vcx_get_reg_type (rs);
+  vcx_handle_register_arguments (num_args, reg_type);
+  vcx_handle_insn_block (reg_type);
+  if (reg_type == q_reg)
+    constraint (!mark_feature_used (&mve_ext),
+                _("vcx instructions with Q registers require MVE"));
+  else
+    /* armv8m_fp has several bits set, so it is first required to be a
+       subset of cpu_variant before mark_feature_used is consulted.  */
+    constraint (!(ARM_FSET_CPU_SUBSET (armv8m_fp, cpu_variant)
+                  && mark_feature_used (&armv8m_fp))
+                && !mark_feature_used (&mve_ext),
+                _("vcx instructions with S or D registers require either MVE"
+                  " or Armv8-M floating point extension."));
+}
+
+/* Encode VCX1.  After the common VCX checks, the immediate in operand 2 is
+   scattered into the instruction: bits [5:0] stay in place, bit 6 goes to
+   bit 7, bits [10:7] go to bits [19:16] and bit 11 goes to bit 24.  Bit 11
+   may only be used with 'q' registers, so other shapes reject immediates
+   of 2048 and above.  */
+static void
+do_vcx1 (void)
+{
+  const enum neon_shape shape
+    = neon_select_shape (NS_PQI, NS_PDI, NS_PFI, NS_NULL);
+
+  vcx_handle_common_checks (1, shape);
+
+  const unsigned value = inst.operands[2].imm;
+
+  inst.instruction |= ((value & 0x03f)
+                       | ((value & 0x040) << 1)
+                       | ((value & 0x780) << 9));
+
+  constraint (shape != NS_PQI && value >= 2048,
+              _("vcx1 with S or D registers takes immediate within 0-2047"));
+
+  inst.instruction |= (value & 0x800) << 13;
+}
+
+/* Encode VCX2.  After the common VCX checks, the immediate in operand 3 is
+   scattered into the instruction: bit 0 goes to bit 4, bit 1 goes to
+   bit 7, bits [5:2] go to bits [19:16] and bit 6 goes to bit 24.  Bit 6
+   may only be used with 'q' registers, so other shapes reject immediates
+   of 64 and above.  */
+static void
+do_vcx2 (void)
+{
+  const enum neon_shape shape
+    = neon_select_shape (NS_PQQI, NS_PDDI, NS_PFFI, NS_NULL);
+
+  vcx_handle_common_checks (2, shape);
+
+  const unsigned value = inst.operands[3].imm;
+
+  inst.instruction |= (((value & 0x01) << 4)
+                       | ((value & 0x02) << 6)
+                       | ((value & 0x3c) << 14));
+
+  constraint (shape != NS_PQQI && value >= 64,
+              _("vcx2 with S or D registers takes immediate within 0-63"));
+
+  inst.instruction |= (value & 0x40) << 18;
+}
+
+/* Encode VCX3.  After the common VCX checks, the immediate in operand 4 is
+   scattered into the instruction: bit 0 goes to bit 4, bits [2:1] go to
+   bits [21:20] and bit 3 goes to bit 24.  Bit 3 may only be used with 'q'
+   registers, so other shapes reject immediates of 8 and above.  */
+static void
+do_vcx3 (void)
+{
+  enum neon_shape rs = neon_select_shape (NS_PQQQI, NS_PDDDI, NS_PFFFI, NS_NULL);
+  vcx_handle_common_checks (3, rs);
+
+  unsigned imm = inst.operands[4].imm;
+  inst.instruction |= (imm & 0x1) << 4;
+  inst.instruction |= (imm & 0x6) << 19;
+  if (rs != NS_PQQQI)
+    /* The diagnostic names vcx3; it used to say vcx2.  */
+    constraint (imm >= 8,
+                _("vcx3 with S or D registers takes immediate within 0-7"));
+  inst.instruction |= (imm & 0x8) << 21;
+}
+
  /* Crypto v1 instructions.  */
  static void
  do_crypto_2op_1 (unsigned elttype, int op)
@@ -22401,6 +23040,7 @@ handle_pred_state (void)
             close_automatic_it_block ();
           break;
  
+         /* Fallthrough.  */
         case NEUTRAL_IT_INSN:
           now_pred.block_length++;
           now_pred.insn_cond = TRUE;
@@ -22653,9 +23293,11 @@ it_fsm_post_encode (void)
      handle_pred_state ();
  
    if (now_pred.insn_cond
+      && warn_on_restrict_it
        && !now_pred.warn_deprecated
        && warn_on_deprecated
-      && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8)
+      && (ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8)
+          || ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8r))
        && !ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_m))
      {
        if (inst.instruction >= 0x10000)
@@ -24367,9 +25009,9 @@ static const struct asm_opcode insns[] =
    nUF(sha256su0, _sha2op, 2, (RNQ, RNQ), sha256su0),
  
  #undef  ARM_VARIANT
-#define ARM_VARIANT   & crc_ext_armv8
+#define ARM_VARIANT   & arm_ext_crc
  #undef  THUMB_VARIANT
-#define THUMB_VARIANT & crc_ext_armv8
+#define THUMB_VARIANT & arm_ext_crc
    TUEc("crc32b", 1000040, fac0f080, 3, (RR, oRR, RR), crc32b),
    TUEc("crc32h", 1200040, fac0f090, 3, (RR, oRR, RR), crc32h),
    TUEc("crc32w", 1400040, fac0f0a0, 3, (RR, oRR, RR), crc32w),
@@ -24848,6 +25490,16 @@ static const struct asm_opcode insns[] =
  #define THUMB_VARIANT  & arm_ext_v6t2
   mcCE(vmrs,    ef00a10, 2, (APSR_RR, RVC),   vmrs),
   mcCE(vmsr,    ee00a10, 2, (RVC, RR),        vmsr),
+ mcCE(fldd,    d100b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
+ mcCE(fstd,    d000b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
+ mcCE(flds,    d100a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
+ mcCE(fsts,    d000a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
+
+  /* Memory operations.         */
+ mcCE(fldmias, c900a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
+ mcCE(fldmdbs, d300a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
+ mcCE(fstmias, c800a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
+ mcCE(fstmdbs, d200a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
  #undef THUMB_VARIANT
  
    /* Moves and type conversions.  */
@@ -24862,19 +25514,13 @@ static const struct asm_opcode insns[] =
   cCE("fmxr",   ee00a10, 2, (RVC, RR),        rn_rd),
  
    /* Memory operations.         */
- cCE("flds",   d100a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
- cCE("fsts",   d000a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
- cCE("fldmias",        c900a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
   cCE("fldmfds",        c900a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
- cCE("fldmdbs",        d300a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
   cCE("fldmeas",        d300a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
   cCE("fldmiax",        c900b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmia),
   cCE("fldmfdx",        c900b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmia),
   cCE("fldmdbx",        d300b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmdb),
   cCE("fldmeax",        d300b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmdb),
- cCE("fstmias",        c800a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
   cCE("fstmeas",        c800a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
- cCE("fstmdbs",        d200a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
   cCE("fstmfds",        d200a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
   cCE("fstmiax",        c800b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmia),
   cCE("fstmeax",        c800b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmia),
@@ -24905,8 +25551,6 @@ static const struct asm_opcode insns[] =
  
   /* Double precision load/store are still present on single precision
      implementations.  */
- cCE("fldd",   d100b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
- cCE("fstd",   d000b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
   cCE("fldmiad",        c900b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmia),
   cCE("fldmfdd",        c900b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmia),
   cCE("fldmdbd",        d300b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmdb),
@@ -24959,6 +25603,19 @@ static const struct asm_opcode insns[] =
     Individual encoder functions perform additional architecture checks.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT    & fpu_vfp_ext_v1xd
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2
+
+ NCE(vldm,      c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vldmia,    c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vldmdb,    d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstm,      c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstmia,    c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstmdb,    d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+
+ NCE(vpop,      0,       1, (VRSDLST),          vfp_nsyn_pop),
+ NCE(vpush,     0,       1, (VRSDLST),          vfp_nsyn_push),
+
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_vfp_ext_v1xd
  
@@ -24968,20 +25625,11 @@ static const struct asm_opcode insns[] =
   nCE(vnmul,     _vnmul,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmla,     _vnmla,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmls,     _vnmls,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
- NCE(vpush,     0,       1, (VRSDLST),          vfp_nsyn_push),
- NCE(vpop,      0,       1, (VRSDLST),          vfp_nsyn_pop),
   NCE(vcvtz,     0,       2, (RVSD, RVSD),       vfp_nsyn_cvtz),
  
    /* Mnemonics shared by Neon and VFP.  */
   nCEF(vmls,     _vmls,    3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar),
  
- NCE(vldm,      c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vldmia,    c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vldmdb,    d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vstm,      c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vstmia,    c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vstmdb,    d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
-
   mnCEF(vcvt,     _vcvt,   3, (RNSDQMQ, RNSDQMQ, oI32z), neon_cvt),
   nCEF(vcvtr,    _vcvt,   2, (RNSDQ, RNSDQ), neon_cvtr),
   MNCEF(vcvtb,  eb20a40, 3, (RVSDMQ, RVSDMQ, oI32b), neon_cvtb),
@@ -25896,8 +26544,8 @@ static const struct asm_opcode insns[] =
  #define ARM_VARIANT & fpu_neon_ext_v1
   mnUF(vabd,      _vabd,                  3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_if_su),
   mnUF(vabdl,     _vabdl,         3, (RNQMQ, RNDMQ, RNDMQ),   neon_dyadic_long),
- mnUF(vaddl,     _vaddl,         3, (RNQMQ, RNDMQ, RNDMQR),  neon_dyadic_long),
- mnUF(vsubl,     _vsubl,         3, (RNQMQ, RNDMQ, RNDMQR),  neon_dyadic_long),
+ mnUF(vaddl,     _vaddl,         3, (RNSDQMQ, oRNSDMQ, RNSDMQR),  neon_dyadic_long),
+ mnUF(vsubl,     _vsubl,         3, (RNSDQMQ, oRNSDMQ, RNSDMQR),  neon_dyadic_long),
   mnUF(vand,      _vand,                  3, (RNDQMQ, oRNDQMQ, RNDQMQ_Ibig), neon_logic),
   mnUF(vbic,      _vbic,                  3, (RNDQMQ, oRNDQMQ, RNDQMQ_Ibig), neon_logic),
   mnUF(vorr,      _vorr,                  3, (RNDQMQ, oRNDQMQ, RNDQMQ_Ibig), neon_logic),
@@ -25953,10 +26601,38 @@ static const struct asm_opcode insns[] =
  #define        THUMB_VARIANT &arm_ext_i8mm
   TUF ("vsmmla", c200c40, fc200c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
   TUF ("vummla", c200c50, fc200c50, 3, (RNQ, RNQ, RNQ), vummla, vummla),
- TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vummla, vummla),
+ TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
   TUF ("vusdot", c800d00, fc800d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vusdot, vusdot),
   TUF ("vsudot", c800d10, fc800d10, 3, (RNDQ, RNDQ, RNSC), vsudot, vsudot),
+
+#undef ARM_VARIANT
+#undef THUMB_VARIANT
+#define        THUMB_VARIANT &arm_ext_cde
+ ToC ("cx1", ee000000, 3, (RCP, APSR_RR, I8191), cx1),
+ ToC ("cx1a", fe000000, 3, (RCP, APSR_RR, I8191), cx1a),
+ ToC ("cx1d", ee000040, 4, (RCP, RR, APSR_RR, I8191), cx1d),
+ ToC ("cx1da", fe000040, 4, (RCP, RR, APSR_RR, I8191), cx1da),
+
+ ToC ("cx2", ee400000, 4, (RCP, APSR_RR, APSR_RR, I511), cx2),
+ ToC ("cx2a", fe400000, 4, (RCP, APSR_RR, APSR_RR, I511), cx2a),
+ ToC ("cx2d", ee400040, 5, (RCP, RR, APSR_RR, APSR_RR, I511), cx2d),
+ ToC ("cx2da", fe400040, 5, (RCP, RR, APSR_RR, APSR_RR, I511), cx2da),
+
+ ToC ("cx3", ee800000, 5, (RCP, APSR_RR, APSR_RR, APSR_RR, I63), cx3),
+ ToC ("cx3a", fe800000, 5, (RCP, APSR_RR, APSR_RR, APSR_RR, I63), cx3a),
+ ToC ("cx3d", ee800040, 6, (RCP, RR, APSR_RR, APSR_RR, APSR_RR, I63), cx3d),
+ ToC ("cx3da", fe800040, 6, (RCP, RR, APSR_RR, APSR_RR, APSR_RR, I63), cx3da),
+
+ mToC ("vcx1", ec200000, 3, (RCP, RNSDMQ, I4095), vcx1),
+ mToC ("vcx1a", fc200000, 3, (RCP, RNSDMQ, I4095), vcx1),
+
+ mToC ("vcx2", ec300000, 4, (RCP, RNSDMQ, RNSDMQ, I127), vcx2),
+ mToC ("vcx2a", fc300000, 4, (RCP, RNSDMQ, RNSDMQ, I127), vcx2),
+
+ mToC ("vcx3", ec800000, 5, (RCP, RNSDMQ, RNSDMQ, RNSDMQ, I15), vcx3),
+ mToC ("vcx3a", fc800000, 5, (RCP, RNSDMQ, RNSDMQ, RNSDMQ, I15), vcx3),
  };
+
  #undef ARM_VARIANT
  #undef THUMB_VARIANT
  #undef TCE
@@ -26868,7 +27544,7 @@ start_unwind_section (const segT text_seg, int idx)
    const char * text_name;
    const char * prefix;
    const char * prefix_once;
-  const char * group_name;
+  struct elf_section_match match;
    char * sec_name;
    int type;
    int flags;
@@ -26902,13 +27578,13 @@ start_unwind_section (const segT text_seg, int idx)
  
    flags = SHF_ALLOC;
    linkonce = 0;
-  group_name = 0;
+  memset (&match, 0, sizeof (match));
  
    /* Handle COMDAT group.  */
    if (prefix != prefix_once && (text_seg->flags & SEC_LINK_ONCE) != 0)
      {
-      group_name = elf_group_name (text_seg);
-      if (group_name == NULL)
+      match.group_name = elf_group_name (text_seg);
+      if (match.group_name == NULL)
         {
           as_bad (_("Group section `%s' has no group signature"),
                   segment_name (text_seg));
@@ -26919,7 +27595,7 @@ start_unwind_section (const segT text_seg, int idx)
        linkonce = 1;
      }
  
-  obj_elf_change_section (sec_name, type, 0, flags, 0, group_name,
+  obj_elf_change_section (sec_name, type, flags, 0, &match,
                           linkonce, 0);
  
    /* Set the section link for index tables.  */
@@ -28616,6 +29292,9 @@ md_apply_fix (fixS *    fixP,
                           (((unsigned long) fixP->fx_frag->fr_address
                             + (unsigned long) fixP->fx_where) & ~3)
                           + (unsigned long) value);
+         else if (get_recorded_alignment (seg) < 2)
+           as_warn_where (fixP->fx_file, fixP->fx_line,
+                          _("section does not have enough alignment to ensure safe PC-relative loads"));
  
           if (value & ~0x3fc)
             as_bad_where (fixP->fx_file, fixP->fx_line,
@@ -30390,6 +31069,11 @@ struct arm_option_table arm_opts[] =
    {"mwarn-deprecated", NULL, &warn_on_deprecated, 1, NULL},
    {"mno-warn-deprecated", N_("do not warn on use of deprecated feature"),
     &warn_on_deprecated, 0, NULL},
+
+  {"mwarn-restrict-it", N_("warn about performance deprecated IT instructions"
+   " in ARMv8-A and ARMv8-R"), &warn_on_restrict_it, 1, NULL},
+  {"mno-warn-restrict-it", NULL, &warn_on_restrict_it, 0, NULL},
+
    {"mwarn-syms", N_("warn about symbols that match instruction names [default]"), (int *) (& flag_warn_syms), TRUE, NULL},
    {"mno-warn-syms", N_("disable warnings about symobls that match instructions"), (int *) (& flag_warn_syms), FALSE, NULL},
    {NULL, NULL, NULL, 0, NULL}
@@ -30807,25 +31491,25 @@ static const struct arm_cpu_option_table arm_cpus[] =
                ARM_ARCH_NONE,
                FPU_ARCH_NEON_VFP_V4),
    ARM_CPU_OPT ("cortex-a32",     "Cortex-A32",        ARM_ARCH_V8A,
-              ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a35",     "Cortex-A35",        ARM_ARCH_V8A,
-              ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a53",     "Cortex-A53",        ARM_ARCH_V8A,
-              ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a55",    "Cortex-A55",         ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
    ARM_CPU_OPT ("cortex-a57",     "Cortex-A57",        ARM_ARCH_V8A,
-              ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a72",     "Cortex-A72",        ARM_ARCH_V8A,
-             ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
               FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a73",     "Cortex-A73",        ARM_ARCH_V8A,
-             ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
               FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a75",    "Cortex-A75",         ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
@@ -30858,7 +31542,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
                ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV),
                FPU_ARCH_VFP_V3D16),
    ARM_CPU_OPT ("cortex-r52",     "Cortex-R52",        ARM_ARCH_V8R,
-             ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
               FPU_ARCH_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-m35p",    "Cortex-M35P",       ARM_ARCH_V8M_MAIN,
                ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
@@ -30888,7 +31572,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
                ARM_ARCH_NONE,
                FPU_NONE),
    ARM_CPU_OPT ("exynos-m1",      "Samsung Exynos M1", ARM_ARCH_V8A,
-              ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("neoverse-n1",    "Neoverse N1",               ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
@@ -30927,7 +31611,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
                ARM_ARCH_NONE,
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("xgene2",         "APM X-Gene 2",      ARM_ARCH_V8A,
-              ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
  
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, ARM_ARCH_NONE, NULL }
@@ -31047,7 +31731,7 @@ static const struct arm_ext_table armv7em_ext_table[] =
  
  static const struct arm_ext_table armv8a_ext_table[] =
  {
-  ARM_ADD ("crc", ARCH_CRC_ARMV8),
+  ARM_ADD ("crc", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC)),
    ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
@@ -31080,6 +31764,8 @@ static const struct arm_ext_table armv82a_ext_table[] =
    ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8_1),
    ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_2_FP16),
    ARM_ADD ("fp16fml", FPU_ARCH_NEON_VFP_ARMV8_2_FP16FML),
+  ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+  ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_1,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
    ARM_ADD ("dotprod", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
@@ -31096,6 +31782,8 @@ static const struct arm_ext_table armv84a_ext_table[] =
  {
    ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
    ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
+  ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+  ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
  
@@ -31111,6 +31799,8 @@ static const struct arm_ext_table armv85a_ext_table[] =
  {
    ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
    ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
+  ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+  ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
  
@@ -31122,22 +31812,35 @@ static const struct arm_ext_table armv85a_ext_table[] =
  
  static const struct arm_ext_table armv86a_ext_table[] =
  {
+  ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
  };
  
+/* Extension table entries "cdecp0" through "cdecp7".  Each entry names
+   ARM_EXT2_CDE together with the matching ARM_EXT2_CDE<n> bit.  The
+   expansion has no trailing comma, so a table using it writes
+   "CDE_EXTENSIONS," before its next entry.  */
+#define CDE_EXTENSIONS \
+  ARM_ADD ("cdecp0", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE0)), \
+  ARM_ADD ("cdecp1", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE1)), \
+  ARM_ADD ("cdecp2", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE2)), \
+  ARM_ADD ("cdecp3", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE3)), \
+  ARM_ADD ("cdecp4", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE4)), \
+  ARM_ADD ("cdecp5", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE5)), \
+  ARM_ADD ("cdecp6", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE6)), \
+  ARM_ADD ("cdecp7", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE7))
+
  static const struct arm_ext_table armv8m_main_ext_table[] =
  {
-  ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
-                 ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP)),
+  ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_AEXT_V8M_MAIN_DSP),
+                 ARM_FEATURE_CORE_LOW (ARM_AEXT_V8M_MAIN_DSP)),
    ARM_EXT ("fp", FPU_ARCH_VFP_V5_SP_D16, ALL_FP),
    ARM_ADD ("fp.dp", FPU_ARCH_VFP_V5D16),
+  CDE_EXTENSIONS,
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
  };
  
+
  static const struct arm_ext_table armv8_1m_main_ext_table[] =
  {
-  ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
-                 ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP)),
+  ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_AEXT_V8M_MAIN_DSP),
+                 ARM_FEATURE_CORE_LOW (ARM_AEXT_V8M_MAIN_DSP)),
    ARM_EXT ("fp",
            ARM_FEATURE (0, ARM_EXT2_FP16_INST,
                         FPU_VFP_V5_SP_D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA),
@@ -31145,18 +31848,21 @@ static const struct arm_ext_table armv8_1m_main_ext_table[] =
    ARM_ADD ("fp.dp",
            ARM_FEATURE (0, ARM_EXT2_FP16_INST,
                         FPU_VFP_V5D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)),
-  ARM_EXT ("mve", ARM_FEATURE_COPROC (FPU_MVE),
-          ARM_FEATURE_COPROC (FPU_MVE | FPU_MVE_FP)),
+  ARM_EXT ("mve", ARM_FEATURE (ARM_AEXT_V8M_MAIN_DSP, ARM_EXT2_MVE, 0),
+          ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE | ARM_EXT2_MVE_FP)),
    ARM_ADD ("mve.fp",
-          ARM_FEATURE (0, ARM_EXT2_FP16_INST,
-                       FPU_MVE | FPU_MVE_FP | FPU_VFP_V5_SP_D16 |
-                       FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)),
+          ARM_FEATURE (ARM_AEXT_V8M_MAIN_DSP,
+                       ARM_EXT2_FP16_INST | ARM_EXT2_MVE | ARM_EXT2_MVE_FP,
+                       FPU_VFP_V5_SP_D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)),
+  CDE_EXTENSIONS,
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
  };
  
+#undef CDE_EXTENSIONS
+
  static const struct arm_ext_table armv8r_ext_table[] =
  {
-  ARM_ADD ("crc", ARCH_CRC_ARMV8),
+  ARM_ADD ("crc", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC)),
    ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
@@ -31261,10 +31967,8 @@ struct arm_option_extension_value_table
     use the context sensitive approach using arm_ext_table's.  */
  static const struct arm_option_extension_value_table arm_extensions[] =
  {
-  ARM_EXT_OPT ("bf16",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
-                       ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
-                       ARM_ARCH_V8_2A),
-  ARM_EXT_OPT ("crc",  ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+  ARM_EXT_OPT ("crc",   ARM_FEATURE_CORE_HIGH(ARM_EXT2_CRC),
+                        ARM_FEATURE_CORE_HIGH(ARM_EXT2_CRC),
                          ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                          ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8),
@@ -32171,14 +32875,16 @@ get_aeabi_cpu_arch_from_fset (const arm_feature_set *arch_ext_fset,
    if (p_ver_ret == NULL)
      return -1;
  
-found:
+ found:
    /* Tag_CPU_arch_profile.  */
-  if (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v7a)
-      || ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v8)
-      || (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_atomics)
-         && !ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v8m_m_only)))
+  if (!ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v8r)
+      && (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v7a)
+          || ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v8)
+          || (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_atomics)
+              && !ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v8m_m_only))))
      *profile = 'A';
-  else if (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v7r))
+  else if (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v7r)
+      || ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v8r))
      *profile = 'R';
    else if (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_m))
      *profile = 'M';
@@ -32489,6 +33195,7 @@ s_arm_arch (int ignored ATTRIBUTE_UNUSED)
      if (streq (opt->name, name))
        {
         selected_arch = opt->value;
+       selected_ctx_ext_table = opt->ext_table;
         selected_ext = arm_arch_none;
         selected_cpu = selected_arch;
         strcpy (selected_cpu_name, opt->name);
@@ -32649,6 +33356,7 @@ s_arm_fpu (int ignored ATTRIBUTE_UNUSED)
      if (streq (opt->name, name))
        {
         selected_fpu = opt->value;
+       ARM_CLEAR_FEATURE (selected_cpu, selected_cpu, fpu_any);
  #ifndef CPU_DEFAULT
         if (no_cpu_selected ())
           ARM_MERGE_FEATURE_SETS (cpu_variant, arm_arch_any, selected_fpu);