[PATCH 5/57][Arm][GAS] Add support for MVE instructions: vmull{b,t}

[deliverable/binutils-gdb.git] / gas / config / tc-arm.c
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c

index f52cf7fac27f6e3fc43e36863438aec3f7e3bc2a..f325dcf1f1161d105b098b1463f09dfe0833a99b 100644 (file)
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -1,5 +1,5 @@
  /* tc-arm.c -- Assemble for the ARM
-   Copyright (C) 1994-2017 Free Software Foundation, Inc.
+   Copyright (C) 1994-2019 Free Software Foundation, Inc.
     Contributed by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
         Modified by David Taylor (dtaylor@armltd.co.uk)
         Cirrus coprocessor mods by Aldy Hernandez (aldyh@redhat.com)
@@ -75,6 +75,9 @@ static struct
    unsigned       sp_restored:1;
  } unwind;
  
+/* Whether --fdpic was given.  */
+static int arm_fdpic;
+
  #endif /* OBJ_ELF */
  
  /* Results from operand parsing worker functions.  */
@@ -123,7 +126,12 @@ enum arm_float_abi
  
  #define streq(a, b)          (strcmp (a, b) == 0)
  
+/* Current set of feature bits available (CPU+FPU).  Different from
+   selected_cpu + selected_fpu in case of autodetection since the CPU
+   feature bits are then all set.  */
  static arm_feature_set cpu_variant;
+/* Feature bits used in each execution state.  Used to set build attribute
+   (in particular Tag_*_ISA_use) in CPU autodetection mode.  */
  static arm_feature_set arm_arch_used;
  static arm_feature_set thumb_arch_used;
  
@@ -143,17 +151,24 @@ bfd_boolean codecomposer_syntax = FALSE;
  /* Variables that we set while parsing command-line options.  Once all
     options have been read we re-process these values to set the real
     assembly flags.  */
+
+/* CPU and FPU feature bits set for legacy CPU and FPU options (e.g. -marm1
+   instead of -mcpu=arm1).  */
  static const arm_feature_set *legacy_cpu = NULL;
  static const arm_feature_set *legacy_fpu = NULL;
  
+/* CPU, extension and FPU feature bits selected by -mcpu.  */
  static const arm_feature_set *mcpu_cpu_opt = NULL;
-static arm_feature_set *dyn_mcpu_ext_opt = NULL;
+static arm_feature_set *mcpu_ext_opt = NULL;
  static const arm_feature_set *mcpu_fpu_opt = NULL;
+
+/* CPU, extension and FPU feature bits selected by -march.  */
  static const arm_feature_set *march_cpu_opt = NULL;
-static arm_feature_set *dyn_march_ext_opt = NULL;
+static arm_feature_set *march_ext_opt = NULL;
  static const arm_feature_set *march_fpu_opt = NULL;
+
+/* Feature bits selected by -mfpu.  */
  static const arm_feature_set *mfpu_opt = NULL;
-static const arm_feature_set *object_arch = NULL;
  
  /* Constants for known architecture features.  */
  static const arm_feature_set fpu_default = FPU_DEFAULT;
@@ -189,6 +204,9 @@ static const arm_feature_set arm_ext_v5j = ARM_FEATURE_CORE_LOW (ARM_EXT_V5J);
  static const arm_feature_set arm_ext_v6 = ARM_FEATURE_CORE_LOW (ARM_EXT_V6);
  static const arm_feature_set arm_ext_v6k = ARM_FEATURE_CORE_LOW (ARM_EXT_V6K);
  static const arm_feature_set arm_ext_v6t2 = ARM_FEATURE_CORE_LOW (ARM_EXT_V6T2);
+/* Only for compatibility of hint instructions.  */
+static const arm_feature_set arm_ext_v6k_v6t2 =
+  ARM_FEATURE_CORE_LOW (ARM_EXT_V6K | ARM_EXT_V6T2);
  static const arm_feature_set arm_ext_v6_notm =
    ARM_FEATURE_CORE_LOW (ARM_EXT_V6_NOTM);
  static const arm_feature_set arm_ext_v6_dsp =
@@ -217,6 +235,8 @@ static const arm_feature_set arm_ext_pan = ARM_FEATURE_CORE_HIGH (ARM_EXT2_PAN);
  static const arm_feature_set arm_ext_v8m = ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8M);
  static const arm_feature_set arm_ext_v8m_main =
    ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8M_MAIN);
+static const arm_feature_set arm_ext_v8_1m_main =
+ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_1M_MAIN);
  /* Instructions in ARMv8-M only found in M profile architectures.  */
  static const arm_feature_set arm_ext_v8m_m_only =
    ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8M | ARM_EXT2_V8M_MAIN);
@@ -235,8 +255,16 @@ static const arm_feature_set arm_ext_ras =
  /* FP16 instructions.  */
  static const arm_feature_set arm_ext_fp16 =
    ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST);
+static const arm_feature_set arm_ext_fp16_fml =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_FML);
+static const arm_feature_set arm_ext_v8_2 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_2A);
  static const arm_feature_set arm_ext_v8_3 =
    ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A);
+static const arm_feature_set arm_ext_sb =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB);
+static const arm_feature_set arm_ext_predres =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES);
  
  static const arm_feature_set arm_arch_any = ARM_ANY;
  #ifdef OBJ_ELF
@@ -274,6 +302,10 @@ static const arm_feature_set fpu_neon_ext_v1 =
    ARM_FEATURE_COPROC (FPU_NEON_EXT_V1);
  static const arm_feature_set fpu_vfp_v3_or_neon_ext =
    ARM_FEATURE_COPROC (FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3);
+static const arm_feature_set mve_ext =
+  ARM_FEATURE_COPROC (FPU_MVE);
+static const arm_feature_set mve_fp_ext =
+  ARM_FEATURE_COPROC (FPU_MVE_FP);
  #ifdef OBJ_ELF
  static const arm_feature_set fpu_vfp_fp16 =
    ARM_FEATURE_COPROC (FPU_VFP_EXT_FP16);
@@ -298,8 +330,20 @@ static const arm_feature_set fpu_neon_ext_dotprod =
    ARM_FEATURE_COPROC (FPU_NEON_EXT_DOTPROD);
  
  static int mfloat_abi_opt = -1;
-/* Record user cpu selection for object attributes.  */
+/* Architecture feature bits selected by the last -mcpu/-march or .cpu/.arch
+   directive.  */
+static arm_feature_set selected_arch = ARM_ARCH_NONE;
+/* Extension feature bits selected by the last -mcpu/-march or .arch_extension
+   directive.  */
+static arm_feature_set selected_ext = ARM_ARCH_NONE;
+/* Feature bits selected by the last -mcpu/-march or by the combination of the
+   last .cpu/.arch directive and the .arch_extension directives since that
+   directive.  */
  static arm_feature_set selected_cpu = ARM_ARCH_NONE;
+/* FPU feature bits selected by the last -mfpu or .fpu directive.  */
+static arm_feature_set selected_fpu = FPU_NONE;
+/* Feature bits selected by the last .object_arch directive.  */
+static arm_feature_set selected_object_arch = ARM_ARCH_NONE;
  /* Must be long enough to hold any of the names in arm_cpus.  */
  static char selected_cpu_name[20];
  
@@ -409,20 +453,25 @@ struct neon_type
    unsigned elems;
  };
  
-enum it_instruction_type
+enum pred_instruction_type
  {
-   OUTSIDE_IT_INSN,
+   OUTSIDE_PRED_INSN,
+   INSIDE_VPT_INSN,
     INSIDE_IT_INSN,
     INSIDE_IT_LAST_INSN,
     IF_INSIDE_IT_LAST_INSN, /* Either outside or inside;
                               if inside, should be the last one.  */
     NEUTRAL_IT_INSN,        /* This could be either inside or outside,
                               i.e. BKPT and NOP.  */
-   IT_INSN                 /* The IT insn has been parsed.  */
+   IT_INSN,               /* The IT insn has been parsed.  */
+   VPT_INSN,              /* The VPT/VPST insn has been parsed.  */
+   MVE_OUTSIDE_PRED_INSN   /* Indicates an MVE instruction without a
+                             predication code.  */
  };
  
  /* The maximum number of operands we need.  */
  #define ARM_IT_MAX_OPERANDS 6
+#define ARM_IT_MAX_RELOCS 3
  
  struct arm_it
  {
@@ -447,9 +496,9 @@ struct arm_it
      bfd_reloc_code_real_type type;
      expressionS                     exp;
      int                             pc_rel;
-  } reloc;
+  } relocs[ARM_IT_MAX_RELOCS];
  
-  enum it_instruction_type it_insn_type;
+  enum pred_instruction_type pred_insn_type;
  
    struct
    {
@@ -466,7 +515,7 @@ struct arm_it
         instructions. This allows us to disambiguate ARM <-> vector insns.  */
      unsigned regisimm   : 1;  /* 64-bit immediate, reg forms high 32 bits.  */
      unsigned isvec      : 1;  /* Is a single, double or quad VFP/Neon reg.  */
-    unsigned isquad     : 1;  /* Operand is Neon quad-precision register.  */
+    unsigned isquad     : 1;  /* Operand is SIMD quad register.  */
      unsigned issingle   : 1;  /* Operand is VFP single-precision register.  */
      unsigned hasreloc  : 1;  /* Operand has relocation suffix.  */
      unsigned writeback : 1;  /* Operand has trailing !  */
@@ -487,9 +536,6 @@ const char * fp_const[] =
    "0.0", "1.0", "2.0", "3.0", "4.0", "5.0", "0.5", "10.0", 0
  };
  
-/* Number of littlenums required to hold an extended precision number. */
-#define MAX_LITTLENUMS 6
-
  LITTLENUM_TYPE fp_values[NUM_FLOAT_VALS][MAX_LITTLENUMS];
  
  #define FAIL   (-1)
@@ -540,7 +586,7 @@ struct asm_barrier_opt
  
  struct reloc_entry
  {
-  const char *                    name;
+  const char *              name;
    bfd_reloc_code_real_type  reloc;
  };
  
@@ -567,7 +613,8 @@ struct neon_typed_alias
  };
  
  /* ARM register categories.  This includes coprocessor numbers and various
-   architecture extensions' registers. */
+   architecture extensions' registers.  Each entry should have an error message
+   in reg_expected_msgs below.  */
  enum arm_reg_type
  {
    REG_TYPE_RN,
@@ -579,6 +626,7 @@ enum arm_reg_type
    REG_TYPE_NQ,
    REG_TYPE_VFSD,
    REG_TYPE_NDQ,
+  REG_TYPE_NSD,
    REG_TYPE_NSDQ,
    REG_TYPE_VFC,
    REG_TYPE_MVF,
@@ -586,12 +634,13 @@ enum arm_reg_type
    REG_TYPE_MVFX,
    REG_TYPE_MVDX,
    REG_TYPE_MVAX,
+  REG_TYPE_MQ,
    REG_TYPE_DSPSC,
    REG_TYPE_MMXWR,
    REG_TYPE_MMXWC,
    REG_TYPE_MMXWCG,
    REG_TYPE_XSCALE,
-  REG_TYPE_RNB
+  REG_TYPE_RNB,
  };
  
  /* Structure for a hash table entry for a register.
@@ -610,27 +659,31 @@ struct reg_entry
  /* Diagnostics used when we don't get a register of the expected type. */
  const char * const reg_expected_msgs[] =
  {
-  N_("ARM register expected"),
-  N_("bad or missing co-processor number"),
-  N_("co-processor register expected"),
-  N_("FPA register expected"),
-  N_("VFP single precision register expected"),
-  N_("VFP/Neon double precision register expected"),
-  N_("Neon quad precision register expected"),
-  N_("VFP single or double precision register expected"),
-  N_("Neon double or quad precision register expected"),
-  N_("VFP single, double or Neon quad precision register expected"),
-  N_("VFP system register expected"),
-  N_("Maverick MVF register expected"),
-  N_("Maverick MVD register expected"),
-  N_("Maverick MVFX register expected"),
-  N_("Maverick MVDX register expected"),
-  N_("Maverick MVAX register expected"),
-  N_("Maverick DSPSC register expected"),
-  N_("iWMMXt data register expected"),
-  N_("iWMMXt control register expected"),
-  N_("iWMMXt scalar register expected"),
-  N_("XScale accumulator register expected"),
+  [REG_TYPE_RN]            = N_("ARM register expected"),
+  [REG_TYPE_CP]            = N_("bad or missing co-processor number"),
+  [REG_TYPE_CN]            = N_("co-processor register expected"),
+  [REG_TYPE_FN]            = N_("FPA register expected"),
+  [REG_TYPE_VFS]    = N_("VFP single precision register expected"),
+  [REG_TYPE_VFD]    = N_("VFP/Neon double precision register expected"),
+  [REG_TYPE_NQ]            = N_("Neon quad precision register expected"),
+  [REG_TYPE_VFSD]   = N_("VFP single or double precision register expected"),
+  [REG_TYPE_NDQ]    = N_("Neon double or quad precision register expected"),
+  [REG_TYPE_NSD]    = N_("Neon single or double precision register expected"),
+  [REG_TYPE_NSDQ]   = N_("VFP single, double or Neon quad precision register"
+                        " expected"),
+  [REG_TYPE_VFC]    = N_("VFP system register expected"),
+  [REG_TYPE_MVF]    = N_("Maverick MVF register expected"),
+  [REG_TYPE_MVD]    = N_("Maverick MVD register expected"),
+  [REG_TYPE_MVFX]   = N_("Maverick MVFX register expected"),
+  [REG_TYPE_MVDX]   = N_("Maverick MVDX register expected"),
+  [REG_TYPE_MVAX]   = N_("Maverick MVAX register expected"),
+  [REG_TYPE_DSPSC]  = N_("Maverick DSPSC register expected"),
+  [REG_TYPE_MMXWR]  = N_("iWMMXt data register expected"),
+  [REG_TYPE_MMXWC]  = N_("iWMMXt control register expected"),
+  [REG_TYPE_MMXWCG] = N_("iWMMXt scalar register expected"),
+  [REG_TYPE_XSCALE] = N_("XScale accumulator register expected"),
+  [REG_TYPE_MQ]            = N_("MVE vector register expected"),
+  [REG_TYPE_RNB]    = N_("")
  };
  
  /* Some well known registers that we refer to directly elsewhere.  */
@@ -655,7 +708,7 @@ struct asm_opcode
    unsigned int tag : 4;
  
    /* Basic instruction code.  */
-  unsigned int avalue : 28;
+  unsigned int avalue;
  
    /* Thumb-format instruction code.  */
    unsigned int tvalue;
@@ -669,6 +722,9 @@ struct asm_opcode
  
    /* Function to call to encode instruction in Thumb format.  */
    void (* tencode) (void);
+
+  /* Indicates whether this instruction may be vector predicated.  */
+  unsigned int mayBeVecPred : 1;
  };
  
  /* Defines for various bits that we will want to toggle.  */
@@ -791,19 +847,27 @@ struct asm_opcode
  #define THUMB_LOAD_BIT 0x0800
  #define THUMB2_LOAD_BIT 0x00100000
  
+#define BAD_SYNTAX     _("syntax error")
  #define BAD_ARGS       _("bad arguments to instruction")
  #define BAD_SP          _("r13 not allowed here")
  #define BAD_PC         _("r15 not allowed here")
+#define BAD_ODD                _("Odd register not allowed here")
+#define BAD_EVEN       _("Even register not allowed here")
  #define BAD_COND       _("instruction cannot be conditional")
  #define BAD_OVERLAP    _("registers may not be the same")
  #define BAD_HIREG      _("lo register required")
  #define BAD_THUMB32    _("instruction not supported in Thumb16 mode")
  #define BAD_ADDR_MODE   _("instruction does not accept this addressing mode");
  #define BAD_BRANCH     _("branch must be last instruction in IT block")
+#define BAD_BRANCH_OFF _("branch out of range or not a multiple of 2")
  #define BAD_NOT_IT     _("instruction not allowed in IT block")
+#define BAD_NOT_VPT    _("instruction missing MVE vector predication code")
  #define BAD_FPU                _("selected FPU does not support instruction")
  #define BAD_OUT_IT     _("thumb conditional instruction should be in IT block")
+#define BAD_OUT_VPT    \
+       _("vector predicated instruction should be in VPT/VPST block")
  #define BAD_IT_COND    _("incorrect condition in IT block")
+#define BAD_VPT_COND   _("incorrect condition in VPT/VPST block")
  #define BAD_IT_IT      _("IT falling in the range of a previous IT block")
  #define MISSING_FNSTART        _("missing .fnstart before unwinding directive")
  #define BAD_PC_ADDRESSING \
@@ -814,9 +878,24 @@ struct asm_opcode
  #define BAD_FP16       _("selected processor does not support fp16 instruction")
  #define UNPRED_REG(R)  _("using " R " results in unpredictable behaviour")
  #define THUMB1_RELOC_ONLY  _("relocation valid in thumb1 code only")
+#define MVE_NOT_IT     _("Warning: instruction is UNPREDICTABLE in an IT " \
+                         "block")
+#define MVE_NOT_VPT    _("Warning: instruction is UNPREDICTABLE in a VPT " \
+                         "block")
+#define MVE_BAD_PC     _("Warning: instruction is UNPREDICTABLE with PC" \
+                         " operand")
+#define MVE_BAD_SP     _("Warning: instruction is UNPREDICTABLE with SP" \
+                         " operand")
+#define BAD_SIMD_TYPE  _("bad type in SIMD instruction")
+#define BAD_MVE_AUTO   \
+  _("GAS auto-detection mode and -march=all is deprecated for MVE, please" \
+    " use a valid -march or -mcpu option.")
+#define BAD_MVE_SRCDEST        _("Warning: 32-bit element size and same destination "\
+                         "and source operands makes instruction UNPREDICTABLE")
  
  static struct hash_control * arm_ops_hsh;
  static struct hash_control * arm_cond_hsh;
+static struct hash_control * arm_vcond_hsh;
  static struct hash_control * arm_shift_hsh;
  static struct hash_control * arm_psr_hsh;
  static struct hash_control * arm_v7m_psr_hsh;
@@ -868,15 +947,15 @@ typedef enum asmfunc_states
  static asmfunc_states asmfunc_state = OUTSIDE_ASMFUNC;
  
  #ifdef OBJ_ELF
-#  define now_it seg_info (now_seg)->tc_segment_info_data.current_it
+#  define now_pred seg_info (now_seg)->tc_segment_info_data.current_pred
  #else
-static struct current_it now_it;
+static struct current_pred now_pred;
  #endif
  
  static inline int
-now_it_compatible (int cond)
+now_pred_compatible (int cond)
  {
-  return (cond & ~1) == (now_it.cc & ~1);
+  return (cond & ~1) == (now_pred.cc & ~1);
  }
  
  static inline int
@@ -885,39 +964,39 @@ conditional_insn (void)
    return inst.cond != COND_ALWAYS;
  }
  
-static int in_it_block (void);
+static int in_pred_block (void);
  
-static int handle_it_state (void);
+static int handle_pred_state (void);
  
  static void force_automatic_it_block_close (void);
  
  static void it_fsm_post_encode (void);
  
-#define set_it_insn_type(type)                 \
+#define set_pred_insn_type(type)                       \
    do                                           \
      {                                          \
-      inst.it_insn_type = type;                        \
-      if (handle_it_state () == FAIL)          \
+      inst.pred_insn_type = type;                      \
+      if (handle_pred_state () == FAIL)                \
         return;                                 \
      }                                          \
    while (0)
  
-#define set_it_insn_type_nonvoid(type, failret) \
+#define set_pred_insn_type_nonvoid(type, failret) \
    do                                           \
      {                                           \
-      inst.it_insn_type = type;                        \
-      if (handle_it_state () == FAIL)          \
+      inst.pred_insn_type = type;                      \
+      if (handle_pred_state () == FAIL)                \
         return failret;                         \
      }                                          \
    while(0)
  
-#define set_it_insn_type_last()                                \
+#define set_pred_insn_type_last()                              \
    do                                                   \
      {                                                  \
        if (inst.cond == COND_ALWAYS)                    \
-       set_it_insn_type (IF_INSIDE_IT_LAST_INSN);      \
+       set_pred_insn_type (IF_INSIDE_IT_LAST_INSN);    \
        else                                             \
-       set_it_insn_type (INSIDE_IT_LAST_INSN);         \
+       set_pred_insn_type (INSIDE_IT_LAST_INSN);               \
      }                                                  \
    while (0)
  
@@ -977,11 +1056,11 @@ skip_past_char (char ** str, char c)
  
  /* Return TRUE if anything in the expression is a bignum.  */
  
-static int
+static bfd_boolean
  walk_no_bignums (symbolS * sp)
  {
    if (symbol_get_value_expression (sp)->X_op == O_big)
-    return 1;
+    return TRUE;
  
    if (symbol_get_value_expression (sp)->X_add_symbol)
      {
@@ -990,10 +1069,10 @@ walk_no_bignums (symbolS * sp)
                   && walk_no_bignums (symbol_get_value_expression (sp)->X_op_symbol)));
      }
  
-  return 0;
+  return FALSE;
  }
  
-static int in_my_get_expression = 0;
+static bfd_boolean in_my_get_expression = FALSE;
  
  /* Third argument to my_get_expression.         */
  #define GE_NO_PREFIX 0
@@ -1007,7 +1086,6 @@ static int
  my_get_expression (expressionS * ep, char ** str, int prefix_mode)
  {
    char * save_in;
-  segT  seg;
  
    /* In unified syntax, all prefixes are optional.  */
    if (unified_syntax)
@@ -1030,16 +1108,17 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode)
        if (is_immediate_prefix (**str))
         (*str)++;
        break;
-    default: abort ();
+    default:
+      abort ();
      }
  
    memset (ep, 0, sizeof (expressionS));
  
    save_in = input_line_pointer;
    input_line_pointer = *str;
-  in_my_get_expression = 1;
-  seg = expression (ep);
-  in_my_get_expression = 0;
+  in_my_get_expression = TRUE;
+  expression (ep);
+  in_my_get_expression = FALSE;
  
    if (ep->X_op == O_illegal || ep->X_op == O_absent)
      {
@@ -1052,22 +1131,6 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode)
        return 1;
      }
  
-#ifdef OBJ_AOUT
-  if (seg != absolute_section
-      && seg != text_section
-      && seg != data_section
-      && seg != bss_section
-      && seg != undefined_section)
-    {
-      inst.error = _("bad segment");
-      *str = input_line_pointer;
-      input_line_pointer = save_in;
-      return 1;
-    }
-#else
-  (void) seg;
-#endif
-
    /* Get rid of any bignums now, so that we don't generate an error for which
       we can't establish a line number later on.         Big numbers are never valid
       in instructions, which is where this routine is always called.  */
@@ -1086,7 +1149,7 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode)
  
    *str = input_line_pointer;
    input_line_pointer = save_in;
-  return 0;
+  return SUCCESS;
  }
  
  /* Turn a string in input_line_pointer into a floating point constant
@@ -1181,6 +1244,7 @@ md_atof (int type, char * litP, int * sizeP)
  
  /* We handle all bad expressions here, so that we can report the faulty
     instruction in the error message.  */
+
  void
  md_operand (expressionS * exp)
  {
@@ -1190,10 +1254,11 @@ md_operand (expressionS * exp)
  
  /* Immediate values.  */
  
+#ifdef OBJ_ELF
  /* Generic immediate-value read function for use in directives.
     Accepts anything that 'expression' can fold to a constant.
     *val receives the number.  */
-#ifdef OBJ_ELF
+
  static int
  immediate_for_directive (int *val)
  {
@@ -1463,6 +1528,41 @@ parse_neon_operand_type (struct neon_type_el *vectype, char **ccp)
  #define NEON_ALL_LANES         15
  #define NEON_INTERLEAVE_LANES  14
  
+/* Record a use of the given feature.  */
+static void
+record_feature_use (const arm_feature_set *feature)
+{
+  if (thumb_mode)
+    ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used, *feature);
+  else
+    ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used, *feature);
+}
+
+/* If the given feature is available in the selected CPU, mark it as used.
+   Returns TRUE iff feature is available.  */
+static bfd_boolean
+mark_feature_used (const arm_feature_set *feature)
+{
+
+  /* Do not support the use of MVE only instructions when in auto-detection or
+     -march=all.  */
+  if (((feature == &mve_ext) || (feature == &mve_fp_ext))
+      && ARM_CPU_IS_ANY (cpu_variant))
+    {
+      first_error (BAD_MVE_AUTO);
+      return FALSE;
+    }
+  /* Ensure the feature is available on the current architecture.  */
+  if (!ARM_CPU_HAS_FEATURE (cpu_variant, *feature))
+    return FALSE;
+
+  /* Record the feature as used in the current execution state (Arm or
+     Thumb).  */
+  record_feature_use (feature);
+
+  return TRUE;
+}
+
  /* Parse either a register or a scalar, with an optional type. Return the
     register number, and optionally fill in the actual type of the register
     when multiple alternatives were given (NEON_TYPE_NDQ) in *RTYPE, and
@@ -1503,10 +1603,32 @@ parse_typed_reg_or_scalar (char **ccp, enum arm_reg_type type,
        || (type == REG_TYPE_NSDQ
           && (reg->type == REG_TYPE_VFS || reg->type == REG_TYPE_VFD
               || reg->type == REG_TYPE_NQ))
+      || (type == REG_TYPE_NSD
+         && (reg->type == REG_TYPE_VFS || reg->type == REG_TYPE_VFD))
        || (type == REG_TYPE_MMXWC
           && (reg->type == REG_TYPE_MMXWCG)))
      type = (enum arm_reg_type) reg->type;
  
+  if (type == REG_TYPE_MQ)
+    {
+      if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       return FAIL;
+
+      if (!reg || reg->type != REG_TYPE_NQ)
+       return FAIL;
+
+      if (reg->number > 14 && !mark_feature_used (&fpu_vfp_ext_d32))
+       {
+         first_error (_("expected MVE register [q0..q7]"));
+         return FAIL;
+       }
+      type = REG_TYPE_NQ;
+    }
+  else if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+          && (type == REG_TYPE_NQ))
+    return FAIL;
+
+
    if (type != reg->type)
      return FAIL;
  
@@ -1526,7 +1648,9 @@ parse_typed_reg_or_scalar (char **ccp, enum arm_reg_type type,
  
    if (skip_past_char (&str, '[') == SUCCESS)
      {
-      if (type != REG_TYPE_VFD)
+      if (type != REG_TYPE_VFD
+         && !(type == REG_TYPE_VFS
+              && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_2)))
         {
           first_error (_("only D registers may be indexed"));
           return FAIL;
@@ -1572,7 +1696,7 @@ parse_typed_reg_or_scalar (char **ccp, enum arm_reg_type type,
    return reg->number;
  }
  
-/* Like arm_reg_parse, but allow allow the following extra features:
+/* Like arm_reg_parse, but also allow the following extra features:
      - If RTYPE is non-zero, return the (possibly restricted) type of the
        register (e.g. Neon double or quad reg when either has been requested).
      - If this is a Neon vector type with additional type information, fill
@@ -1622,8 +1746,12 @@ parse_scalar (char **ccp, int elsize, struct neon_type_el *type)
    int reg;
    char *str = *ccp;
    struct neon_typed_alias atype;
+  enum arm_reg_type reg_type = REG_TYPE_VFD;
  
-  reg = parse_typed_reg_or_scalar (&str, REG_TYPE_VFD, NULL, &atype);
+  if (elsize == 4)
+    reg_type = REG_TYPE_VFS;
+
+  reg = parse_typed_reg_or_scalar (&str, reg_type, NULL, &atype);
  
    if (reg == FAIL || (atype.defined & NTA_HASINDEX) == 0)
      return FAIL;
@@ -1647,14 +1775,29 @@ parse_scalar (char **ccp, int elsize, struct neon_type_el *type)
    return reg * 16 + atype.index;
  }
  
+/* Types of registers in a list.  */
+
+enum reg_list_els
+{
+  REGLIST_RN,
+  REGLIST_CLRM,
+  REGLIST_VFP_S,
+  REGLIST_VFP_S_VPR,
+  REGLIST_VFP_D,
+  REGLIST_VFP_D_VPR,
+  REGLIST_NEON_D
+};
+
  /* Parse an ARM register list.  Returns the bitmask, or FAIL.  */
  
  static long
-parse_reg_list (char ** strp)
+parse_reg_list (char ** strp, enum reg_list_els etype)
  {
-  char * str = * strp;
-  long  range = 0;
-  int   another_range;
+  char *str = *strp;
+  long range = 0;
+  int another_range;
+
+  gas_assert (etype == REGLIST_RN || etype == REGLIST_CLRM);
  
    /* We come back here if we get ranges concatenated by '+' or '|'.  */
    do
@@ -1672,11 +1815,35 @@ parse_reg_list (char ** strp)
           do
             {
               int reg;
+             const char apsr_str[] = "apsr";
+             int apsr_str_len = strlen (apsr_str);
  
-             if ((reg = arm_reg_parse (&str, REG_TYPE_RN)) == FAIL)
+             reg = arm_reg_parse (&str, REGLIST_RN);
+             if (etype == REGLIST_CLRM)
                 {
-                 first_error (_(reg_expected_msgs[REG_TYPE_RN]));
-                 return FAIL;
+                 if (reg == REG_SP || reg == REG_PC)
+                   reg = FAIL;
+                 else if (reg == FAIL
+                          && !strncasecmp (str, apsr_str, apsr_str_len)
+                          && !ISALPHA (*(str + apsr_str_len)))
+                   {
+                     reg = 15;
+                     str += apsr_str_len;
+                   }
+
+                 if (reg == FAIL)
+                   {
+                     first_error (_("r0-r12, lr or APSR expected"));
+                     return FAIL;
+                   }
+               }
+             else /* etype == REGLIST_RN.  */
+               {
+                 if (reg == FAIL)
+                   {
+                     first_error (_(reg_expected_msgs[REGLIST_RN]));
+                     return FAIL;
+                   }
                 }
  
               if (in_range)
@@ -1720,7 +1887,7 @@ parse_reg_list (char ** strp)
               return FAIL;
             }
         }
-      else
+      else if (etype == REGLIST_RN)
         {
           expressionS exp;
  
@@ -1751,15 +1918,15 @@ parse_reg_list (char ** strp)
             }
           else
             {
-             if (inst.reloc.type != 0)
+             if (inst.relocs[0].type != 0)
                 {
                   inst.error = _("expression too complex");
                   return FAIL;
                 }
  
-             memcpy (&inst.reloc.exp, &exp, sizeof (expressionS));
-             inst.reloc.type = BFD_RELOC_ARM_MULTI;
-             inst.reloc.pc_rel = 0;
+             memcpy (&inst.relocs[0].exp, &exp, sizeof (expressionS));
+             inst.relocs[0].type = BFD_RELOC_ARM_MULTI;
+             inst.relocs[0].pc_rel = 0;
             }
         }
  
@@ -1775,15 +1942,6 @@ parse_reg_list (char ** strp)
    return range;
  }
  
-/* Types of registers in a list.  */
-
-enum reg_list_els
-{
-  REGLIST_VFP_S,
-  REGLIST_VFP_D,
-  REGLIST_NEON_D
-};
-
  /* Parse a VFP register list.  If the string is invalid return FAIL.
     Otherwise return the number of registers, and set PBASE to the first
     register.  Parses registers of type ETYPE.
@@ -1800,7 +1958,8 @@ enum reg_list_els
     bug.  */
  
  static int
-parse_vfp_reg_list (char **ccp, unsigned int *pbase, enum reg_list_els etype)
+parse_vfp_reg_list (char **ccp, unsigned int *pbase, enum reg_list_els etype,
+                   bfd_boolean *partial_match)
  {
    char *str = *ccp;
    int base_reg;
@@ -1811,6 +1970,9 @@ parse_vfp_reg_list (char **ccp, unsigned int *pbase, enum reg_list_els etype)
    int warned = 0;
    unsigned long mask = 0;
    int i;
+  bfd_boolean vpr_seen = FALSE;
+  bfd_boolean expect_vpr =
+    (etype == REGLIST_VFP_S_VPR) || (etype == REGLIST_VFP_D_VPR);
  
    if (skip_past_char (&str, '{') == FAIL)
      {
@@ -1821,20 +1983,25 @@ parse_vfp_reg_list (char **ccp, unsigned int *pbase, enum reg_list_els etype)
    switch (etype)
      {
      case REGLIST_VFP_S:
+    case REGLIST_VFP_S_VPR:
        regtype = REG_TYPE_VFS;
        max_regs = 32;
        break;
  
      case REGLIST_VFP_D:
+    case REGLIST_VFP_D_VPR:
        regtype = REG_TYPE_VFD;
        break;
  
      case REGLIST_NEON_D:
        regtype = REG_TYPE_NDQ;
        break;
+
+    default:
+      gas_assert (0);
      }
  
-  if (etype != REGLIST_VFP_S)
+  if (etype != REGLIST_VFP_S && etype != REGLIST_VFP_S_VPR)
      {
        /* VFPv3 allows 32 D registers, except for the VFPv3-D16 variant.  */
        if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_d32))
@@ -1852,19 +2019,54 @@ parse_vfp_reg_list (char **ccp, unsigned int *pbase, enum reg_list_els etype)
      }
  
    base_reg = max_regs;
+  *partial_match = FALSE;
  
    do
      {
        int setmask = 1, addregs = 1;
+      const char vpr_str[] = "vpr";
+      int vpr_str_len = strlen (vpr_str);
  
        new_base = arm_typed_reg_parse (&str, regtype, &regtype, NULL);
  
-      if (new_base == FAIL)
+      if (expect_vpr)
+       {
+         if (new_base == FAIL
+             && !strncasecmp (str, vpr_str, vpr_str_len)
+             && !ISALPHA (*(str + vpr_str_len))
+             && !vpr_seen)
+           {
+             vpr_seen = TRUE;
+             str += vpr_str_len;
+             if (count == 0)
+               base_reg = 0; /* Canonicalize VPR only on d0 with 0 regs.  */
+           }
+         else if (vpr_seen)
+           {
+             first_error (_("VPR expected last"));
+             return FAIL;
+           }
+         else if (new_base == FAIL)
+           {
+             if (regtype == REG_TYPE_VFS)
+               first_error (_("VFP single precision register or VPR "
+                              "expected"));
+             else /* regtype == REG_TYPE_VFD.  */
+               first_error (_("VFP/Neon double precision register or VPR "
+                              "expected"));
+             return FAIL;
+           }
+       }
+      else if (new_base == FAIL)
         {
           first_error (_(reg_expected_msgs[regtype]));
           return FAIL;
         }
  
+      *partial_match = TRUE;
+      if (vpr_seen)
+       continue;
+
        if (new_base >= max_regs)
         {
           first_error (_("register out of range in list"));
@@ -1887,7 +2089,7 @@ parse_vfp_reg_list (char **ccp, unsigned int *pbase, enum reg_list_els etype)
           return FAIL;
         }
  
-      if ((mask >> new_base) != 0 && ! warned)
+      if ((mask >> new_base) != 0 && ! warned && !vpr_seen)
         {
           as_tsktsk (_("register list not in ascending order"));
           warned = 1;
@@ -1942,11 +2144,17 @@ parse_vfp_reg_list (char **ccp, unsigned int *pbase, enum reg_list_els etype)
    str++;
  
    /* Sanity check -- should have raised a parse error above.  */
-  if (count == 0 || count > max_regs)
+  if ((!vpr_seen && count == 0) || count > max_regs)
      abort ();
  
    *pbase = base_reg;
  
+  if (expect_vpr && !vpr_seen)
+    {
+      first_error (_("VPR expected last"));
+      return FAIL;
+    }
+
    /* Final test -- the registers must be consecutive.  */
    mask >>= base_reg;
    for (i = 0; i < count; i++)
@@ -3209,7 +3417,7 @@ add_to_lit_pool (unsigned int nbytes)
      {
        imm1 = inst.operands[1].imm;
        imm2 = (inst.operands[1].regisimm ? inst.operands[1].reg
-              : inst.reloc.exp.X_unsigned ? 0
+              : inst.relocs[0].exp.X_unsigned ? 0
                : ((bfd_int64_t) inst.operands[1].imm) >> 32);
        if (target_big_endian)
         {
@@ -3225,23 +3433,23 @@ add_to_lit_pool (unsigned int nbytes)
      {
        if (nbytes == 4)
         {
-         if ((pool->literals[entry].X_op == inst.reloc.exp.X_op)
-             && (inst.reloc.exp.X_op == O_constant)
+         if ((pool->literals[entry].X_op == inst.relocs[0].exp.X_op)
+             && (inst.relocs[0].exp.X_op == O_constant)
               && (pool->literals[entry].X_add_number
-                 == inst.reloc.exp.X_add_number)
+                 == inst.relocs[0].exp.X_add_number)
               && (pool->literals[entry].X_md == nbytes)
               && (pool->literals[entry].X_unsigned
-                 == inst.reloc.exp.X_unsigned))
+                 == inst.relocs[0].exp.X_unsigned))
             break;
  
-         if ((pool->literals[entry].X_op == inst.reloc.exp.X_op)
-             && (inst.reloc.exp.X_op == O_symbol)
+         if ((pool->literals[entry].X_op == inst.relocs[0].exp.X_op)
+             && (inst.relocs[0].exp.X_op == O_symbol)
               && (pool->literals[entry].X_add_number
-                 == inst.reloc.exp.X_add_number)
+                 == inst.relocs[0].exp.X_add_number)
               && (pool->literals[entry].X_add_symbol
-                 == inst.reloc.exp.X_add_symbol)
+                 == inst.relocs[0].exp.X_add_symbol)
               && (pool->literals[entry].X_op_symbol
-                 == inst.reloc.exp.X_op_symbol)
+                 == inst.relocs[0].exp.X_op_symbol)
               && (pool->literals[entry].X_md == nbytes))
             break;
         }
@@ -3251,11 +3459,11 @@ add_to_lit_pool (unsigned int nbytes)
                && (pool->literals[entry].X_op == O_constant)
                && (pool->literals[entry].X_add_number == (offsetT) imm1)
                && (pool->literals[entry].X_unsigned
-                  == inst.reloc.exp.X_unsigned)
+                  == inst.relocs[0].exp.X_unsigned)
                && (pool->literals[entry + 1].X_op == O_constant)
                && (pool->literals[entry + 1].X_add_number == (offsetT) imm2)
                && (pool->literals[entry + 1].X_unsigned
-                  == inst.reloc.exp.X_unsigned))
+                  == inst.relocs[0].exp.X_unsigned))
         break;
  
        padding_slot_p = ((pool->literals[entry].X_md >> 8) == PADDING_SLOT);
@@ -3287,8 +3495,8 @@ add_to_lit_pool (unsigned int nbytes)
  
              We also check to make sure the literal operand is a
              constant number.  */
-         if (!(inst.reloc.exp.X_op == O_constant
-               || inst.reloc.exp.X_op == O_big))
+         if (!(inst.relocs[0].exp.X_op == O_constant
+               || inst.relocs[0].exp.X_op == O_big))
             {
               inst.error = _("invalid type for literal pool");
               return FAIL;
@@ -3301,7 +3509,7 @@ add_to_lit_pool (unsigned int nbytes)
                   return FAIL;
                 }
  
-             pool->literals[entry] = inst.reloc.exp;
+             pool->literals[entry] = inst.relocs[0].exp;
               pool->literals[entry].X_op = O_constant;
               pool->literals[entry].X_add_number = 0;
               pool->literals[entry++].X_md = (PADDING_SLOT << 8) | 4;
@@ -3314,22 +3522,22 @@ add_to_lit_pool (unsigned int nbytes)
               return FAIL;
             }
  
-         pool->literals[entry] = inst.reloc.exp;
+         pool->literals[entry] = inst.relocs[0].exp;
           pool->literals[entry].X_op = O_constant;
           pool->literals[entry].X_add_number = imm1;
-         pool->literals[entry].X_unsigned = inst.reloc.exp.X_unsigned;
+         pool->literals[entry].X_unsigned = inst.relocs[0].exp.X_unsigned;
           pool->literals[entry++].X_md = 4;
-         pool->literals[entry] = inst.reloc.exp;
+         pool->literals[entry] = inst.relocs[0].exp;
           pool->literals[entry].X_op = O_constant;
           pool->literals[entry].X_add_number = imm2;
-         pool->literals[entry].X_unsigned = inst.reloc.exp.X_unsigned;
+         pool->literals[entry].X_unsigned = inst.relocs[0].exp.X_unsigned;
           pool->literals[entry].X_md = 4;
           pool->alignment = 3;
           pool->next_free_entry += 1;
         }
        else
         {
-         pool->literals[entry] = inst.reloc.exp;
+         pool->literals[entry] = inst.relocs[0].exp;
           pool->literals[entry].X_md = 4;
         }
  
@@ -3345,13 +3553,13 @@ add_to_lit_pool (unsigned int nbytes)
      }
    else if (padding_slot_p)
      {
-      pool->literals[entry] = inst.reloc.exp;
+      pool->literals[entry] = inst.relocs[0].exp;
        pool->literals[entry].X_md = nbytes;
      }
  
-  inst.reloc.exp.X_op        = O_symbol;
-  inst.reloc.exp.X_add_number = pool_size;
-  inst.reloc.exp.X_add_symbol = pool->symbol;
+  inst.relocs[0].exp.X_op            = O_symbol;
+  inst.relocs[0].exp.X_add_number = pool_size;
+  inst.relocs[0].exp.X_add_symbol = pool->symbol;
  
    return SUCCESS;
  }
@@ -3552,7 +3760,9 @@ s_arm_elf_cons (int nbytes)
                 }
  
               if (size > nbytes)
-               as_bad (_("%s relocations do not fit in %d bytes"),
+               as_bad (ngettext ("%s relocations do not fit in %d byte",
+                                 "%s relocations do not fit in %d bytes",
+                                 nbytes),
                         howto->name, nbytes);
               else
                 {
@@ -3638,10 +3848,10 @@ emit_insn (expressionS *exp, int nbytes)
             }
           else
             {
-             if (now_it.state == AUTOMATIC_IT_BLOCK)
-               set_it_insn_type_nonvoid (OUTSIDE_IT_INSN, 0);
+             if (now_pred.state == AUTOMATIC_PRED_BLOCK)
+               set_pred_insn_type_nonvoid (OUTSIDE_PRED_INSN, 0);
               else
-               set_it_insn_type_nonvoid (NEUTRAL_IT_INSN, 0);
+               set_pred_insn_type_nonvoid (NEUTRAL_IT_INSN, 0);
  
               if (thumb_mode && (size > THUMB_SIZE) && !target_big_endian)
                 emit_thumb32_expr (exp);
@@ -3945,7 +4155,7 @@ s_arm_unwind_save_core (void)
    long range;
    int n;
  
-  range = parse_reg_list (&input_line_pointer);
+  range = parse_reg_list (&input_line_pointer, REGLIST_RN);
    if (range == FAIL)
      {
        as_bad (_("expected register list"));
@@ -4072,8 +4282,10 @@ s_arm_unwind_save_vfp_armv6 (void)
    valueT op;
    int num_vfpv3_regs = 0;
    int num_regs_below_16;
+  bfd_boolean partial_match;
  
-  count = parse_vfp_reg_list (&input_line_pointer, &start, REGLIST_VFP_D);
+  count = parse_vfp_reg_list (&input_line_pointer, &start, REGLIST_VFP_D,
+                             &partial_match);
    if (count == FAIL)
      {
        as_bad (_("expected register list"));
@@ -4120,8 +4332,10 @@ s_arm_unwind_save_vfp (void)
    int count;
    unsigned int reg;
    valueT op;
+  bfd_boolean partial_match;
  
-  count = parse_vfp_reg_list (&input_line_pointer, &reg, REGLIST_VFP_D);
+  count = parse_vfp_reg_list (&input_line_pointer, &reg, REGLIST_VFP_D,
+                             &partial_match);
    if (count == FAIL)
      {
        as_bad (_("expected register list"));
@@ -4619,7 +4833,7 @@ s_arm_eabi_attribute (int ignored ATTRIBUTE_UNUSED)
  {
    int tag = obj_elf_vendor_attribute (OBJ_ATTR_PROC);
  
-  if (tag < NUM_KNOWN_OBJ_ATTRIBUTES)
+  if (tag >= 0 && tag < NUM_KNOWN_OBJ_ATTRIBUTES)
      attributes_set_explicitly[tag] = 1;
  }
  
@@ -4736,7 +4950,7 @@ const pseudo_typeS md_pseudo_table[] =
    {"4byte", cons, 4},
    {"8byte", cons, 8},
    /* These are used for dwarf2.  */
-  { "file", (void (*) (int)) dwarf2_directive_file, 0 },
+  { "file", dwarf2_directive_file, 0 },
    { "loc",  dwarf2_directive_loc,  0 },
    { "loc_mark_labels", dwarf2_directive_loc_mark_labels, 0 },
  #endif
@@ -4769,6 +4983,7 @@ parse_immediate (char **str, int *val, int min, int max,
                  bfd_boolean prefix_opt)
  {
    expressionS exp;
+
    my_get_expression (&exp, str, prefix_opt ? GE_OPT_PREFIX : GE_IMM_PREFIX);
    if (exp.X_op != O_constant)
      {
@@ -5169,7 +5384,7 @@ parse_shift (char **str, int i, enum parse_shift_mode mode)
           inst.operands[i].imm = reg;
           inst.operands[i].immisreg = 1;
         }
-      else if (my_get_expression (&inst.reloc.exp, &p, GE_IMM_PREFIX))
+      else if (my_get_expression (&inst.relocs[0].exp, &p, GE_IMM_PREFIX))
         return FAIL;
      }
    inst.operands[i].shift_kind = shift;
@@ -5201,8 +5416,8 @@ parse_shifter_operand (char **str, int i)
        inst.operands[i].isreg = 1;
  
        /* parse_shift will override this if appropriate */
-      inst.reloc.exp.X_op = O_constant;
-      inst.reloc.exp.X_add_number = 0;
+      inst.relocs[0].exp.X_op = O_constant;
+      inst.relocs[0].exp.X_add_number = 0;
  
        if (skip_past_comma (str) == FAIL)
         return SUCCESS;
@@ -5211,7 +5426,7 @@ parse_shifter_operand (char **str, int i)
        return parse_shift (str, i, NO_SHIFT_RESTRICT);
      }
  
-  if (my_get_expression (&inst.reloc.exp, str, GE_IMM_PREFIX))
+  if (my_get_expression (&inst.relocs[0].exp, str, GE_IMM_PREFIX))
      return FAIL;
  
    if (skip_past_comma (str) == SUCCESS)
@@ -5220,7 +5435,7 @@ parse_shifter_operand (char **str, int i)
        if (my_get_expression (&exp, str, GE_NO_PREFIX))
         return FAIL;
  
-      if (exp.X_op != O_constant || inst.reloc.exp.X_op != O_constant)
+      if (exp.X_op != O_constant || inst.relocs[0].exp.X_op != O_constant)
         {
           inst.error = _("constant expression expected");
           return FAIL;
@@ -5232,19 +5447,20 @@ parse_shifter_operand (char **str, int i)
           inst.error = _("invalid rotation");
           return FAIL;
         }
-      if (inst.reloc.exp.X_add_number < 0 || inst.reloc.exp.X_add_number > 255)
+      if (inst.relocs[0].exp.X_add_number < 0
+         || inst.relocs[0].exp.X_add_number > 255)
         {
           inst.error = _("invalid constant");
           return FAIL;
         }
  
        /* Encode as specified.  */
-      inst.operands[i].imm = inst.reloc.exp.X_add_number | value << 7;
+      inst.operands[i].imm = inst.relocs[0].exp.X_add_number | value << 7;
        return SUCCESS;
      }
  
-  inst.reloc.type = BFD_RELOC_ARM_IMMEDIATE;
-  inst.reloc.pc_rel = 0;
+  inst.relocs[0].type = BFD_RELOC_ARM_IMMEDIATE;
+  inst.relocs[0].pc_rel = 0;
    return SUCCESS;
  }
  
@@ -5415,12 +5631,12 @@ parse_shifter_operand_group_reloc (char **str, int i)
  
        /* We now have the group relocation table entry corresponding to
          the name in the assembler source.  Next, we parse the expression.  */
-      if (my_get_expression (&inst.reloc.exp, str, GE_NO_PREFIX))
+      if (my_get_expression (&inst.relocs[0].exp, str, GE_NO_PREFIX))
         return PARSE_OPERAND_FAIL_NO_BACKTRACK;
  
        /* Record the relocation type (always the ALU variant here).  */
-      inst.reloc.type = (bfd_reloc_code_real_type) entry->alu_code;
-      gas_assert (inst.reloc.type != 0);
+      inst.relocs[0].type = (bfd_reloc_code_real_type) entry->alu_code;
+      gas_assert (inst.relocs[0].type != 0);
  
        return PARSE_OPERAND_SUCCESS;
      }
@@ -5459,23 +5675,23 @@ parse_neon_alignment (char **str, int i)
  }
  
  /* Parse all forms of an ARM address expression.  Information is written
-   to inst.operands[i] and/or inst.reloc.
+   to inst.operands[i] and/or inst.relocs[0].
  
     Preindexed addressing (.preind=1):
  
-   [Rn, #offset]       .reg=Rn .reloc.exp=offset
+   [Rn, #offset]       .reg=Rn .relocs[0].exp=offset
     [Rn, +/-Rm]        .reg=Rn .imm=Rm .immisreg=1 .negative=0/1
     [Rn, +/-Rm, shift]  .reg=Rn .imm=Rm .immisreg=1 .negative=0/1
-                      .shift_kind=shift .reloc.exp=shift_imm
+                      .shift_kind=shift .relocs[0].exp=shift_imm
  
     These three may have a trailing ! which causes .writeback to be set also.
  
     Postindexed addressing (.postind=1, .writeback=1):
  
-   [Rn], #offset       .reg=Rn .reloc.exp=offset
+   [Rn], #offset       .reg=Rn .relocs[0].exp=offset
     [Rn], +/-Rm        .reg=Rn .imm=Rm .immisreg=1 .negative=0/1
     [Rn], +/-Rm, shift  .reg=Rn .imm=Rm .immisreg=1 .negative=0/1
-                      .shift_kind=shift .reloc.exp=shift_imm
+                      .shift_kind=shift .relocs[0].exp=shift_imm
  
     Unindexed addressing (.preind=0, .postind=0):
  
@@ -5484,11 +5700,11 @@ parse_neon_alignment (char **str, int i)
     Other:
  
     [Rn]{!}            shorthand for [Rn,#0]{!}
-   =immediate         .isreg=0 .reloc.exp=immediate
-   label              .reg=PC .reloc.pc_rel=1 .reloc.exp=label
+   =immediate         .isreg=0 .relocs[0].exp=immediate
+   label              .reg=PC .relocs[0].pc_rel=1 .relocs[0].exp=label
  
    It is the caller's responsibility to check for addressing modes not
-  supported by the instruction, and to set inst.reloc.type.  */
+  supported by the instruction, and to set inst.relocs[0].type.  */
  
  static parse_operand_result
  parse_address_main (char **str, int i, int group_relocations,
@@ -5502,15 +5718,15 @@ parse_address_main (char **str, int i, int group_relocations,
        if (skip_past_char (&p, '=') == FAIL)
         {
           /* Bare address - translate to PC-relative offset.  */
-         inst.reloc.pc_rel = 1;
+         inst.relocs[0].pc_rel = 1;
           inst.operands[i].reg = REG_PC;
           inst.operands[i].isreg = 1;
           inst.operands[i].preind = 1;
  
-         if (my_get_expression (&inst.reloc.exp, &p, GE_OPT_PREFIX_BIG))
+         if (my_get_expression (&inst.relocs[0].exp, &p, GE_OPT_PREFIX_BIG))
             return PARSE_OPERAND_FAIL;
         }
-      else if (parse_big_immediate (&p, i, &inst.reloc.exp,
+      else if (parse_big_immediate (&p, i, &inst.relocs[0].exp,
                                     /*allow_symbol_p=*/TRUE))
         return PARSE_OPERAND_FAIL;
  
@@ -5585,29 +5801,32 @@ parse_address_main (char **str, int i, int group_relocations,
               /* We now have the group relocation table entry corresponding to
                  the name in the assembler source.  Next, we parse the
                  expression.  */
-             if (my_get_expression (&inst.reloc.exp, &p, GE_NO_PREFIX))
+             if (my_get_expression (&inst.relocs[0].exp, &p, GE_NO_PREFIX))
                 return PARSE_OPERAND_FAIL_NO_BACKTRACK;
  
               /* Record the relocation type.  */
               switch (group_type)
                 {
                   case GROUP_LDR:
-                   inst.reloc.type = (bfd_reloc_code_real_type) entry->ldr_code;
+                   inst.relocs[0].type
+                       = (bfd_reloc_code_real_type) entry->ldr_code;
                     break;
  
                   case GROUP_LDRS:
-                   inst.reloc.type = (bfd_reloc_code_real_type) entry->ldrs_code;
+                   inst.relocs[0].type
+                       = (bfd_reloc_code_real_type) entry->ldrs_code;
                     break;
  
                   case GROUP_LDC:
-                   inst.reloc.type = (bfd_reloc_code_real_type) entry->ldc_code;
+                   inst.relocs[0].type
+                       = (bfd_reloc_code_real_type) entry->ldc_code;
                     break;
  
                   default:
                     gas_assert (0);
                 }
  
-             if (inst.reloc.type == 0)
+             if (inst.relocs[0].type == 0)
                 {
                   inst.error = _("this group relocation is not allowed on this instruction");
                   return PARSE_OPERAND_FAIL_NO_BACKTRACK;
@@ -5616,11 +5835,12 @@ parse_address_main (char **str, int i, int group_relocations,
           else
             {
               char *q = p;
-             if (my_get_expression (&inst.reloc.exp, &p, GE_IMM_PREFIX))
+
+             if (my_get_expression (&inst.relocs[0].exp, &p, GE_IMM_PREFIX))
                 return PARSE_OPERAND_FAIL;
               /* If the offset is 0, find out if it's a +0 or -0.  */
-             if (inst.reloc.exp.X_op == O_constant
-                 && inst.reloc.exp.X_add_number == 0)
+             if (inst.relocs[0].exp.X_op == O_constant
+                 && inst.relocs[0].exp.X_add_number == 0)
                 {
                   skip_whitespace (q);
                   if (*q == '#')
@@ -5706,16 +5926,17 @@ parse_address_main (char **str, int i, int group_relocations,
           else
             {
               char *q = p;
+
               if (inst.operands[i].negative)
                 {
                   inst.operands[i].negative = 0;
                   p--;
                 }
-             if (my_get_expression (&inst.reloc.exp, &p, GE_IMM_PREFIX))
+             if (my_get_expression (&inst.relocs[0].exp, &p, GE_IMM_PREFIX))
                 return PARSE_OPERAND_FAIL;
               /* If the offset is 0, find out if it's a +0 or -0.  */
-             if (inst.reloc.exp.X_op == O_constant
-                 && inst.reloc.exp.X_add_number == 0)
+             if (inst.relocs[0].exp.X_op == O_constant
+                 && inst.relocs[0].exp.X_add_number == 0)
                 {
                   skip_whitespace (q);
                   if (*q == '#')
@@ -5735,8 +5956,8 @@ parse_address_main (char **str, int i, int group_relocations,
    if (inst.operands[i].preind == 0 && inst.operands[i].postind == 0)
      {
        inst.operands[i].preind = 1;
-      inst.reloc.exp.X_op = O_constant;
-      inst.reloc.exp.X_add_number = 0;
+      inst.relocs[0].exp.X_op = O_constant;
+      inst.relocs[0].exp.X_add_number = 0;
      }
    *str = p;
    return PARSE_OPERAND_SUCCESS;
@@ -5764,28 +5985,28 @@ parse_half (char **str)
    p = *str;
    skip_past_char (&p, '#');
    if (strncasecmp (p, ":lower16:", 9) == 0)
-    inst.reloc.type = BFD_RELOC_ARM_MOVW;
+    inst.relocs[0].type = BFD_RELOC_ARM_MOVW;
    else if (strncasecmp (p, ":upper16:", 9) == 0)
-    inst.reloc.type = BFD_RELOC_ARM_MOVT;
+    inst.relocs[0].type = BFD_RELOC_ARM_MOVT;
  
-  if (inst.reloc.type != BFD_RELOC_UNUSED)
+  if (inst.relocs[0].type != BFD_RELOC_UNUSED)
      {
        p += 9;
        skip_whitespace (p);
      }
  
-  if (my_get_expression (&inst.reloc.exp, &p, GE_NO_PREFIX))
+  if (my_get_expression (&inst.relocs[0].exp, &p, GE_NO_PREFIX))
      return FAIL;
  
-  if (inst.reloc.type == BFD_RELOC_UNUSED)
+  if (inst.relocs[0].type == BFD_RELOC_UNUSED)
      {
-      if (inst.reloc.exp.X_op != O_constant)
+      if (inst.relocs[0].exp.X_op != O_constant)
         {
           inst.error = _("constant expression expected");
           return FAIL;
         }
-      if (inst.reloc.exp.X_add_number < 0
-         || inst.reloc.exp.X_add_number > 0xffff)
+      if (inst.relocs[0].exp.X_add_number < 0
+         || inst.relocs[0].exp.X_add_number > 0xffff)
         {
           inst.error = _("immediate value out of range");
           return FAIL;
@@ -5994,6 +6215,39 @@ check_suffix:
    return FAIL;
  }
  
+/* Parse a system register name used as a VLDR/VSTR operand.  *STR must
+   point at the register name, which has to be immediately followed by a
+   comma.  Names are matched exactly and case-sensitively against the
+   table below.
+
+   On success, advance *STR to the comma and return the register number,
+   computed as regl | (regh << 3).  Return FAIL and leave *STR untouched
+   if there is no comma or if the text before it is not a known register
+   name.  */
+
+static int
+parse_sys_vldr_vstr (char **str)
+{
+  unsigned i;
+  static const struct {
+    const char *name;
+    int regl;
+    int regh;
+  } sysregs[] = {
+    {"FPSCR",          0x1, 0x0},
+    {"FPSCR_nzcvqc",   0x2, 0x0},
+    {"VPR",            0x4, 0x1},
+    {"P0",             0x5, 0x1},
+    {"FPCXTNS",        0x6, 0x1},
+    {"FPCXTS",         0x7, 0x1}
+  };
+  char *op_end = strchr (*str, ',');
+  size_t op_strlen;
+
+  /* Without a comma there is no operand boundary; bail out rather than
+     computing a length from a NULL pointer.  */
+  if (op_end == NULL)
+    return FAIL;
+
+  op_strlen = op_end - *str;
+
+  for (i = 0; i < sizeof (sysregs) / sizeof (sysregs[0]); i++)
+    {
+      /* Compare the lengths as well: strncmp alone would accept any prefix
+        of a register name (or an empty operand) as a match.  */
+      if (strlen (sysregs[i].name) == op_strlen
+         && !strncmp (*str, sysregs[i].name, op_strlen))
+       {
+         *str = op_end;
+         return sysregs[i].regl | (sysregs[i].regh << 3);
+       }
+    }
+
+  return FAIL;
+}
+
  /* Parse the flags argument to CPSI[ED].  Returns FAIL on error, or a
     value suitable for splatting into the AIF field of the instruction. */
  
@@ -6125,32 +6379,6 @@ parse_cond (char **str)
    return c->value;
  }
  
-/* Record a use of the given feature.  */
-static void
-record_feature_use (const arm_feature_set *feature)
-{
-  if (thumb_mode)
-    ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used, *feature);
-  else
-    ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used, *feature);
-}
-
-/* If the given feature available in the selected CPU, mark it as used.
-   Returns TRUE iff feature is available.  */
-static bfd_boolean
-mark_feature_used (const arm_feature_set *feature)
-{
-  /* Ensure the option is valid on the current architecture.  */
-  if (!ARM_CPU_HAS_FEATURE (cpu_variant, *feature))
-    return FALSE;
-
-  /* Add the appropriate architecture feature for the barrier option used.
-     */
-  record_feature_use (feature);
-
-  return TRUE;
-}
-
  /* Parse an option for a barrier instruction.  Returns the encoding for the
     option, or FAIL.  */
  static int
@@ -6213,7 +6441,7 @@ parse_tb (char **str)
      {
        if (parse_shift (&p, 0, SHIFT_LSL_IMMEDIATE) == FAIL)
         return FAIL;
-      if (inst.reloc.exp.X_add_number != 1)
+      if (inst.relocs[0].exp.X_add_number != 1)
         {
           inst.error = _("invalid shift");
           return FAIL;
@@ -6476,9 +6704,15 @@ enum operand_parse_code
    OP_RVS,      /* VFP single precision register */
    OP_RVD,      /* VFP double precision register (0..15) */
    OP_RND,       /* Neon double precision register (0..31) */
+  OP_RNDMQ,     /* Neon double precision (0..31) or MVE vector register.  */
+  OP_RNDMQR,    /* Neon double precision (0..31), MVE vector or ARM register.
+                */
    OP_RNQ,      /* Neon quad precision register */
+  OP_RNQMQ,    /* Neon quad or MVE vector register.  */
    OP_RVSD,     /* VFP single or double precision register */
+  OP_RNSD,      /* Neon single or double precision register */
    OP_RNDQ,      /* Neon double or quad precision register */
+  OP_RNDQMQ,     /* Neon double, quad or MVE vector register.  */
    OP_RNSDQ,    /* Neon single, double or quad precision register */
    OP_RNSC,      /* Neon scalar D[X] */
    OP_RVC,      /* VFP control register */
@@ -6493,24 +6727,42 @@ enum operand_parse_code
    OP_RIWG,     /* iWMMXt wCG register */
    OP_RXA,      /* XScale accumulator register */
  
+  OP_RNSDQMQ,  /* Neon single, double or quad register or MVE vector register
+                */
+  OP_RNSDQMQR, /* Neon single, double or quad register, MVE vector register or
+                  GPR (no SP/PC)  */
+  OP_RMQ,      /* MVE vector register.  */
+
+  /* New operands for Armv8.1-M Mainline.  */
+  OP_LR,       /* ARM LR register */
+  OP_RRe,      /* ARM register, only even numbered.  */
+  OP_RRo,      /* ARM register, only odd numbered, not r13 or r15.  */
+  OP_RRnpcsp_I32, /* ARM register (no BadReg) or literal 1 .. 32 */
+
    OP_REGLST,   /* ARM register list */
+  OP_CLRMLST,  /* CLRM register list */
    OP_VRSLST,   /* VFP single-precision register list */
    OP_VRDLST,   /* VFP double-precision register list */
    OP_VRSDLST,   /* VFP single or double-precision register list (& quad) */
    OP_NRDLST,    /* Neon double-precision register list (d0-d31, qN aliases) */
    OP_NSTRLST,   /* Neon element/structure list */
+  OP_VRSDVLST,  /* VFP single or double-precision register list and VPR */
  
    OP_RNDQ_I0,   /* Neon D or Q reg, or immediate zero.  */
    OP_RVSD_I0,  /* VFP S or D reg, or immediate zero.  */
    OP_RSVD_FI0, /* VFP S or D reg, or floating point immediate zero.  */
    OP_RR_RNSC,   /* ARM reg or Neon scalar.  */
+  OP_RNSD_RNSC, /* Neon S or D reg, or Neon scalar.  */
    OP_RNSDQ_RNSC, /* Vector S, D or Q reg, or Neon scalar.  */
+  OP_RNSDQ_RNSC_MQ, /* Vector S, D or Q reg, Neon scalar or MVE vector register.
+                    */
    OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar.  */
    OP_RND_RNSC,  /* Neon D reg, or Neon scalar.  */
    OP_VMOV,      /* Neon VMOV operands.  */
    OP_RNDQ_Ibig,        /* Neon D or Q reg, or big immediate for logic and VMVN.  */
    OP_RNDQ_I63b, /* Neon D or Q reg, or immediate for shift.  */
    OP_RIWR_I32z, /* iWMMXt wR register, or immediate 0 .. 32 for iWMMXt2.  */
+  OP_VLDR,     /* VLDR operand.  */
  
    OP_I0,        /* immediate zero */
    OP_I7,       /* immediate value 0 .. 7 */
@@ -6541,6 +6793,7 @@ enum operand_parse_code
    OP_EXP,      /* arbitrary expression */
    OP_EXPi,     /* same, with optional immediate prefix */
    OP_EXPr,     /* same, with optional relocation suffix */
+  OP_EXPs,     /* same, with optional non-first operand relocation suffix */
    OP_HALF,     /* 0 .. 65535 or low/high reloc.  */
    OP_IROT1,    /* VCADD rotate immediate: 90, 270.  */
    OP_IROT2,    /* VCMLA rotate immediate: 0, 90, 180, 270.  */
@@ -6570,13 +6823,17 @@ enum operand_parse_code
    OP_oI255c,    /*       curly-brace enclosed, 0 .. 255 */
  
    OP_oRR,       /* ARM register */
+  OP_oLR,       /* ARM LR register */
    OP_oRRnpc,    /* ARM register, not the PC */
    OP_oRRnpcsp,  /* ARM register, neither the PC nor the SP (a.k.a. BadReg) */
    OP_oRRw,      /* ARM register, not r15, optional trailing ! */
    OP_oRND,       /* Optional Neon double precision register */
    OP_oRNQ,       /* Optional Neon quad precision register */
+  OP_oRNDQMQ,     /* Optional Neon double, quad or MVE vector register.  */
    OP_oRNDQ,      /* Optional Neon double or quad precision register */
    OP_oRNSDQ,    /* Optional single, double or quad precision vector register */
+  OP_oRNSDQMQ,  /* Optional single, double or quad register or MVE vector
+                   register.  */
    OP_oSHll,     /* LSL immediate */
    OP_oSHar,     /* ASR immediate */
    OP_oSHllar,   /* LSL or ASR immediate */
@@ -6605,6 +6862,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
    enum arm_reg_type rtype;
    parse_operand_result result;
    unsigned int op_parse_code;
+  bfd_boolean partial_match;
  
  #define po_char_or_fail(chr)                   \
    do                                           \
@@ -6738,6 +6996,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
         case OP_RRnpc:
         case OP_RRnpcsp:
         case OP_oRR:
+       case OP_RRe:
+       case OP_RRo:
+       case OP_LR:
+       case OP_oLR:
         case OP_RR:    po_reg_or_fail (REG_TYPE_RN);      break;
         case OP_RCP:   po_reg_or_fail (REG_TYPE_CP);      break;
         case OP_RCN:   po_reg_or_fail (REG_TYPE_CN);      break;
@@ -6745,6 +7007,14 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
         case OP_RVS:   po_reg_or_fail (REG_TYPE_VFS);     break;
         case OP_RVD:   po_reg_or_fail (REG_TYPE_VFD);     break;
         case OP_oRND:
+       case OP_RNDMQR:
+         po_reg_or_goto (REG_TYPE_RN, try_rndmq);
+         break;
+       try_rndmq:
+       case OP_RNDMQ:
+         po_reg_or_goto (REG_TYPE_MQ, try_rnd);
+         break;
+       try_rnd:
         case OP_RND:   po_reg_or_fail (REG_TYPE_VFD);     break;
         case OP_RVC:
           po_reg_or_goto (REG_TYPE_VFC, coproc_reg);
@@ -6764,13 +7034,37 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
         case OP_RIWG:  po_reg_or_fail (REG_TYPE_MMXWCG);  break;
         case OP_RXA:   po_reg_or_fail (REG_TYPE_XSCALE);  break;
         case OP_oRNQ:
+       case OP_RNQMQ:
+         po_reg_or_goto (REG_TYPE_MQ, try_nq);
+         break;
+       try_nq:
         case OP_RNQ:   po_reg_or_fail (REG_TYPE_NQ);      break;
+       case OP_RNSD:  po_reg_or_fail (REG_TYPE_NSD);     break;
+       case OP_oRNDQMQ:
+       case OP_RNDQMQ:
+         po_reg_or_goto (REG_TYPE_MQ, try_rndq);
+         break;
+       try_rndq:
         case OP_oRNDQ:
         case OP_RNDQ:  po_reg_or_fail (REG_TYPE_NDQ);     break;
         case OP_RVSD:  po_reg_or_fail (REG_TYPE_VFSD);    break;
         case OP_oRNSDQ:
         case OP_RNSDQ: po_reg_or_fail (REG_TYPE_NSDQ);    break;
-
+       case OP_RNSDQMQR:
+         po_reg_or_goto (REG_TYPE_RN, try_mq);
+         break;
+         try_mq:
+       case OP_oRNSDQMQ:
+       case OP_RNSDQMQ:
+         po_reg_or_goto (REG_TYPE_MQ, try_nsdq2);
+         break;
+         try_nsdq2:
+         po_reg_or_fail (REG_TYPE_NSDQ);
+         inst.error = 0;
+         break;
+       case OP_RMQ:
+         po_reg_or_fail (REG_TYPE_MQ);
+         break;
         /* Neon scalar. Using an element size of 8 means that some invalid
            scalars are accepted here, so deal with those in later code.  */
         case OP_RNSC:  po_scalar_or_goto (8, failure);    break;
@@ -6813,6 +7107,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           }
           break;
  
+       case OP_RNSDQ_RNSC_MQ:
+         po_reg_or_goto (REG_TYPE_MQ, try_rnsdq_rnsc);
+         break;
+       try_rnsdq_rnsc:
         case OP_RNSDQ_RNSC:
           {
             po_scalar_or_goto (8, try_nsdq);
@@ -6822,6 +7120,18 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           }
           break;
  
+       case OP_RNSD_RNSC:
+         {
+           po_scalar_or_goto (8, try_s_scalar);
+           break;
+           try_s_scalar:
+           po_scalar_or_goto (4, try_nsd);
+           break;
+           try_nsd:
+           po_reg_or_fail (REG_TYPE_NSD);
+         }
+         break;
+
         case OP_RNDQ_RNSC:
           {
             po_scalar_or_goto (8, try_ndq);
@@ -6936,19 +7246,19 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
  
           /* Expressions */
         case OP_EXPi:   EXPi:
-         po_misc_or_fail (my_get_expression (&inst.reloc.exp, &str,
+         po_misc_or_fail (my_get_expression (&inst.relocs[0].exp, &str,
                                               GE_OPT_PREFIX));
           break;
  
         case OP_EXP:
-         po_misc_or_fail (my_get_expression (&inst.reloc.exp, &str,
+         po_misc_or_fail (my_get_expression (&inst.relocs[0].exp, &str,
                                               GE_NO_PREFIX));
           break;
  
         case OP_EXPr:   EXPr:
-         po_misc_or_fail (my_get_expression (&inst.reloc.exp, &str,
+         po_misc_or_fail (my_get_expression (&inst.relocs[0].exp, &str,
                                               GE_NO_PREFIX));
-         if (inst.reloc.exp.X_op == O_symbol)
+         if (inst.relocs[0].exp.X_op == O_symbol)
             {
               val = parse_reloc (&str);
               if (val == -1)
@@ -6964,6 +7274,20 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
             }
           break;
  
+       case OP_EXPs:
+         po_misc_or_fail (my_get_expression (&inst.relocs[i].exp, &str,
+                                             GE_NO_PREFIX));
+         if (inst.relocs[i].exp.X_op == O_symbol)
+           {
+             inst.operands[i].hasreloc = 1;
+           }
+         else if (inst.relocs[i].exp.X_op == O_constant)
+           {
+             inst.operands[i].imm = inst.relocs[i].exp.X_add_number;
+             inst.operands[i].hasreloc = 0;
+           }
+         break;
+
           /* Operand for MOVW or MOVT.  */
         case OP_HALF:
           po_misc_or_fail (parse_half (&str));
@@ -7052,6 +7376,13 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           val = parse_psr (&str, op_parse_code == OP_wPSR);
           break;
  
+       case OP_VLDR:
+         po_reg_or_goto (REG_TYPE_VFSD, try_sysreg);
+         break;
+       try_sysreg:
+         val = parse_sys_vldr_vstr (&str);
+         break;
+
         case OP_APSR_RR:
           po_reg_or_goto (REG_TYPE_RN, try_apsr);
           break;
@@ -7087,7 +7418,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
  
           /* Register lists.  */
         case OP_REGLST:
-         val = parse_reg_list (&str);
+         val = parse_reg_list (&str, REGLIST_RN);
           if (*str == '^')
             {
               inst.operands[i].writeback = 1;
@@ -7095,30 +7426,48 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
             }
           break;
  
+       case OP_CLRMLST:
+         val = parse_reg_list (&str, REGLIST_CLRM);
+         break;
+
         case OP_VRSLST:
-         val = parse_vfp_reg_list (&str, &inst.operands[i].reg, REGLIST_VFP_S);
+         val = parse_vfp_reg_list (&str, &inst.operands[i].reg, REGLIST_VFP_S,
+                                   &partial_match);
           break;
  
         case OP_VRDLST:
-         val = parse_vfp_reg_list (&str, &inst.operands[i].reg, REGLIST_VFP_D);
+         val = parse_vfp_reg_list (&str, &inst.operands[i].reg, REGLIST_VFP_D,
+                                   &partial_match);
           break;
  
         case OP_VRSDLST:
           /* Allow Q registers too.  */
           val = parse_vfp_reg_list (&str, &inst.operands[i].reg,
-                                   REGLIST_NEON_D);
+                                   REGLIST_NEON_D, &partial_match);
           if (val == FAIL)
             {
               inst.error = NULL;
               val = parse_vfp_reg_list (&str, &inst.operands[i].reg,
-                                       REGLIST_VFP_S);
+                                       REGLIST_VFP_S, &partial_match);
+             inst.operands[i].issingle = 1;
+           }
+         break;
+
+       case OP_VRSDVLST:
+         val = parse_vfp_reg_list (&str, &inst.operands[i].reg,
+                                   REGLIST_VFP_D_VPR, &partial_match);
+         if (val == FAIL && !partial_match)
+           {
+             inst.error = NULL;
+             val = parse_vfp_reg_list (&str, &inst.operands[i].reg,
+                                       REGLIST_VFP_S_VPR, &partial_match);
               inst.operands[i].issingle = 1;
             }
           break;
  
         case OP_NRDLST:
           val = parse_vfp_reg_list (&str, &inst.operands[i].reg,
-                                   REGLIST_NEON_D);
+                                   REGLIST_NEON_D, &partial_match);
           break;
  
         case OP_NSTRLST:
@@ -7210,6 +7559,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
             inst.error = BAD_PC;
           break;
  
+       case OP_VLDR:
+         if (inst.operands[i].isreg)
+           break;
+       /* fall through.  */
         case OP_CPSF:
         case OP_ENDI:
         case OP_oROR:
@@ -7218,9 +7571,11 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
         case OP_COND:
         case OP_oBARRIER_I15:
         case OP_REGLST:
+       case OP_CLRMLST:
         case OP_VRSLST:
         case OP_VRDLST:
         case OP_VRSDLST:
+       case OP_VRSDVLST:
         case OP_NRDLST:
         case OP_NSTRLST:
           if (val == FAIL)
@@ -7228,6 +7583,30 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           inst.operands[i].imm = val;
           break;
  
+       case OP_LR:
+       case OP_oLR:
+         if (inst.operands[i].reg != REG_LR)
+           inst.error = _("operand must be LR register");
+         break;
+
+       case OP_RRe:
+         if (inst.operands[i].isreg
+             && (inst.operands[i].reg & 0x00000001) != 0)
+           inst.error = BAD_ODD;
+         break;
+
+       case OP_RRo:
+         if (inst.operands[i].isreg)
+           {
+             if ((inst.operands[i].reg & 0x00000001) != 1)
+               inst.error = BAD_EVEN;
+             else if (inst.operands[i].reg == REG_SP)
+               as_tsktsk (MVE_BAD_SP);
+             else if (inst.operands[i].reg == REG_PC)
+               inst.error = BAD_PC;
+           }
+         break;
+
         default:
           break;
         }
@@ -7245,7 +7624,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           /* The parse routine should already have set inst.error, but set a
              default here just in case.  */
           if (!inst.error)
-           inst.error = _("syntax error");
+           inst.error = BAD_SYNTAX;
           return FAIL;
         }
  
@@ -7257,7 +7636,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           && upat[i+1] == OP_stop)
         {
           if (!inst.error)
-           inst.error = _("syntax error");
+           inst.error = BAD_SYNTAX;
           return FAIL;
         }
  
@@ -7338,7 +7717,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
  static void
  do_scalar_fp16_v82_encode (void)
  {
-  if (inst.cond != COND_ALWAYS)
+  if (inst.cond < COND_ALWAYS)
      as_warn (_("ARMv8.2 scalar fp16 instruction cannot be conditional,"
                " the behaviour is UNPREDICTABLE"));
    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16),
@@ -7484,7 +7863,7 @@ encode_arm_shift (int i)
           inst.instruction |= inst.operands[i].imm << 8;
         }
        else
-       inst.reloc.type = BFD_RELOC_ARM_SHIFT_IMM;
+       inst.relocs[0].type = BFD_RELOC_ARM_SHIFT_IMM;
      }
  }
  
@@ -7499,7 +7878,7 @@ encode_arm_shifter_operand (int i)
    else
      {
        inst.instruction |= INST_IMMEDIATE;
-      if (inst.reloc.type != BFD_RELOC_ARM_IMMEDIATE)
+      if (inst.relocs[0].type != BFD_RELOC_ARM_IMMEDIATE)
         inst.instruction |= inst.operands[i].imm;
      }
  }
@@ -7574,13 +7953,13 @@ encode_arm_addr_mode_2 (int i, bfd_boolean is_t)
           else
             {
               inst.instruction |= inst.operands[i].shift_kind << 5;
-             inst.reloc.type = BFD_RELOC_ARM_SHIFT_IMM;
+             inst.relocs[0].type = BFD_RELOC_ARM_SHIFT_IMM;
             }
         }
      }
-  else /* immediate offset in inst.reloc */
+  else /* immediate offset in inst.relocs[0] */
      {
-      if (is_pc && !inst.reloc.pc_rel)
+      if (is_pc && !inst.relocs[0].pc_rel)
         {
           const bfd_boolean is_load = ((inst.instruction & LOAD_BIT) != 0);
  
@@ -7597,12 +7976,12 @@ encode_arm_addr_mode_2 (int i, bfd_boolean is_t)
             as_tsktsk (_("use of PC in this instruction is deprecated"));
         }
  
-      if (inst.reloc.type == BFD_RELOC_UNUSED)
+      if (inst.relocs[0].type == BFD_RELOC_UNUSED)
         {
           /* Prefer + for zero encoded value.  */
           if (!inst.operands[i].negative)
             inst.instruction |= INDEX_UP;
-         inst.reloc.type = BFD_RELOC_ARM_OFFSET_IMM;
+         inst.relocs[0].type = BFD_RELOC_ARM_OFFSET_IMM;
         }
      }
  }
@@ -7634,19 +8013,19 @@ encode_arm_addr_mode_3 (int i, bfd_boolean is_t)
        if (!inst.operands[i].negative)
         inst.instruction |= INDEX_UP;
      }
-  else /* immediate offset in inst.reloc */
+  else /* immediate offset in inst.relocs[0] */
      {
-      constraint ((inst.operands[i].reg == REG_PC && !inst.reloc.pc_rel
+      constraint ((inst.operands[i].reg == REG_PC && !inst.relocs[0].pc_rel
                    && inst.operands[i].writeback),
                   BAD_PC_WRITEBACK);
        inst.instruction |= HWOFFSET_IMM;
-      if (inst.reloc.type == BFD_RELOC_UNUSED)
+      if (inst.relocs[0].type == BFD_RELOC_UNUSED)
         {
           /* Prefer + for zero encoded value.  */
           if (!inst.operands[i].negative)
             inst.instruction |= INDEX_UP;
  
-         inst.reloc.type = BFD_RELOC_ARM_OFFSET_IMM8;
+         inst.relocs[0].type = BFD_RELOC_ARM_OFFSET_IMM8;
         }
      }
  }
@@ -7899,7 +8278,7 @@ enum lit_type
  
  static void do_vfp_nsyn_opcode (const char *);
  
-/* inst.reloc.exp describes an "=expr" load pseudo-operation.
+/* inst.relocs[0].exp describes an "=expr" load pseudo-operation.
     Determine whether it can be performed with a move instruction; if
     it can, convert inst.instruction to that move instruction and
     return TRUE; if it can't, convert inst.instruction to a literal-pool
@@ -7926,28 +8305,28 @@ move_or_literal_pool (int i, enum lit_type t, bfd_boolean mode_3)
        return TRUE;
      }
  
-  if (inst.reloc.exp.X_op != O_constant
-      && inst.reloc.exp.X_op != O_symbol
-      && inst.reloc.exp.X_op != O_big)
+  if (inst.relocs[0].exp.X_op != O_constant
+      && inst.relocs[0].exp.X_op != O_symbol
+      && inst.relocs[0].exp.X_op != O_big)
      {
        inst.error = _("constant expression expected");
        return TRUE;
      }
  
-  if (inst.reloc.exp.X_op == O_constant
-      || inst.reloc.exp.X_op == O_big)
+  if (inst.relocs[0].exp.X_op == O_constant
+      || inst.relocs[0].exp.X_op == O_big)
      {
  #if defined BFD_HOST_64_BIT
        bfd_int64_t v;
  #else
        offsetT v;
  #endif
-      if (inst.reloc.exp.X_op == O_big)
+      if (inst.relocs[0].exp.X_op == O_big)
         {
           LITTLENUM_TYPE w[X_PRECISION];
           LITTLENUM_TYPE * l;
  
-         if (inst.reloc.exp.X_add_number == -1)
+         if (inst.relocs[0].exp.X_add_number == -1)
             {
               gen_to_words (w, X_PRECISION, E_PRECISION);
               l = w;
@@ -7971,7 +8350,7 @@ move_or_literal_pool (int i, enum lit_type t, bfd_boolean mode_3)
  #endif
         }
        else
-       v = inst.reloc.exp.X_add_number;
+       v = inst.relocs[0].exp.X_add_number;
  
        if (!inst.operands[i].issingle)
         {
@@ -8060,7 +8439,7 @@ move_or_literal_pool (int i, enum lit_type t, bfd_boolean mode_3)
               unsigned immlo = inst.operands[1].imm;
               unsigned immhi = inst.operands[1].regisimm
                 ? inst.operands[1].reg
-               : inst.reloc.exp.X_unsigned
+               : inst.relocs[0].exp.X_unsigned
                 ? 0
                 : ((bfd_int64_t)((int) immlo)) >> 32;
               int cmode = neon_cmode_for_move_imm (immlo, immhi, FALSE, &immbits,
@@ -8135,8 +8514,8 @@ move_or_literal_pool (int i, enum lit_type t, bfd_boolean mode_3)
    inst.operands[1].reg = REG_PC;
    inst.operands[1].isreg = 1;
    inst.operands[1].preind = 1;
-  inst.reloc.pc_rel = 1;
-  inst.reloc.type = (thumb_p
+  inst.relocs[0].pc_rel = 1;
+  inst.relocs[0].type = (thumb_p
                      ? BFD_RELOC_ARM_THUMB_OFFSET
                      : (mode_3
                         ? BFD_RELOC_ARM_HWLITERAL
@@ -8203,15 +8582,15 @@ encode_arm_cp_address (int i, int wb_ok, int unind_ok, int reloc_override)
      }
  
    if (reloc_override)
-    inst.reloc.type = (bfd_reloc_code_real_type) reloc_override;
-  else if ((inst.reloc.type < BFD_RELOC_ARM_ALU_PC_G0_NC
-           || inst.reloc.type > BFD_RELOC_ARM_LDC_SB_G2)
-          && inst.reloc.type != BFD_RELOC_ARM_LDR_PC_G0)
+    inst.relocs[0].type = (bfd_reloc_code_real_type) reloc_override;
+  else if ((inst.relocs[0].type < BFD_RELOC_ARM_ALU_PC_G0_NC
+           || inst.relocs[0].type > BFD_RELOC_ARM_LDC_SB_G2)
+          && inst.relocs[0].type != BFD_RELOC_ARM_LDR_PC_G0)
      {
        if (thumb_mode)
-       inst.reloc.type = BFD_RELOC_ARM_T32_CP_OFF_IMM;
+       inst.relocs[0].type = BFD_RELOC_ARM_T32_CP_OFF_IMM;
        else
-       inst.reloc.type = BFD_RELOC_ARM_CP_OFF_IMM;
+       inst.relocs[0].type = BFD_RELOC_ARM_CP_OFF_IMM;
      }
  
    /* Prefer + for zero encoded value.  */
@@ -8330,9 +8709,9 @@ static void
  do_rm_rd_rn (void)
  {
    constraint ((inst.operands[2].reg == REG_PC), BAD_PC);
-  constraint (((inst.reloc.exp.X_op != O_constant
-               && inst.reloc.exp.X_op != O_illegal)
-              || inst.reloc.exp.X_add_number != 0),
+  constraint (((inst.relocs[0].exp.X_op != O_constant
+               && inst.relocs[0].exp.X_op != O_illegal)
+              || inst.relocs[0].exp.X_add_number != 0),
               BAD_ADDR_MODE);
    inst.instruction |= inst.operands[0].reg;
    inst.instruction |= inst.operands[1].reg << 12;
@@ -8366,15 +8745,16 @@ do_adr (void)
  
    /* Frag hacking will turn this into a sub instruction if the offset turns
       out to be negative.  */
-  inst.reloc.type = BFD_RELOC_ARM_IMMEDIATE;
-  inst.reloc.pc_rel = 1;
-  inst.reloc.exp.X_add_number -= 8;
+  inst.relocs[0].type = BFD_RELOC_ARM_IMMEDIATE;
+  inst.relocs[0].pc_rel = 1;
+  inst.relocs[0].exp.X_add_number -= 8;
  
-  if (inst.reloc.exp.X_op == O_symbol
-      && inst.reloc.exp.X_add_symbol != NULL
-      && S_IS_DEFINED (inst.reloc.exp.X_add_symbol)
-      && THUMB_IS_FUNC (inst.reloc.exp.X_add_symbol))
-    inst.reloc.exp.X_add_number += 1;  
+  if (support_interwork
+      && inst.relocs[0].exp.X_op == O_symbol
+      && inst.relocs[0].exp.X_add_symbol != NULL
+      && S_IS_DEFINED (inst.relocs[0].exp.X_add_symbol)
+      && THUMB_IS_FUNC (inst.relocs[0].exp.X_add_symbol))
+    inst.relocs[0].exp.X_add_number |= 1;
  }
  
  /* This is a pseudo-op of the form "adrl rd, label" to be converted
@@ -8389,23 +8769,24 @@ do_adrl (void)
  
    /* Frag hacking will turn this into a sub instruction if the offset turns
       out to be negative.  */
-  inst.reloc.type             = BFD_RELOC_ARM_ADRL_IMMEDIATE;
-  inst.reloc.pc_rel           = 1;
+  inst.relocs[0].type         = BFD_RELOC_ARM_ADRL_IMMEDIATE;
+  inst.relocs[0].pc_rel               = 1;
    inst.size                   = INSN_SIZE * 2;
-  inst.reloc.exp.X_add_number -= 8;
+  inst.relocs[0].exp.X_add_number -= 8;
  
-  if (inst.reloc.exp.X_op == O_symbol
-      && inst.reloc.exp.X_add_symbol != NULL
-      && S_IS_DEFINED (inst.reloc.exp.X_add_symbol)
-      && THUMB_IS_FUNC (inst.reloc.exp.X_add_symbol))
-    inst.reloc.exp.X_add_number += 1;  
+  if (support_interwork
+      && inst.relocs[0].exp.X_op == O_symbol
+      && inst.relocs[0].exp.X_add_symbol != NULL
+      && S_IS_DEFINED (inst.relocs[0].exp.X_add_symbol)
+      && THUMB_IS_FUNC (inst.relocs[0].exp.X_add_symbol))
+    inst.relocs[0].exp.X_add_number |= 1;
  }
  
  static void
  do_arit (void)
  {
-  constraint (inst.reloc.type >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
-             && inst.reloc.type <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC ,
+  constraint (inst.relocs[0].type >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
+             && inst.relocs[0].type <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC ,
               THUMB1_RELOC_ONLY);
    if (!inst.operands[1].present)
      inst.operands[1].reg = inst.operands[0].reg;
@@ -8490,13 +8871,13 @@ encode_branch (int default_reloc)
        constraint (inst.operands[0].imm != BFD_RELOC_ARM_PLT32
                   && inst.operands[0].imm != BFD_RELOC_ARM_TLS_CALL,
                   _("the only valid suffixes here are '(plt)' and '(tlscall)'"));
-      inst.reloc.type = inst.operands[0].imm == BFD_RELOC_ARM_PLT32
+      inst.relocs[0].type = inst.operands[0].imm == BFD_RELOC_ARM_PLT32
         ? BFD_RELOC_ARM_PLT32
         : thumb_mode ? BFD_RELOC_ARM_THM_TLS_CALL : BFD_RELOC_ARM_TLS_CALL;
      }
    else
-    inst.reloc.type = (bfd_reloc_code_real_type) default_reloc;
-  inst.reloc.pc_rel = 1;
+    inst.relocs[0].type = (bfd_reloc_code_real_type) default_reloc;
+  inst.relocs[0].pc_rel = 1;
  }
  
  static void
@@ -8570,7 +8951,8 @@ do_bx (void)
    /* Output R_ARM_V4BX relocations if is an EABI object that looks like
       it is for ARMv4t or earlier.  */
    want_reloc = !ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v5);
-  if (object_arch && !ARM_CPU_HAS_FEATURE (*object_arch, arm_ext_v5))
+  if (!ARM_FEATURE_ZERO (selected_object_arch)
+      && !ARM_CPU_HAS_FEATURE (selected_object_arch, arm_ext_v5))
        want_reloc = TRUE;
  
  #ifdef OBJ_ELF
@@ -8579,7 +8961,7 @@ do_bx (void)
      want_reloc = FALSE;
  
    if (want_reloc)
-    inst.reloc.type = BFD_RELOC_ARM_V4BX;
+    inst.relocs[0].type = BFD_RELOC_ARM_V4BX;
  }
  
  
@@ -8808,9 +9190,9 @@ do_it (void)
    inst.size = 0;
    if (unified_syntax)
      {
-      set_it_insn_type (IT_INSN);
-      now_it.mask = (inst.instruction & 0xf) | 0x10;
-      now_it.cc = inst.operands[0].imm;
+      set_pred_insn_type (IT_INSN);
+      now_pred.mask = (inst.instruction & 0xf) | 0x10;
+      now_pred.cc = inst.operands[0].imm;
      }
  }
  
@@ -8868,6 +9250,11 @@ encode_ldmstm(int from_push_pop_mnem)
      {
        int is_push = (inst.instruction & A_PUSH_POP_OP_MASK) == A1_OPCODE_PUSH;
  
+      if (is_push && one_reg == 13 /* SP */)
+       /* PR 22483: The A2 encoding cannot be used when
+          pushing the stack pointer as this is UNPREDICTABLE.  */
+       return;
+
        inst.instruction &= A_COND_MASK;
        inst.instruction |= is_push ? A2_OPCODE_PUSH : A2_OPCODE_POP;
        inst.instruction |= one_reg << 12;
@@ -8943,15 +9330,15 @@ do_ldrex (void)
               || (inst.operands[1].reg == REG_PC),
               BAD_ADDR_MODE);
  
-  constraint (inst.reloc.exp.X_op != O_constant
-             || inst.reloc.exp.X_add_number != 0,
+  constraint (inst.relocs[0].exp.X_op != O_constant
+             || inst.relocs[0].exp.X_add_number != 0,
               _("offset must be zero in ARM encoding"));
  
    constraint ((inst.operands[1].reg == REG_PC), BAD_PC);
  
    inst.instruction |= inst.operands[0].reg << 12;
    inst.instruction |= inst.operands[1].reg << 16;
-  inst.reloc.type = BFD_RELOC_UNUSED;
+  inst.relocs[0].type = BFD_RELOC_UNUSED;
  }
  
  static void
@@ -8978,7 +9365,7 @@ check_ldr_r15_aligned (void)
    constraint (!(inst.operands[1].immisreg)
               && (inst.operands[0].reg == REG_PC
               && inst.operands[1].reg == REG_PC
-             && (inst.reloc.exp.X_add_number & 0x3)),
+             && (inst.relocs[0].exp.X_add_number & 0x3)),
               _("ldr to register 15 must be 4-byte aligned"));
  }
  
@@ -9000,8 +9387,8 @@ do_ldstt (void)
       reject [Rn,...].  */
    if (inst.operands[1].preind)
      {
-      constraint (inst.reloc.exp.X_op != O_constant
-                 || inst.reloc.exp.X_add_number != 0,
+      constraint (inst.relocs[0].exp.X_op != O_constant
+                 || inst.relocs[0].exp.X_add_number != 0,
                   _("this instruction requires a post-indexed address"));
  
        inst.operands[1].preind = 0;
@@ -9032,8 +9419,8 @@ do_ldsttv4 (void)
       reject [Rn,...].  */
    if (inst.operands[1].preind)
      {
-      constraint (inst.reloc.exp.X_op != O_constant
-                 || inst.reloc.exp.X_add_number != 0,
+      constraint (inst.relocs[0].exp.X_op != O_constant
+                 || inst.relocs[0].exp.X_add_number != 0,
                   _("this instruction requires a post-indexed address"));
  
        inst.operands[1].preind = 0;
@@ -9072,8 +9459,8 @@ do_mlas (void)
  static void
  do_mov (void)
  {
-  constraint (inst.reloc.type >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
-             && inst.reloc.type <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC ,
+  constraint (inst.relocs[0].type >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
+             && inst.relocs[0].type <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC ,
               THUMB1_RELOC_ONLY);
    inst.instruction |= inst.operands[0].reg << 12;
    encode_arm_shifter_operand (1);
@@ -9087,14 +9474,14 @@ do_mov16 (void)
    bfd_boolean top;
  
    top = (inst.instruction & 0x00400000) != 0;
-  constraint (top && inst.reloc.type == BFD_RELOC_ARM_MOVW,
+  constraint (top && inst.relocs[0].type == BFD_RELOC_ARM_MOVW,
               _(":lower16: not allowed in this instruction"));
-  constraint (!top && inst.reloc.type == BFD_RELOC_ARM_MOVT,
+  constraint (!top && inst.relocs[0].type == BFD_RELOC_ARM_MOVT,
               _(":upper16: not allowed in this instruction"));
    inst.instruction |= inst.operands[0].reg << 12;
-  if (inst.reloc.type == BFD_RELOC_UNUSED)
+  if (inst.relocs[0].type == BFD_RELOC_UNUSED)
      {
-      imm = inst.reloc.exp.X_add_number;
+      imm = inst.relocs[0].exp.X_add_number;
        /* The value is in two pieces: 0:11, 16:19.  */
        inst.instruction |= (imm & 0x00000fff);
        inst.instruction |= (imm & 0x0000f000) << 4;
@@ -9198,7 +9585,7 @@ do_mrs (void)
    if (inst.operands[1].isreg)
      {
        br = inst.operands[1].reg;
-      if (((br & 0x200) == 0) && ((br & 0xf0000) != 0xf000))
+      if (((br & 0x200) == 0) && ((br & 0xf0000) != 0xf0000))
         as_bad (_("bad register for mrs"));
      }
    else
@@ -9229,8 +9616,8 @@ do_msr (void)
    else
      {
        inst.instruction |= INST_IMMEDIATE;
-      inst.reloc.type = BFD_RELOC_ARM_IMMEDIATE;
-      inst.reloc.pc_rel = 0;
+      inst.relocs[0].type = BFD_RELOC_ARM_IMMEDIATE;
+      inst.relocs[0].pc_rel = 0;
      }
  }
  
@@ -9470,28 +9857,28 @@ do_shift (void)
                   _("extraneous shift as part of operand to shift insn"));
      }
    else
-    inst.reloc.type = BFD_RELOC_ARM_SHIFT_IMM;
+    inst.relocs[0].type = BFD_RELOC_ARM_SHIFT_IMM;
  }
  
  static void
  do_smc (void)
  {
-  inst.reloc.type = BFD_RELOC_ARM_SMC;
-  inst.reloc.pc_rel = 0;
+  inst.relocs[0].type = BFD_RELOC_ARM_SMC;
+  inst.relocs[0].pc_rel = 0;
  }
  
  static void
  do_hvc (void)
  {
-  inst.reloc.type = BFD_RELOC_ARM_HVC;
-  inst.reloc.pc_rel = 0;
+  inst.relocs[0].type = BFD_RELOC_ARM_HVC;
+  inst.relocs[0].pc_rel = 0;
  }
  
  static void
  do_swi (void)
  {
-  inst.reloc.type = BFD_RELOC_ARM_SWI;
-  inst.reloc.pc_rel = 0;
+  inst.relocs[0].type = BFD_RELOC_ARM_SWI;
+  inst.relocs[0].pc_rel = 0;
  }
  
  static void
@@ -9593,14 +9980,14 @@ do_strex (void)
    constraint (inst.operands[0].reg == inst.operands[1].reg
               || inst.operands[0].reg == inst.operands[2].reg, BAD_OVERLAP);
  
-  constraint (inst.reloc.exp.X_op != O_constant
-             || inst.reloc.exp.X_add_number != 0,
+  constraint (inst.relocs[0].exp.X_op != O_constant
+             || inst.relocs[0].exp.X_add_number != 0,
               _("offset must be zero in ARM encoding"));
  
    inst.instruction |= inst.operands[0].reg << 12;
    inst.instruction |= inst.operands[1].reg;
    inst.instruction |= inst.operands[2].reg << 16;
-  inst.reloc.type = BFD_RELOC_UNUSED;
+  inst.relocs[0].type = BFD_RELOC_UNUSED;
  }
  
  static void
@@ -9987,15 +10374,15 @@ do_fpa_ldmstm (void)
          [Rn]{!}.  The instruction does not really support stacking or
          unstacking, so we have to emulate these by setting appropriate
          bits and offsets.  */
-      constraint (inst.reloc.exp.X_op != O_constant
-                 || inst.reloc.exp.X_add_number != 0,
+      constraint (inst.relocs[0].exp.X_op != O_constant
+                 || inst.relocs[0].exp.X_add_number != 0,
                   _("this instruction does not support indexing"));
  
        if ((inst.instruction & PRE_INDEX) || inst.operands[2].writeback)
-       inst.reloc.exp.X_add_number = 12 * inst.operands[1].imm;
+       inst.relocs[0].exp.X_add_number = 12 * inst.operands[1].imm;
  
        if (!(inst.instruction & INDEX_UP))
-       inst.reloc.exp.X_add_number = -inst.reloc.exp.X_add_number;
+       inst.relocs[0].exp.X_add_number = -inst.relocs[0].exp.X_add_number;
  
        if (!(inst.instruction & PRE_INDEX) && inst.operands[2].writeback)
         {
@@ -10115,7 +10502,7 @@ do_iwmmxt_wldstd (void)
        if (inst.operands[1].writeback)
         inst.instruction |= WRITE_BACK;
        inst.instruction |= inst.operands[1].reg << 16;
-      inst.instruction |= inst.reloc.exp.X_add_number << 4;
+      inst.instruction |= inst.relocs[0].exp.X_add_number << 4;
        inst.instruction |= inst.operands[1].imm;
      }
    else
@@ -10287,7 +10674,7 @@ do_xsc_mra (void)
  static void
  encode_thumb32_shifted_operand (int i)
  {
-  unsigned int value = inst.reloc.exp.X_add_number;
+  unsigned int value = inst.relocs[0].exp.X_add_number;
    unsigned int shift = inst.operands[i].shift_kind;
  
    constraint (inst.operands[i].immisreg,
@@ -10297,7 +10684,7 @@ encode_thumb32_shifted_operand (int i)
      inst.instruction |= SHIFT_ROR << 4;
    else
      {
-      constraint (inst.reloc.exp.X_op != O_constant,
+      constraint (inst.relocs[0].exp.X_op != O_constant,
                   _("expression too complex"));
  
        constraint (value > 32
@@ -10349,14 +10736,14 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
        inst.instruction |= inst.operands[i].imm;
        if (inst.operands[i].shifted)
         {
-         constraint (inst.reloc.exp.X_op != O_constant,
+         constraint (inst.relocs[0].exp.X_op != O_constant,
                       _("expression too complex"));
-         constraint (inst.reloc.exp.X_add_number < 0
-                     || inst.reloc.exp.X_add_number > 3,
+         constraint (inst.relocs[0].exp.X_add_number < 0
+                     || inst.relocs[0].exp.X_add_number > 3,
                       _("shift out of range"));
-         inst.instruction |= inst.reloc.exp.X_add_number << 4;
+         inst.instruction |= inst.relocs[0].exp.X_add_number << 4;
         }
-      inst.reloc.type = BFD_RELOC_UNUSED;
+      inst.relocs[0].type = BFD_RELOC_UNUSED;
      }
    else if (inst.operands[i].preind)
      {
@@ -10378,7 +10765,7 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
           if (inst.operands[i].writeback)
             inst.instruction |= 0x00000100;
         }
-      inst.reloc.type = BFD_RELOC_ARM_T32_OFFSET_IMM;
+      inst.relocs[0].type = BFD_RELOC_ARM_T32_OFFSET_IMM;
      }
    else if (inst.operands[i].postind)
      {
@@ -10390,7 +10777,7 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
         inst.instruction |= 0x00200000;
        else
         inst.instruction |= 0x00000900;
-      inst.reloc.type = BFD_RELOC_ARM_T32_OFFSET_IMM;
+      inst.relocs[0].type = BFD_RELOC_ARM_T32_OFFSET_IMM;
      }
    else /* unindexed - only for coprocessor */
      inst.error = _("instruction does not accept unindexed addressing");
@@ -10418,6 +10805,11 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
    X(_asrs,  1000, fa50f000),                   \
    X(_b,     e000, f000b000),                   \
    X(_bcond, d000, f0008000),                   \
+  X(_bf,    0000, f040e001),                   \
+  X(_bfcsel,0000, f000e001),                   \
+  X(_bfx,   0000, f060e001),                   \
+  X(_bfl,   0000, f000c001),                   \
+  X(_bflx,  0000, f070e001),                   \
    X(_bic,   4380, ea200000),                   \
    X(_bics,  4380, ea300000),                   \
    X(_cmn,   42c0, eb100f00),                   \
@@ -10426,6 +10818,7 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
    X(_cpsid, b670, f3af8600),                   \
    X(_cpy,   4600, ea4f0000),                   \
    X(_dec_sp,80dd, f1ad0d00),                   \
+  X(_dls,   0000, f040e001),                   \
    X(_eor,   4040, ea800000),                   \
    X(_eors,  4040, ea900000),                   \
    X(_inc_sp,00dd, f10d0d00),                   \
@@ -10438,6 +10831,7 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
    X(_ldr_pc,4800, f85f0000),                   \
    X(_ldr_pc2,4800, f85f0000),                  \
    X(_ldr_sp,9800, f85d0000),                   \
+  X(_le,    0000, f00fc001),                   \
    X(_lsl,   0000, fa00f000),                   \
    X(_lsls,  0000, fa10f000),                   \
    X(_lsr,   0800, fa20f000),                   \
@@ -10479,6 +10873,7 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
    X(_yield, bf10, f3af8001),                   \
    X(_wfe,   bf20, f3af8002),                   \
    X(_wfi,   bf30, f3af8003),                   \
+  X(_wls,   0000, f040c001),                   \
    X(_sev,   bf40, f3af8004),                    \
    X(_sevl,  bf50, f3af8005),                   \
    X(_udf,   de00, f7f0a000)
@@ -10522,7 +10917,7 @@ do_t_add_sub_w (void)
      reject_bad_reg (Rd);
  
    inst.instruction |= (Rn << 16) | (Rd << 8);
-  inst.reloc.type = BFD_RELOC_ARM_T32_IMM12;
+  inst.relocs[0].type = BFD_RELOC_ARM_T32_IMM12;
  }
  
  /* Parse an add or subtract instruction.  We get here with inst.instruction
@@ -10539,7 +10934,7 @@ do_t_add_sub (void)
         : inst.operands[0].reg);  /* Rd, foo -> Rd, Rd, foo */
  
    if (Rd == REG_PC)
-    set_it_insn_type_last ();
+    set_pred_insn_type_last ();
  
    if (unified_syntax)
      {
@@ -10550,9 +10945,9 @@ do_t_add_sub (void)
        flags = (inst.instruction == T_MNEM_adds
                || inst.instruction == T_MNEM_subs);
        if (flags)
-       narrow = !in_it_block ();
+       narrow = !in_pred_block ();
        else
-       narrow = in_it_block ();
+       narrow = in_pred_block ();
        if (!inst.operands[2].isreg)
         {
           int add;
@@ -10584,11 +10979,12 @@ do_t_add_sub (void)
                 {
                   inst.instruction = THUMB_OP16(opcode);
                   inst.instruction |= (Rd << 4) | Rs;
-                 if (inst.reloc.type < BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
-                     || inst.reloc.type > BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC)
+                 if (inst.relocs[0].type < BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
+                     || (inst.relocs[0].type
+                         > BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC))
                   {
                     if (inst.size_req == 2)
-                     inst.reloc.type = BFD_RELOC_ARM_THUMB_ADD;
+                     inst.relocs[0].type = BFD_RELOC_ARM_THUMB_ADD;
                     else
                       inst.relax = opcode;
                   }
@@ -10599,29 +10995,31 @@ do_t_add_sub (void)
           if (inst.size_req == 4
               || (inst.size_req != 2 && !opcode))
             {
-             constraint (inst.reloc.type >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
-                         && inst.reloc.type <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC ,
+             constraint ((inst.relocs[0].type
+                          >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC)
+                         && (inst.relocs[0].type
+                             <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC) ,
                           THUMB1_RELOC_ONLY);
               if (Rd == REG_PC)
                 {
                   constraint (add, BAD_PC);
                   constraint (Rs != REG_LR || inst.instruction != T_MNEM_subs,
                              _("only SUBS PC, LR, #const allowed"));
-                 constraint (inst.reloc.exp.X_op != O_constant,
+                 constraint (inst.relocs[0].exp.X_op != O_constant,
                               _("expression too complex"));
-                 constraint (inst.reloc.exp.X_add_number < 0
-                             || inst.reloc.exp.X_add_number > 0xff,
+                 constraint (inst.relocs[0].exp.X_add_number < 0
+                             || inst.relocs[0].exp.X_add_number > 0xff,
                              _("immediate value out of range"));
                   inst.instruction = T2_SUBS_PC_LR
-                                    | inst.reloc.exp.X_add_number;
-                 inst.reloc.type = BFD_RELOC_UNUSED;
+                                    | inst.relocs[0].exp.X_add_number;
+                 inst.relocs[0].type = BFD_RELOC_UNUSED;
                   return;
                 }
               else if (Rs == REG_PC)
                 {
                   /* Always use addw/subw.  */
                   inst.instruction = add ? 0xf20f0000 : 0xf2af0000;
-                 inst.reloc.type = BFD_RELOC_ARM_T32_IMM12;
+                 inst.relocs[0].type = BFD_RELOC_ARM_T32_IMM12;
                 }
               else
                 {
@@ -10629,9 +11027,9 @@ do_t_add_sub (void)
                   inst.instruction = (inst.instruction & 0xe1ffffff)
                                      | 0x10000000;
                   if (flags)
-                   inst.reloc.type = BFD_RELOC_ARM_T32_IMMEDIATE;
+                   inst.relocs[0].type = BFD_RELOC_ARM_T32_IMMEDIATE;
                   else
-                   inst.reloc.type = BFD_RELOC_ARM_T32_ADD_IMM;
+                   inst.relocs[0].type = BFD_RELOC_ARM_T32_ADD_IMM;
                 }
               inst.instruction |= Rd << 8;
               inst.instruction |= Rs << 16;
@@ -10639,7 +11037,7 @@ do_t_add_sub (void)
         }
        else
         {
-         unsigned int value = inst.reloc.exp.X_add_number;
+         unsigned int value = inst.relocs[0].exp.X_add_number;
           unsigned int shift = inst.operands[2].shift_kind;
  
           Rn = inst.operands[2].reg;
@@ -10715,7 +11113,7 @@ do_t_add_sub (void)
           inst.instruction = (inst.instruction == T_MNEM_add
                               ? 0x0000 : 0x8000);
           inst.instruction |= (Rd << 4) | Rs;
-         inst.reloc.type = BFD_RELOC_ARM_THUMB_ADD;
+         inst.relocs[0].type = BFD_RELOC_ARM_THUMB_ADD;
           return;
         }
  
@@ -10766,24 +11164,24 @@ do_t_adr (void)
        /* Generate a 32-bit opcode.  */
        inst.instruction = THUMB_OP32 (inst.instruction);
        inst.instruction |= Rd << 8;
-      inst.reloc.type = BFD_RELOC_ARM_T32_ADD_PC12;
-      inst.reloc.pc_rel = 1;
+      inst.relocs[0].type = BFD_RELOC_ARM_T32_ADD_PC12;
+      inst.relocs[0].pc_rel = 1;
      }
    else
      {
        /* Generate a 16-bit opcode.  */
        inst.instruction = THUMB_OP16 (inst.instruction);
-      inst.reloc.type = BFD_RELOC_ARM_THUMB_ADD;
-      inst.reloc.exp.X_add_number -= 4; /* PC relative adjust.  */
-      inst.reloc.pc_rel = 1;
+      inst.relocs[0].type = BFD_RELOC_ARM_THUMB_ADD;
+      inst.relocs[0].exp.X_add_number -= 4; /* PC relative adjust.  */
+      inst.relocs[0].pc_rel = 1;
        inst.instruction |= Rd << 4;
      }
  
-  if (inst.reloc.exp.X_op == O_symbol
-      && inst.reloc.exp.X_add_symbol != NULL
-      && S_IS_DEFINED (inst.reloc.exp.X_add_symbol)
-      && THUMB_IS_FUNC (inst.reloc.exp.X_add_symbol))
-    inst.reloc.exp.X_add_number += 1;
+  if (inst.relocs[0].exp.X_op == O_symbol
+      && inst.relocs[0].exp.X_add_symbol != NULL
+      && S_IS_DEFINED (inst.relocs[0].exp.X_add_symbol)
+      && THUMB_IS_FUNC (inst.relocs[0].exp.X_add_symbol))
+    inst.relocs[0].exp.X_add_number += 1;
  }
  
  /* Arithmetic instructions for which there is just one 16-bit
@@ -10818,7 +11216,7 @@ do_t_arit3 (void)
           inst.instruction = (inst.instruction & 0xe1ffffff) | 0x10000000;
           inst.instruction |= Rd << 8;
           inst.instruction |= Rs << 16;
-         inst.reloc.type = BFD_RELOC_ARM_T32_IMMEDIATE;
+         inst.relocs[0].type = BFD_RELOC_ARM_T32_IMMEDIATE;
         }
        else
         {
@@ -10826,9 +11224,9 @@ do_t_arit3 (void)
  
           /* See if we can do this with a 16-bit instruction.  */
           if (THUMB_SETS_FLAGS (inst.instruction))
-           narrow = !in_it_block ();
+           narrow = !in_pred_block ();
           else
-           narrow = in_it_block ();
+           narrow = in_pred_block ();
  
           if (Rd > 7 || Rn > 7 || Rs > 7)
             narrow = FALSE;
@@ -10906,7 +11304,7 @@ do_t_arit3c (void)
           inst.instruction = (inst.instruction & 0xe1ffffff) | 0x10000000;
           inst.instruction |= Rd << 8;
           inst.instruction |= Rs << 16;
-         inst.reloc.type = BFD_RELOC_ARM_T32_IMMEDIATE;
+         inst.relocs[0].type = BFD_RELOC_ARM_T32_IMMEDIATE;
         }
        else
         {
@@ -10914,9 +11312,9 @@ do_t_arit3c (void)
  
           /* See if we can do this with a 16-bit instruction.  */
           if (THUMB_SETS_FLAGS (inst.instruction))
-           narrow = !in_it_block ();
+           narrow = !in_pred_block ();
           else
-           narrow = in_it_block ();
+           narrow = in_pred_block ();
  
           if (Rd > 7 || Rn > 7 || Rs > 7)
             narrow = FALSE;
@@ -11055,7 +11453,7 @@ do_t_bfx (void)
  static void
  do_t_blx (void)
  {
-  set_it_insn_type_last ();
+  set_pred_insn_type_last ();
  
    if (inst.operands[0].isreg)
      {
@@ -11079,9 +11477,9 @@ do_t_branch (void)
    bfd_reloc_code_real_type reloc;
  
    cond = inst.cond;
-  set_it_insn_type (IF_INSIDE_IT_LAST_INSN);
+  set_pred_insn_type (IF_INSIDE_IT_LAST_INSN);
  
-  if (in_it_block ())
+  if (in_pred_block ())
      {
        /* Conditional branches inside IT blocks are encoded as unconditional
          branches.  */
@@ -11099,7 +11497,7 @@ do_t_branch (void)
        && (inst.size_req == 4
           || (inst.size_req != 2
               && (inst.operands[0].hasreloc
-                 || inst.reloc.exp.X_op == O_constant))))
+                 || inst.relocs[0].exp.X_op == O_constant))))
      {
        inst.instruction = THUMB_OP32(opcode);
        if (cond == COND_ALWAYS)
@@ -11129,8 +11527,8 @@ do_t_branch (void)
        if (unified_syntax && inst.size_req != 2)
         inst.relax = opcode;
      }
-  inst.reloc.type = reloc;
-  inst.reloc.pc_rel = 1;
+  inst.relocs[0].type = reloc;
+  inst.relocs[0].pc_rel = 1;
  }
  
  /* Actually do the work for Thumb state bkpt and hlt.  The only difference
@@ -11148,7 +11546,7 @@ do_t_bkpt_hlt1 (int range)
        inst.instruction |= inst.operands[0].imm;
      }
  
-  set_it_insn_type (NEUTRAL_IT_INSN);
+  set_pred_insn_type (NEUTRAL_IT_INSN);
  }
  
  static void
@@ -11166,7 +11564,7 @@ do_t_bkpt (void)
  static void
  do_t_branch23 (void)
  {
-  set_it_insn_type_last ();
+  set_pred_insn_type_last ();
    encode_branch (BFD_RELOC_THUMB_PCREL_BRANCH23);
  
    /* md_apply_fix blows up with 'bl foo(PLT)' where foo is defined in
@@ -11174,27 +11572,27 @@ do_t_branch23 (void)
       the branch encoding is now needed to deal with TLSCALL relocs.
       So if we see a PLT reloc now, put it back to how it used to be to
       keep the preexisting behaviour.  */
-  if (inst.reloc.type == BFD_RELOC_ARM_PLT32)
-    inst.reloc.type = BFD_RELOC_THUMB_PCREL_BRANCH23;
+  if (inst.relocs[0].type == BFD_RELOC_ARM_PLT32)
+    inst.relocs[0].type = BFD_RELOC_THUMB_PCREL_BRANCH23;
  
  #if defined(OBJ_COFF)
    /* If the destination of the branch is a defined symbol which does not have
       the THUMB_FUNC attribute, then we must be calling a function which has
       the (interfacearm) attribute.  We look for the Thumb entry point to that
       function and change the branch to refer to that function instead. */
-  if (  inst.reloc.exp.X_op == O_symbol
-      && inst.reloc.exp.X_add_symbol != NULL
-      && S_IS_DEFINED (inst.reloc.exp.X_add_symbol)
-      && ! THUMB_IS_FUNC (inst.reloc.exp.X_add_symbol))
-    inst.reloc.exp.X_add_symbol =
-      find_real_start (inst.reloc.exp.X_add_symbol);
+  if (  inst.relocs[0].exp.X_op == O_symbol
+      && inst.relocs[0].exp.X_add_symbol != NULL
+      && S_IS_DEFINED (inst.relocs[0].exp.X_add_symbol)
+      && ! THUMB_IS_FUNC (inst.relocs[0].exp.X_add_symbol))
+    inst.relocs[0].exp.X_add_symbol
+      = find_real_start (inst.relocs[0].exp.X_add_symbol);
  #endif
  }
  
  static void
  do_t_bx (void)
  {
-  set_it_insn_type_last ();
+  set_pred_insn_type_last ();
    inst.instruction |= inst.operands[0].reg << 3;
    /* ??? FIXME: Should add a hacky reloc here if reg is REG_PC.         The reloc
       should cause the alignment to be checked once it is known.         This is
@@ -11206,7 +11604,7 @@ do_t_bxj (void)
  {
    int Rm;
  
-  set_it_insn_type_last ();
+  set_pred_insn_type_last ();
    Rm = inst.operands[0].reg;
    reject_bad_reg (Rm);
    inst.instruction |= Rm << 16;
@@ -11229,17 +11627,23 @@ do_t_clz (void)
    inst.instruction |= Rm;
  }
  
+static void
+do_t_csdb (void)
+{
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+}
+
  static void
  do_t_cps (void)
  {
-  set_it_insn_type (OUTSIDE_IT_INSN);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
    inst.instruction |= inst.operands[0].imm;
  }
  
  static void
  do_t_cpsi (void)
  {
-  set_it_insn_type (OUTSIDE_IT_INSN);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
    if (unified_syntax
        && (inst.operands[1].present || inst.size_req == 4)
        && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6_notm))
@@ -11286,11 +11690,11 @@ do_t_cpy (void)
  static void
  do_t_cbz (void)
  {
-  set_it_insn_type (OUTSIDE_IT_INSN);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
    constraint (inst.operands[0].reg > 7, BAD_HIREG);
    inst.instruction |= inst.operands[0].reg;
-  inst.reloc.pc_rel = 1;
-  inst.reloc.type = BFD_RELOC_THUMB_PCREL_BRANCH7;
+  inst.relocs[0].pc_rel = 1;
+  inst.relocs[0].type = BFD_RELOC_THUMB_PCREL_BRANCH7;
  }
  
  static void
@@ -11332,10 +11736,11 @@ do_t_it (void)
  {
    unsigned int cond = inst.operands[0].imm;
  
-  set_it_insn_type (IT_INSN);
-  now_it.mask = (inst.instruction & 0xf) | 0x10;
-  now_it.cc = cond;
-  now_it.warn_deprecated = FALSE;
+  set_pred_insn_type (IT_INSN);
+  now_pred.mask = (inst.instruction & 0xf) | 0x10;
+  now_pred.cc = cond;
+  now_pred.warn_deprecated = FALSE;
+  now_pred.type = SCALAR_PRED;
  
    /* If the condition is a negative condition, invert the mask.  */
    if ((cond & 0x1) == 0x0)
@@ -11345,22 +11750,22 @@ do_t_it (void)
        if ((mask & 0x7) == 0)
         {
           /* No conversion needed.  */
-         now_it.block_length = 1;
+         now_pred.block_length = 1;
         }
        else if ((mask & 0x3) == 0)
         {
           mask ^= 0x8;
-         now_it.block_length = 2;
+         now_pred.block_length = 2;
         }
        else if ((mask & 0x1) == 0)
         {
           mask ^= 0xC;
-         now_it.block_length = 3;
+         now_pred.block_length = 3;
         }
        else
         {
           mask ^= 0xE;
-         now_it.block_length = 4;
+         now_pred.block_length = 4;
         }
  
        inst.instruction &= 0xfff0;
@@ -11370,18 +11775,33 @@ do_t_it (void)
    inst.instruction |= cond << 4;
  }
  
+static void
+do_mve_vpt (void)
+{
+  /* Vector predicated block; mask is insn bit 22, then bits 15:13.  */
+  set_pred_insn_type (VPT_INSN);
+  now_pred.cc = 0;
+  now_pred.mask = ((inst.instruction & 0x00400000) >> 19)
+                 | ((inst.instruction & 0xe000) >> 13);
+  now_pred.warn_deprecated = FALSE;
+  now_pred.type = VECTOR_PRED;
+}
+
  /* Helper function used for both push/pop and ldm/stm.  */
  static void
-encode_thumb2_ldmstm (int base, unsigned mask, bfd_boolean writeback)
+encode_thumb2_multi (bfd_boolean do_io, int base, unsigned mask,
+                    bfd_boolean writeback)
  {
-  bfd_boolean load;
+  bfd_boolean load, store;
  
-  load = (inst.instruction & (1 << 20)) != 0;
+  gas_assert (base != -1 || !do_io);
+  load = do_io && ((inst.instruction & (1 << 20)) != 0);
+  store = do_io && !load;
  
    if (mask & (1 << 13))
      inst.error =  _("SP not allowed in register list");
  
-  if ((mask & (1 << base)) != 0
+  if (do_io && (mask & (1 << base)) != 0
        && writeback)
      inst.error = _("having the base register in the register list when "
                    "using write back is UNPREDICTABLE");
@@ -11393,16 +11813,16 @@ encode_thumb2_ldmstm (int base, unsigned mask, bfd_boolean writeback)
           if (mask & (1 << 14))
             inst.error = _("LR and PC should not both be in register list");
           else
-           set_it_insn_type_last ();
+           set_pred_insn_type_last ();
         }
      }
-  else
+  else if (store)
      {
        if (mask & (1 << 15))
         inst.error = _("PC not allowed in register list");
      }
  
-  if ((mask & (mask - 1)) == 0)
+  if (do_io && ((mask & (mask - 1)) == 0))
      {
        /* Single register transfers implemented as str/ldr.  */
        if (writeback)
@@ -11431,14 +11851,15 @@ encode_thumb2_ldmstm (int base, unsigned mask, bfd_boolean writeback)
      inst.instruction |= WRITE_BACK;
  
    inst.instruction |= mask;
-  inst.instruction |= base << 16;
+  if (do_io)
+    inst.instruction |= base << 16;
  }
  
  static void
  do_t_ldmstm (void)
  {
    /* This really doesn't seem worth it.  */
-  constraint (inst.reloc.type != BFD_RELOC_UNUSED,
+  constraint (inst.relocs[0].type != BFD_RELOC_UNUSED,
               _("expression too complex"));
    constraint (inst.operands[1].writeback,
               _("Thumb load/store multiple does not support {reglist}^"));
@@ -11526,8 +11947,9 @@ do_t_ldmstm (void)
           if (inst.instruction < 0xffff)
             inst.instruction = THUMB_OP32 (inst.instruction);
  
-         encode_thumb2_ldmstm (inst.operands[0].reg, inst.operands[1].imm,
-                               inst.operands[0].writeback);
+         encode_thumb2_multi (TRUE /* do_io */, inst.operands[0].reg,
+                              inst.operands[1].imm,
+                              inst.operands[0].writeback);
         }
      }
    else
@@ -11575,7 +11997,7 @@ do_t_ldrex (void)
  
    inst.instruction |= inst.operands[0].reg << 12;
    inst.instruction |= inst.operands[1].reg << 16;
-  inst.reloc.type = BFD_RELOC_ARM_T32_OFFSET_U8;
+  inst.relocs[0].type = BFD_RELOC_ARM_T32_OFFSET_U8;
  }
  
  static void
@@ -11605,7 +12027,7 @@ do_t_ldst (void)
    if (inst.operands[0].isreg
        && !inst.operands[0].preind
        && inst.operands[0].reg == REG_PC)
-    set_it_insn_type_last ();
+    set_pred_insn_type_last ();
  
    opcode = inst.instruction;
    if (unified_syntax)
@@ -11645,7 +12067,7 @@ do_t_ldst (void)
                 {
                   if (Rn == REG_PC)
                     {
-                     if (inst.reloc.pc_rel)
+                     if (inst.relocs[0].pc_rel)
                         opcode = T_MNEM_ldr_pc2;
                       else
                         opcode = T_MNEM_ldr_pc;
@@ -11666,7 +12088,7 @@ do_t_ldst (void)
                 }
               inst.instruction |= THUMB_OP16 (opcode);
               if (inst.size_req == 2)
-               inst.reloc.type = BFD_RELOC_ARM_THUMB_OFFSET;
+               inst.relocs[0].type = BFD_RELOC_ARM_THUMB_OFFSET;
               else
                 inst.relax = opcode;
               return;
@@ -11745,7 +12167,7 @@ do_t_ldst (void)
         inst.instruction = T_OPCODE_STR_SP;
  
        inst.instruction |= inst.operands[0].reg << 8;
-      inst.reloc.type = BFD_RELOC_ARM_THUMB_OFFSET;
+      inst.relocs[0].type = BFD_RELOC_ARM_THUMB_OFFSET;
        return;
      }
  
@@ -11755,7 +12177,7 @@ do_t_ldst (void)
        /* Immediate offset.  */
        inst.instruction |= inst.operands[0].reg;
        inst.instruction |= inst.operands[1].reg << 3;
-      inst.reloc.type = BFD_RELOC_ARM_THUMB_OFFSET;
+      inst.relocs[0].type = BFD_RELOC_ARM_THUMB_OFFSET;
        return;
      }
  
@@ -11864,7 +12286,7 @@ do_t_mov_cmp (void)
    Rm = inst.operands[1].reg;
  
    if (Rn == REG_PC)
-    set_it_insn_type_last ();
+    set_pred_insn_type_last ();
  
    if (unified_syntax)
      {
@@ -11876,7 +12298,7 @@ do_t_mov_cmp (void)
  
        low_regs = (Rn <= 7 && Rm <= 7);
        opcode = inst.instruction;
-      if (in_it_block ())
+      if (in_pred_block ())
         narrow = opcode != T_MNEM_movs;
        else
         narrow = opcode != T_MNEM_movs || low_regs;
@@ -11947,31 +12369,33 @@ do_t_mov_cmp (void)
        if (!inst.operands[1].isreg)
         {
           /* Immediate operand.  */
-         if (!in_it_block () && opcode == T_MNEM_mov)
+         if (!in_pred_block () && opcode == T_MNEM_mov)
             narrow = 0;
           if (low_regs && narrow)
             {
               inst.instruction = THUMB_OP16 (opcode);
               inst.instruction |= Rn << 8;
-             if (inst.reloc.type < BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
-                 || inst.reloc.type > BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC)
+             if (inst.relocs[0].type < BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
+                 || inst.relocs[0].type > BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC)
                 {
                   if (inst.size_req == 2)
-                   inst.reloc.type = BFD_RELOC_ARM_THUMB_IMM;
+                   inst.relocs[0].type = BFD_RELOC_ARM_THUMB_IMM;
                   else
                     inst.relax = opcode;
                 }
             }
           else
             {
-             constraint (inst.reloc.type >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
-                         && inst.reloc.type <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC ,
+             constraint ((inst.relocs[0].type
+                          >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC)
+                         && (inst.relocs[0].type
+                             <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC) ,
                           THUMB1_RELOC_ONLY);
  
               inst.instruction = THUMB_OP32 (inst.instruction);
               inst.instruction = (inst.instruction & 0xe1ffffff) | 0x10000000;
               inst.instruction |= Rn << r0off;
-             inst.reloc.type = BFD_RELOC_ARM_T32_IMMEDIATE;
+             inst.relocs[0].type = BFD_RELOC_ARM_T32_IMMEDIATE;
             }
         }
        else if (inst.operands[1].shifted && inst.operands[1].immisreg
@@ -11981,7 +12405,7 @@ do_t_mov_cmp (void)
           /* Register shifts are encoded as separate shift instructions.  */
           bfd_boolean flags = (inst.instruction == T_MNEM_movs);
  
-         if (in_it_block ())
+         if (in_pred_block ())
             narrow = !flags;
           else
             narrow = flags;
@@ -12037,7 +12461,7 @@ do_t_mov_cmp (void)
               && (inst.instruction == T_MNEM_mov
                   || inst.instruction == T_MNEM_movs))
             {
-             if (in_it_block ())
+             if (in_pred_block ())
                 narrow = (inst.instruction == T_MNEM_mov);
               else
                 narrow = (inst.instruction == T_MNEM_movs);
@@ -12058,7 +12482,7 @@ do_t_mov_cmp (void)
             {
               inst.instruction |= Rn;
               inst.instruction |= Rm << 3;
-             inst.reloc.type = BFD_RELOC_ARM_THUMB_SHIFT;
+             inst.relocs[0].type = BFD_RELOC_ARM_THUMB_SHIFT;
             }
           else
             {
@@ -12149,7 +12573,7 @@ do_t_mov_cmp (void)
        constraint (Rn > 7,
                   _("only lo regs allowed with immediate"));
        inst.instruction |= Rn << 8;
-      inst.reloc.type = BFD_RELOC_ARM_THUMB_IMM;
+      inst.relocs[0].type = BFD_RELOC_ARM_THUMB_IMM;
      }
  }
  
@@ -12161,24 +12585,24 @@ do_t_mov16 (void)
    bfd_boolean top;
  
    top = (inst.instruction & 0x00800000) != 0;
-  if (inst.reloc.type == BFD_RELOC_ARM_MOVW)
+  if (inst.relocs[0].type == BFD_RELOC_ARM_MOVW)
      {
        constraint (top, _(":lower16: not allowed in this instruction"));
-      inst.reloc.type = BFD_RELOC_ARM_THUMB_MOVW;
+      inst.relocs[0].type = BFD_RELOC_ARM_THUMB_MOVW;
      }
-  else if (inst.reloc.type == BFD_RELOC_ARM_MOVT)
+  else if (inst.relocs[0].type == BFD_RELOC_ARM_MOVT)
      {
        constraint (!top, _(":upper16: not allowed in this instruction"));
-      inst.reloc.type = BFD_RELOC_ARM_THUMB_MOVT;
+      inst.relocs[0].type = BFD_RELOC_ARM_THUMB_MOVT;
      }
  
    Rd = inst.operands[0].reg;
    reject_bad_reg (Rd);
  
    inst.instruction |= Rd << 8;
-  if (inst.reloc.type == BFD_RELOC_UNUSED)
+  if (inst.relocs[0].type == BFD_RELOC_UNUSED)
      {
-      imm = inst.reloc.exp.X_add_number;
+      imm = inst.relocs[0].exp.X_add_number;
        inst.instruction |= (imm & 0xf000) << 4;
        inst.instruction |= (imm & 0x0800) << 15;
        inst.instruction |= (imm & 0x0700) << 4;
@@ -12216,9 +12640,9 @@ do_t_mvn_tst (void)
                || inst.instruction == T_MNEM_tst)
         narrow = TRUE;
        else if (THUMB_SETS_FLAGS (inst.instruction))
-       narrow = !in_it_block ();
+       narrow = !in_pred_block ();
        else
-       narrow = in_it_block ();
+       narrow = in_pred_block ();
  
        if (!inst.operands[1].isreg)
         {
@@ -12228,7 +12652,7 @@ do_t_mvn_tst (void)
             inst.instruction = THUMB_OP32 (inst.instruction);
           inst.instruction = (inst.instruction & 0xe1ffffff) | 0x10000000;
           inst.instruction |= Rn << r0off;
-         inst.reloc.type = BFD_RELOC_ARM_T32_IMMEDIATE;
+         inst.relocs[0].type = BFD_RELOC_ARM_T32_IMMEDIATE;
         }
        else
         {
@@ -12383,9 +12807,9 @@ do_t_mul (void)
           || Rm > 7)
         narrow = FALSE;
        else if (inst.instruction == T_MNEM_muls)
-       narrow = !in_it_block ();
+       narrow = !in_pred_block ();
        else
-       narrow = in_it_block ();
+       narrow = in_pred_block ();
      }
    else
      {
@@ -12451,7 +12875,7 @@ do_t_mull (void)
  static void
  do_t_nop (void)
  {
-  set_it_insn_type (NEUTRAL_IT_INSN);
+  set_pred_insn_type (NEUTRAL_IT_INSN);
  
    if (unified_syntax)
      {
@@ -12489,9 +12913,9 @@ do_t_neg (void)
        bfd_boolean narrow;
  
        if (THUMB_SETS_FLAGS (inst.instruction))
-       narrow = !in_it_block ();
+       narrow = !in_pred_block ();
        else
-       narrow = in_it_block ();
+       narrow = in_pred_block ();
        if (inst.operands[0].reg > 7 || inst.operands[1].reg > 7)
         narrow = FALSE;
        if (inst.size_req == 4)
@@ -12540,7 +12964,7 @@ do_t_orn (void)
    if (!inst.operands[2].isreg)
      {
        inst.instruction = (inst.instruction & 0xe1ffffff) | 0x10000000;
-      inst.reloc.type = BFD_RELOC_ARM_T32_IMMEDIATE;
+      inst.relocs[0].type = BFD_RELOC_ARM_T32_IMMEDIATE;
      }
    else
      {
@@ -12574,8 +12998,8 @@ do_t_pkhbt (void)
    inst.instruction |= Rm;
    if (inst.operands[3].present)
      {
-      unsigned int val = inst.reloc.exp.X_add_number;
-      constraint (inst.reloc.exp.X_op != O_constant,
+      unsigned int val = inst.relocs[0].exp.X_add_number;
+      constraint (inst.relocs[0].exp.X_op != O_constant,
                   _("expression too complex"));
        inst.instruction |= (val & 0x1c) << 10;
        inst.instruction |= (val & 0x03) << 6;
@@ -12615,7 +13039,7 @@ do_t_push_pop (void)
  
    constraint (inst.operands[0].writeback,
               _("push/pop do not support {reglist}^"));
-  constraint (inst.reloc.type != BFD_RELOC_UNUSED,
+  constraint (inst.relocs[0].type != BFD_RELOC_UNUSED,
               _("expression too complex"));
  
    mask = inst.operands[0].imm;
@@ -12632,8 +13056,20 @@ do_t_push_pop (void)
    else if (unified_syntax)
      {
        inst.instruction = THUMB_OP32 (inst.instruction);
-      encode_thumb2_ldmstm (13, mask, TRUE);
+      encode_thumb2_multi (TRUE /* do_io */, 13, mask, TRUE);
+    }
+  else
+    {
+      inst.error = _("invalid register list to push/pop instruction");
+      return;
      }
+}
+
+static void
+do_t_clrm (void)
+{
+  if (unified_syntax)
+    encode_thumb2_multi (FALSE /* do_io */, -1, inst.operands[0].imm, FALSE);
    else
      {
        inst.error = _("invalid register list to push/pop instruction");
@@ -12641,6 +13077,24 @@ do_t_push_pop (void)
      }
  }
  
+static void
+do_t_vscclrm (void)
+{
+  if (inst.operands[0].issingle)
+    {
+      inst.instruction |= (inst.operands[0].reg & 0x1) << 22;
+      inst.instruction |= (inst.operands[0].reg & 0x1e) << 11;
+      inst.instruction |= inst.operands[0].imm;
+    }
+  else
+    {
+      inst.instruction |= (inst.operands[0].reg & 0x10) << 18;
+      inst.instruction |= (inst.operands[0].reg & 0xf) << 12;
+      inst.instruction |= 1 << 8;
+      inst.instruction |= inst.operands[0].imm << 1;
+    }
+}
+
  static void
  do_t_rbit (void)
  {
@@ -12723,9 +13177,9 @@ do_t_rsb (void)
        bfd_boolean narrow;
  
        if ((inst.instruction & 0x00100000) != 0)
-       narrow = !in_it_block ();
+       narrow = !in_pred_block ();
        else
-       narrow = in_it_block ();
+       narrow = in_pred_block ();
  
        if (Rd > 7 || Rs > 7)
         narrow = FALSE;
@@ -12733,15 +13187,15 @@ do_t_rsb (void)
        if (inst.size_req == 4 || !unified_syntax)
         narrow = FALSE;
  
-      if (inst.reloc.exp.X_op != O_constant
-         || inst.reloc.exp.X_add_number != 0)
+      if (inst.relocs[0].exp.X_op != O_constant
+         || inst.relocs[0].exp.X_add_number != 0)
         narrow = FALSE;
  
        /* Turn rsb #0 into 16-bit neg.  We should probably do this via
          relaxation, but it doesn't seem worth the hassle.  */
        if (narrow)
         {
-         inst.reloc.type = BFD_RELOC_UNUSED;
+         inst.relocs[0].type = BFD_RELOC_UNUSED;
           inst.instruction = THUMB_OP16 (T_MNEM_negs);
           inst.instruction |= Rs << 3;
           inst.instruction |= Rd;
@@ -12749,7 +13203,7 @@ do_t_rsb (void)
        else
         {
           inst.instruction = (inst.instruction & 0xe1ffffff) | 0x10000000;
-         inst.reloc.type = BFD_RELOC_ARM_T32_IMMEDIATE;
+         inst.relocs[0].type = BFD_RELOC_ARM_T32_IMMEDIATE;
         }
      }
    else
@@ -12763,7 +13217,7 @@ do_t_setend (void)
        && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8))
        as_tsktsk (_("setend use is deprecated for ARMv8"));
  
-  set_it_insn_type (OUTSIDE_IT_INSN);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
    if (inst.operands[0].imm)
      inst.instruction |= 0x8;
  }
@@ -12793,9 +13247,9 @@ do_t_shift (void)
         }
  
        if (THUMB_SETS_FLAGS (inst.instruction))
-       narrow = !in_it_block ();
+       narrow = !in_pred_block ();
        else
-       narrow = in_it_block ();
+       narrow = in_pred_block ();
        if (inst.operands[0].reg > 7 || inst.operands[1].reg > 7)
         narrow = FALSE;
        if (!inst.operands[2].isreg && shift_kind == SHIFT_ROR)
@@ -12833,7 +13287,7 @@ do_t_shift (void)
               inst.instruction |= inst.operands[0].reg << 8;
               encode_thumb32_shifted_operand (1);
               /* Prevent the incorrect generation of an ARM_IMMEDIATE fixup.  */
-             inst.reloc.type = BFD_RELOC_UNUSED;
+             inst.relocs[0].type = BFD_RELOC_UNUSED;
             }
         }
        else
@@ -12865,7 +13319,7 @@ do_t_shift (void)
                 case SHIFT_LSR: inst.instruction = T_OPCODE_LSR_I; break;
                 default: abort ();
                 }
-             inst.reloc.type = BFD_RELOC_ARM_THUMB_SHIFT;
+             inst.relocs[0].type = BFD_RELOC_ARM_THUMB_SHIFT;
               inst.instruction |= inst.operands[0].reg;
               inst.instruction |= inst.operands[1].reg << 3;
             }
@@ -12909,7 +13363,7 @@ do_t_shift (void)
             case T_MNEM_ror: inst.error = _("ror #imm not supported"); return;
             default: abort ();
             }
-         inst.reloc.type = BFD_RELOC_ARM_THUMB_SHIFT;
+         inst.relocs[0].type = BFD_RELOC_ARM_THUMB_SHIFT;
           inst.instruction |= inst.operands[0].reg;
           inst.instruction |= inst.operands[1].reg << 3;
         }
@@ -12955,25 +13409,25 @@ do_t_simd2 (void)
  static void
  do_t_smc (void)
  {
-  unsigned int value = inst.reloc.exp.X_add_number;
+  unsigned int value = inst.relocs[0].exp.X_add_number;
    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v7a),
               _("SMC is not permitted on this architecture"));
-  constraint (inst.reloc.exp.X_op != O_constant,
+  constraint (inst.relocs[0].exp.X_op != O_constant,
               _("expression too complex"));
-  inst.reloc.type = BFD_RELOC_UNUSED;
+  inst.relocs[0].type = BFD_RELOC_UNUSED;
    inst.instruction |= (value & 0xf000) >> 12;
    inst.instruction |= (value & 0x0ff0);
    inst.instruction |= (value & 0x000f) << 16;
    /* PR gas/15623: SMC instructions must be last in an IT block.  */
-  set_it_insn_type_last ();
+  set_pred_insn_type_last ();
  }
  
  static void
  do_t_hvc (void)
  {
-  unsigned int value = inst.reloc.exp.X_add_number;
+  unsigned int value = inst.relocs[0].exp.X_add_number;
  
-  inst.reloc.type = BFD_RELOC_UNUSED;
+  inst.relocs[0].type = BFD_RELOC_UNUSED;
    inst.instruction |= (value & 0x0fff);
    inst.instruction |= (value & 0xf000) << 4;
  }
@@ -12995,11 +13449,11 @@ do_t_ssat_usat (int bias)
  
    if (inst.operands[3].present)
      {
-      offsetT shift_amount = inst.reloc.exp.X_add_number;
+      offsetT shift_amount = inst.relocs[0].exp.X_add_number;
  
-      inst.reloc.type = BFD_RELOC_UNUSED;
+      inst.relocs[0].type = BFD_RELOC_UNUSED;
  
-      constraint (inst.reloc.exp.X_op != O_constant,
+      constraint (inst.relocs[0].exp.X_op != O_constant,
                   _("expression too complex"));
  
        if (shift_amount != 0)
@@ -13052,7 +13506,7 @@ do_t_strex (void)
    inst.instruction |= inst.operands[0].reg << 8;
    inst.instruction |= inst.operands[1].reg << 12;
    inst.instruction |= inst.operands[2].reg << 16;
-  inst.reloc.type = BFD_RELOC_ARM_T32_OFFSET_U8;
+  inst.relocs[0].type = BFD_RELOC_ARM_T32_OFFSET_U8;
  }
  
  static void
@@ -13130,7 +13584,7 @@ do_t_sxth (void)
  static void
  do_t_swi (void)
  {
-  inst.reloc.type = BFD_RELOC_ARM_SWI;
+  inst.relocs[0].type = BFD_RELOC_ARM_SWI;
  }
  
  static void
@@ -13140,7 +13594,7 @@ do_t_tb (void)
    int half;
  
    half = (inst.instruction & 0x10) != 0;
-  set_it_insn_type_last ();
+  set_pred_insn_type_last ();
    constraint (inst.operands[0].immisreg,
               _("instruction requires register index"));
  
@@ -13176,7 +13630,7 @@ do_t_udf (void)
        inst.instruction |= inst.operands[0].imm;
      }
  
-  set_it_insn_type (NEUTRAL_IT_INSN);
+  set_pred_insn_type (NEUTRAL_IT_INSN);
  }
  
  
@@ -13202,30 +13656,233 @@ do_t_usat16 (void)
    inst.instruction |= Rn << 16;
  }
  
-/* Neon instruction encoder helpers.  */
-
-/* Encodings for the different types for various Neon opcodes.  */
-
-/* An "invalid" code for the following tables.  */
-#define N_INV -1u
-
-struct neon_tab_entry
+/* Check that the branch offset VAL fits in NBITS bits with IS_SIGNED
+   signedness and that its least significant bit is 0.  An unsigned
+   offset must also be strictly positive.  Returns SUCCESS when VAL is
+   acceptable and FAIL otherwise.
+
+   The bounds are computed in a 64-bit type: the assertion permits
+   NBITS == 32, for which shifting a plain int would be undefined.  */
+static int
+v8_1_branch_value_check (int val, int nbits, int is_signed)
  {
-  unsigned integer;
-  unsigned float_or_poly;
-  unsigned scalar_or_imm;
+  gas_assert (nbits > 0 && nbits <= 32);
+  if (is_signed)
+    {
+      long long cmp = 1LL << (nbits - 1);
+      if ((val < -cmp) || (val >= cmp) || (val & 0x01))
+       return FAIL;
+    }
+  else
+    {
+      if ((val <= 0) || (val >= (1LL << nbits)) || (val & 0x1))
+       return FAIL;
+    }
+  return SUCCESS;
+}
+
+/* For branches in Armv8.1-M Mainline (bf, bfl, bfcsel, bfx, bflx).
+
+   Operand 0 is a 5-bit unsigned offset.  Operand 1 is either a label
+   offset (bf: 17 bits, bfl: 19 bits, bfcsel: 13 bits, all signed) or a
+   register (bfx, bflx).  bfcsel also takes a second label in operand 2
+   and a condition in operand 3.  Operands that carry a relocation only
+   have the relocation type set here; immediate operands are range
+   checked with v8_1_branch_value_check and encoded directly.  */
+static void
+do_t_branch_future (void)
+{
+  unsigned long insn = inst.instruction;
+
+  inst.instruction = THUMB_OP32 (inst.instruction);
+  if (inst.operands[0].hasreloc == 0)
+    {
+      if (v8_1_branch_value_check (inst.operands[0].imm, 5, FALSE) == FAIL)
+       as_bad (BAD_BRANCH_OFF);
+
+      inst.instruction |= ((inst.operands[0].imm & 0x1f) >> 1) << 23;
+    }
+  else
+    {
+      inst.relocs[0].type = BFD_RELOC_THUMB_PCREL_BRANCH5;
+      inst.relocs[0].pc_rel = 1;
+    }
+
+  switch (insn)
+    {
+      case T_MNEM_bf:
+       if (inst.operands[1].hasreloc == 0)
+         {
+           int val = inst.operands[1].imm;
+           if (v8_1_branch_value_check (val, 17, TRUE) == FAIL)
+             as_bad (BAD_BRANCH_OFF);
+
+           int immA = (val & 0x0001f000) >> 12;
+           int immB = (val & 0x00000ffc) >> 2;
+           int immC = (val & 0x00000002) >> 1;
+           inst.instruction |= (immA << 16) | (immB << 1) | (immC << 11);
+         }
+       else
+         {
+           inst.relocs[1].type = BFD_RELOC_ARM_THUMB_BF17;
+           inst.relocs[1].pc_rel = 1;
+         }
+       break;
+
+      case T_MNEM_bfl:
+       if (inst.operands[1].hasreloc == 0)
+         {
+           int val = inst.operands[1].imm;
+           if (v8_1_branch_value_check (val, 19, TRUE) == FAIL)
+             as_bad (BAD_BRANCH_OFF);
+
+           int immA = (val & 0x0007f000) >> 12;
+           int immB = (val & 0x00000ffc) >> 2;
+           int immC = (val & 0x00000002) >> 1;
+           inst.instruction |= (immA << 16) | (immB << 1) | (immC << 11);
+         }
+       else
+         {
+           inst.relocs[1].type = BFD_RELOC_ARM_THUMB_BF19;
+           inst.relocs[1].pc_rel = 1;
+         }
+       break;
+
+      case T_MNEM_bfcsel:
+       /* Operand 1.  */
+       if (inst.operands[1].hasreloc == 0)
+         {
+           int val = inst.operands[1].imm;
+           /* Diagnose an out-of-range or odd offset, as bf and bfl do,
+              instead of silently truncating it to 13 bits.  */
+           if (v8_1_branch_value_check (val, 13, TRUE) == FAIL)
+             as_bad (BAD_BRANCH_OFF);
+
+           int immA = (val & 0x00001000) >> 12;
+           int immB = (val & 0x00000ffc) >> 2;
+           int immC = (val & 0x00000002) >> 1;
+           inst.instruction |= (immA << 16) | (immB << 1) | (immC << 11);
+         }
+       else
+         {
+           inst.relocs[1].type = BFD_RELOC_ARM_THUMB_BF13;
+           inst.relocs[1].pc_rel = 1;
+         }
+
+       /* Operand 2.  It must be given in the same form (immediate or
+          relocation) as operand 0.  As an immediate it has to lie 2 or
+          4 bytes past operand 0; a distance of 4 sets the T bit.  */
+       if (inst.operands[2].hasreloc == 0)
+         {
+           constraint ((inst.operands[0].hasreloc != 0), BAD_ARGS);
+           int val2 = inst.operands[2].imm;
+           int val0 = inst.operands[0].imm & 0x1f;
+           int diff = val2 - val0;
+           if (diff == 4)
+             inst.instruction |= 1 << 17; /* T bit.  */
+           else if (diff != 2)
+             as_bad (_("out of range label-relative fixup value"));
+         }
+       else
+         {
+           constraint ((inst.operands[0].hasreloc == 0), BAD_ARGS);
+           inst.relocs[2].type = BFD_RELOC_THUMB_PCREL_BFCSEL;
+           inst.relocs[2].pc_rel = 1;
+         }
+
+       /* Operand 3.  */
+       constraint (inst.cond != COND_ALWAYS, BAD_COND);
+       inst.instruction |= (inst.operands[3].imm & 0xf) << 18;
+       break;
+
+      case T_MNEM_bfx:
+      case T_MNEM_bflx:
+       inst.instruction |= inst.operands[1].reg << 16;
+       break;
+
+      default: abort ();
+    }
+}
+
+/* Helper for do_t_loloop.  When the first relocation holds a constant,
+   range check it as a 12-bit unsigned offset and encode it directly;
+   IS_LE negates the constant first.  Otherwise request a
+   BFD_RELOC_ARM_THUMB_LOOP12 PC-relative relocation.  */
+static void
+v8_1_loop_reloc (int is_le)
+{
+  int offset, imm_hi, imm_lo;
+
+  if (inst.relocs[0].exp.X_op != O_constant)
+    {
+      inst.relocs[0].type = BFD_RELOC_ARM_THUMB_LOOP12;
+      inst.relocs[0].pc_rel = 1;
+      return;
+    }
+
+  offset = inst.relocs[0].exp.X_add_number;
+  if (is_le)
+    offset = -offset;
+
+  if (v8_1_branch_value_check (offset, 12, FALSE) == FAIL)
+    as_bad (BAD_BRANCH_OFF);
+
+  /* Offset bits 2-11 go to instruction bits 1-10, bit 1 to bit 11.  */
+  imm_hi = (offset & 0x00000ffc) >> 2;
+  imm_lo = (offset & 0x00000002) >> 1;
+
+  inst.instruction |= (imm_lo << 11) | (imm_hi << 1);
+}
+
+/* Encode the Scalar Low Overhead Loop instructions (le, wls, dls) of
+   Armv8.1-M Mainline.  le and wls take a loop offset, handled by
+   v8_1_loop_reloc; wls and dls require operand 1 to be a register,
+   which is placed in bits 16 and up.  */
+static void
+do_t_loloop (void)
+{
+  const unsigned long mnem = inst.instruction;
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+  inst.instruction = THUMB_OP32 (inst.instruction);
+
+  if (mnem == T_MNEM_le)
+    {
+      /* le <label>: flag the form without operand 0 in bit 21.  */
+      if (!inst.operands[0].present)
+       inst.instruction |= 1 << 21;
+
+      v8_1_loop_reloc (TRUE);
+      return;
+    }
+
+  if (mnem != T_MNEM_wls && mnem != T_MNEM_dls)
+    abort ();
+
+  /* Only wls carries an offset; the register handling is shared.  */
+  if (mnem == T_MNEM_wls)
+    v8_1_loop_reloc (FALSE);
+
+  constraint (inst.operands[1].isreg != 1, BAD_ARGS);
+  inst.instruction |= (inst.operands[1].reg << 16);
+}
+
+/* MVE instruction encoder helpers.  */
+/* Values that inst.instruction is compared against inside the MVE
+   encoders (see do_mve_vmull and do_mve_vmladav) to tell apart the
+   mnemonics that share an encoding function.  */
+#define M_MNEM_vabav   0xee800f01
+#define M_MNEM_vmladav   0xeef00e00
+#define M_MNEM_vmladava          0xeef00e20
+#define M_MNEM_vmladavx          0xeef01e00
+#define M_MNEM_vmladavax  0xeef01e20
+#define M_MNEM_vmlsdav   0xeef00e01
+#define M_MNEM_vmlsdava          0xeef00e21
+#define M_MNEM_vmlsdavx          0xeef01e01
+#define M_MNEM_vmlsdavax  0xeef01e21
+#define M_MNEM_vmullt  0xee011e00
+#define M_MNEM_vmullb  0xee010e00
+
+/* Neon instruction encoder helpers.  */
+
+/* Encodings for the different types for various Neon opcodes.  */
+
+/* An "invalid" code for the following tables.  */
+#define N_INV -1u
+
+/* Up to three alternative encodings for one overloaded Neon mnemonic.
+   NOTE(review): the rows of NEON_ENC_TAB supply three values each,
+   presumably in this field order -- confirm against the X macro.  */
+struct neon_tab_entry
+{
+  unsigned integer;            /* Presumably the integer-type encoding.  */
+  unsigned float_or_poly;      /* Presumably the float/polynomial one.  */
+  unsigned scalar_or_imm;      /* Presumably the scalar/immediate one.  */
  };
  
  /* Map overloaded Neon opcodes to their respective encodings.  */
  #define NEON_ENC_TAB                                   \
    X(vabd,      0x0000700, 0x1200d00, N_INV),           \
+  X(vabdl,     0x0800700, N_INV,     N_INV),           \
    X(vmax,      0x0000600, 0x0000f00, N_INV),           \
    X(vmin,      0x0000610, 0x0200f00, N_INV),           \
    X(vpadd,     0x0000b10, 0x1000d00, N_INV),           \
    X(vpmax,     0x0000a00, 0x1000f00, N_INV),           \
    X(vpmin,     0x0000a10, 0x1200f00, N_INV),           \
    X(vadd,      0x0000800, 0x0000d00, N_INV),           \
+  X(vaddl,     0x0800000, N_INV,     N_INV),           \
    X(vsub,      0x1000800, 0x0200d00, N_INV),           \
+  X(vsubl,     0x0800200, N_INV,     N_INV),           \
    X(vceq,      0x1000810, 0x0000e00, 0x1b10100),       \
    X(vcge,      0x0000310, 0x1000e00, 0x1b10080),       \
    X(vcgt,      0x0000300, 0x1200e00, 0x1b10000),       \
@@ -13362,12 +14019,14 @@ NEON_ENC_TAB
       - a table used to drive neon_select_shape.  */
  
  #define NEON_SHAPE_DEF                 \
+  X(3, (R, Q, Q), QUAD),               \
    X(3, (D, D, D), DOUBLE),             \
    X(3, (Q, Q, Q), QUAD),               \
    X(3, (D, D, I), DOUBLE),             \
    X(3, (Q, Q, I), QUAD),               \
    X(3, (D, D, S), DOUBLE),             \
    X(3, (Q, Q, S), QUAD),               \
+  X(3, (Q, Q, R), QUAD),               \
    X(2, (D, D), DOUBLE),                        \
    X(2, (Q, Q), QUAD),                  \
    X(2, (D, S), DOUBLE),                        \
@@ -13415,7 +14074,9 @@ NEON_ENC_TAB
    X (2, (H, I), HALF),                 \
    X (3, (H, H, H), HALF),              \
    X (3, (H, F, I), MIXED),             \
-  X (3, (F, H, I), MIXED)
+  X (3, (F, H, I), MIXED),             \
+  X (3, (D, H, H), MIXED),             \
+  X (3, (D, H, S), MIXED)
  
  #define S2(A,B)                NS_##A##B
  #define S3(A,B,C)      NS_##A##B##C
@@ -13555,6 +14216,9 @@ enum neon_type_mask
  #define N_I_ALL    (N_I8 | N_I16 | N_I32 | N_I64)
  #define N_IF_32    (N_I8 | N_I16 | N_I32 | N_F16 | N_F32)
  #define N_F_ALL    (N_F16 | N_F32 | N_F64)
+#define N_I_MVE           (N_I8 | N_I16 | N_I32)
+#define N_F_MVE           (N_F16 | N_F32)
+#define N_SU_MVE   (N_S8 | N_S16 | N_S32 | N_U8 | N_U16 | N_U32)
  
  /* Pass this as the first type argument to neon_check_type to ignore types
     altogether.  */
@@ -14083,7 +14747,7 @@ neon_check_type (unsigned els, enum neon_shape ns, ...)
  
                   if ((given_type & types_allowed) == 0)
                     {
-                     first_error (_("bad type in Neon instruction"));
+                     first_error (BAD_SIMD_TYPE);
                       return badtype;
                     }
                 }
@@ -14526,6 +15190,75 @@ neon_logbits (unsigned x)
  #define LOW4(R) ((R) & 0xf)
  #define HI1(R) (((R) >> 4) & 1)
  
+/* Encode an MVE instruction whose first two operands are vector
+   registers and whose third is a general purpose register.  SIZE is the
+   element size in bits; FP is nonzero for the floating point form.  On
+   entry inst.instruction holds the Neon vadd or vsub bit pattern, which
+   is replaced by the corresponding MVE opcode.  SP or PC as the third
+   operand only produces a warning.  */
+static void
+mve_encode_qqr (int size, int fp)
+{
+  const unsigned neon_pattern = (unsigned) inst.instruction;
+
+  if (inst.operands[2].reg == REG_SP)
+    as_tsktsk (MVE_BAD_SP);
+  else if (inst.operands[2].reg == REG_PC)
+    as_tsktsk (MVE_BAD_PC);
+
+  if (fp)
+    {
+      switch (neon_pattern)
+       {
+       case 0xd00:             /* vadd.  */
+         inst.instruction = 0xee300f40;
+         break;
+       case 0x200d00:          /* vsub.  */
+         inst.instruction = 0xee301f40;
+         break;
+       default:
+         break;
+       }
+
+      /* The size bit is 1 for F16 and 0 for F32.  */
+      inst.instruction |= (size == 16) << 28;
+    }
+  else
+    {
+      switch (neon_pattern)
+       {
+       case 0x800:             /* vadd.  */
+         inst.instruction = 0xee010f40;
+         break;
+       case 0x1000800:         /* vsub.  */
+         inst.instruction = 0xee011f40;
+         break;
+       default:
+         break;
+       }
+
+      /* Two-bit size field.  */
+      inst.instruction |= neon_logbits (size) << 20;
+    }
+
+  /* Operand 0: low four bits at 12, high bit at 22.  */
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  /* Operand 1: low four bits at 16, high bit at 7.  */
+  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+  /* Operand 2 occupies the lowest bits unshifted.  */
+  inst.instruction |= inst.operands[2].reg;
+  inst.is_neon = 1;
+}
+
+/* Encode an MVE instruction with a general purpose register as operand
+   0 and vector registers as operands 1 and 2.  BIT28 is shifted into
+   bit 28 and the log2 code of SIZE into bits 20-21.  */
+static void
+mve_encode_rqq (unsigned bit28, unsigned size)
+{
+  /* Opcode modifiers.  */
+  inst.instruction |= neon_logbits (size) << 20;
+  inst.instruction |= bit28 << 28;
+
+  /* Operand 0 goes in unsplit at bit 12.  */
+  inst.instruction |= inst.operands[0].reg << 12;
+
+  /* Operand 1: low four bits at 16, high bit at 7.  */
+  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+
+  /* Operand 2: low four bits at 0, high bit at 5.  */
+  inst.instruction |= LOW4 (inst.operands[2].reg);
+  inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+
+  inst.is_neon = 1;
+}
+
+/* Encode an MVE instruction with three vector register operands.  Bit
+   28 is set when UBIT is nonzero and the log2 code of SIZE is placed in
+   bits 20-21.  */
+static void
+mve_encode_qqq (int ubit, int size)
+{
+  /* Opcode modifiers.  */
+  inst.instruction |= neon_logbits (size) << 20;
+  inst.instruction |= (ubit != 0) << 28;
+
+  /* Operand 0: low four bits at 12, high bit at 22.  */
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+
+  /* Operand 1: low four bits at 16, high bit at 7.  */
+  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+
+  /* Operand 2: low four bits at 0, high bit at 5.  */
+  inst.instruction |= LOW4 (inst.operands[2].reg);
+  inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+
+  inst.is_neon = 1;
+}
+
+
  /* Encode insns with bit pattern:
  
    |28/24|23|22 |21 20|19 16|15 12|11    8|7|6|5|4|3  0|
@@ -14847,26 +15580,27 @@ static void
  neon_dyadic_misc (enum neon_el_type ubit_meaning, unsigned types,
                   unsigned destbits)
  {
-  enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_QQR, NS_NULL);
    struct neon_type_el et = neon_check_type (3, rs, N_EQK | destbits, N_EQK,
                                             types | N_KEY);
    if (et.type == NT_float)
      {
        NEON_ENCODE (FLOAT, inst);
-      neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1);
+      if (rs == NS_QQR)
+       mve_encode_qqr (et.size, 1);
+      else
+       neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1);
      }
    else
      {
        NEON_ENCODE (INTEGER, inst);
-      neon_three_same (neon_quad (rs), et.type == ubit_meaning, et.size);
+      if (rs == NS_QQR)
+       mve_encode_qqr (et.size, 0);
+      else
+       neon_three_same (neon_quad (rs), et.type == ubit_meaning, et.size);
      }
  }
  
-static void
-do_neon_dyadic_if_su (void)
-{
-  neon_dyadic_misc (NT_unsigned, N_SUF_32, 0);
-}
  
  static void
  do_neon_dyadic_if_su_d (void)
@@ -14925,32 +15659,93 @@ vfp_or_neon_is_neon (unsigned check)
         inst.instruction |= inst.uncond_value << 28;
      }
  
-  if ((check & NEON_CHECK_ARCH)
-      && !mark_feature_used (&fpu_neon_ext_v1))
+
+    if (((check & NEON_CHECK_ARCH) && !mark_feature_used (&fpu_neon_ext_v1))
+       || ((check & NEON_CHECK_ARCH8)
+           && !mark_feature_used (&fpu_neon_ext_armv8)))
+      {
+       first_error (_(BAD_FPU));
+       return FAIL;
+      }
+
+  return SUCCESS;
+}
+
+/* Decide whether the current SIMD instruction may be assembled for the
+   selected CPU and record how it is predicated.  FP is nonzero for the
+   floating point form of the instruction; CHECK is passed on to
+   vfp_or_neon_is_neon whenever the Neon path has to be validated.
+
+   Returns 0 when the instruction is acceptable.  A nonzero result means
+   it is not; the values 1, 2 and 3 only identify which of the three
+   condition ranges rejected it, and the callers in this file merely
+   test for nonzero.  */
+static int
+check_simd_pred_availability (int fp, unsigned check)
+{
+  /* Condition values above COND_ALWAYS are only accepted with MVE and
+     mark the instruction as sitting inside a VPT block.  */
+  if (inst.cond > COND_ALWAYS)
      {
-      first_error (_(BAD_FPU));
-      return FAIL;
+      if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       {
+         inst.error = BAD_FPU;
+         return 1;
+       }
+      inst.pred_insn_type = INSIDE_VPT_INSN;
      }
-
-  if ((check & NEON_CHECK_ARCH8)
-      && !mark_feature_used (&fpu_neon_ext_armv8))
+  /* Condition values below COND_ALWAYS: with MVE the instruction is
+     tagged MVE_OUTSIDE_PRED_INSN, otherwise it must pass the Neon
+     checks.  */
+  else if (inst.cond < COND_ALWAYS)
      {
-      first_error (_(BAD_FPU));
-      return FAIL;
+      if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+      else if (vfp_or_neon_is_neon (check) == FAIL)
+       return 2;
      }
+  else
+    {
+      /* Unconditional: the Neon checks are only consulted when the
+        required MVE feature (mve_fp_ext for FP, else mve_ext) is
+        absent.  */
+      if (!ARM_CPU_HAS_FEATURE (cpu_variant, fp ? mve_fp_ext : mve_ext)
+         && vfp_or_neon_is_neon (check) == FAIL)
+       return 3;
  
+      if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+    }
+  return 0;
  }
  
  static void
-do_neon_addsub_if_i (void)
+do_neon_dyadic_if_su (void)
  {
-  if (try_vfp_nsyn (3, do_vfp_nsyn_add_sub) == SUCCESS)
+  /* The shape and element type are determined here only to tell
+     check_simd_pred_availability whether this is the floating point
+     form; neon_dyadic_misc repeats the selection when encoding.  */
+  enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_QQR, NS_NULL);
+  struct neon_type_el et = neon_check_type (3, rs, N_EQK , N_EQK,
+                                           N_SUF_32 | N_KEY);
+
+  /* A nonzero result means the instruction is not available.  */
+  if (check_simd_pred_availability (et.type == NT_float,
+                                   NEON_CHECK_ARCH | NEON_CHECK_CC))
      return;
  
-  if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+  neon_dyadic_misc (NT_unsigned, N_SUF_32, 0);
+}
+
+static void
+do_neon_addsub_if_i (void)
+{
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)
+      && try_vfp_nsyn (3, do_vfp_nsyn_add_sub) == SUCCESS)
      return;
  
+  enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_QQR, NS_NULL);
+  struct neon_type_el et = neon_check_type (3, rs, N_EQK,
+                                           N_EQK, N_IF_32 | N_I64 | N_KEY);
+
+  constraint (rs == NS_QQR && et.size == 64, BAD_FPU);
+  /* If we are parsing Q registers and the element types match MVE, which NEON
+     also supports, then we must check whether this is an instruction that can
+     be used by both MVE/NEON.  This distinction can be made based on whether
+     they are predicated or not.  */
+  if ((rs == NS_QQQ || rs == NS_QQR) && et.size != 64)
+    {
+      if (check_simd_pred_availability (et.type == NT_float,
+                                       NEON_CHECK_ARCH | NEON_CHECK_CC))
+       return;
+    }
+  else
+    {
+      /* Otherwise the shape is not a Q-register one, or the element size
+        is 64.  */
+      if (rs != NS_QQR
+         && vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+       return;
+    }
+
    /* The "untyped" case can't happen. Do this to stop the "U" bit being
       affected if we specify unsigned args.  */
    neon_dyadic_misc (NT_untyped, N_IF_32 | N_I64, 0);
@@ -15182,6 +15977,120 @@ do_neon_qdmulh (void)
      }
  }
  
+/* Encode MVE vmullb/vmullt.  Without MVE, an unconditional "vmullt" is
+   instead treated as Neon vmul with condition code 0xb (the same code
+   do_neon_dyadic_long uses for its "lt" remapping) and handed to
+   do_neon_mul.  */
+static void
+do_mve_vmull (void)
+{
+
+  enum neon_shape rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_DDS,
+                                         NS_QQS, NS_QQQ, NS_QQR, NS_NULL);
+  /* Decide whether this is really Neon "vmul" + "lt".  */
+  if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+      && inst.cond == COND_ALWAYS
+      && ((unsigned)inst.instruction) == M_MNEM_vmullt)
+    {
+      if (rs == NS_QQQ)
+       {
+
+         struct neon_type_el et = neon_check_type (3, rs, N_EQK , N_EQK,
+                                                   N_SUF_32 | N_F64 | N_P8
+                                                   | N_P16 | N_I_MVE | N_KEY);
+         /* With three Q registers only these element types select the
+            Neon reading; anything else falls through to the MVE path
+            below.  */
+         if (((et.type == NT_poly) && et.size == 8
+              && ARM_CPU_IS_ANY (cpu_variant))
+             || (et.type == NT_integer) || (et.type == NT_float))
+           goto neon_vmul;
+       }
+      else
+       goto neon_vmul;
+    }
+
+  /* From here on only the three Q register shape is accepted.  */
+  constraint (rs != NS_QQQ, BAD_FPU);
+  struct neon_type_el et = neon_check_type (3, rs, N_EQK , N_EQK,
+                                           N_SU_32 | N_P8 | N_P16 | N_KEY);
+
+  /* We are dealing with MVE's vmullt.  */
+  /* For 32-bit elements a destination equal to either source only
+     produces a warning.  */
+  if (et.size == 32
+      && (inst.operands[0].reg == inst.operands[1].reg
+         || inst.operands[0].reg == inst.operands[2].reg))
+    as_tsktsk (BAD_MVE_SRCDEST);
+
+  if (inst.cond > COND_ALWAYS)
+    inst.pred_insn_type = INSIDE_VPT_INSN;
+  else
+    inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+  /* Polynomial types pass the log2 size code as the bit 28 selector
+     (nonzero for 16-bit elements) and a fixed size argument of 64.  */
+  if (et.type == NT_poly)
+    mve_encode_qqq (neon_logbits (et.size), 64);
+  else
+    mve_encode_qqq (et.type == NT_unsigned, et.size);
+
+  return;
+
+neon_vmul:
+  /* Rewrite the instruction as a conditional Neon vmul; in Thumb mode
+     it is also marked as belonging inside an IT block.  */
+  inst.instruction = N_MNEM_vmul;
+  inst.cond = 0xb;
+  if (thumb_mode)
+    inst.pred_insn_type = INSIDE_IT_INSN;
+  do_neon_mul ();
+}
+
+/* Encode MVE vabav.  Nothing is encoded when the operands do not match
+   the NS_RQQ shape or when MVE is not available.  The element type must
+   be a signed or unsigned 8, 16 or 32-bit integer; an unsigned type
+   sets bit 28 through mve_encode_rqq.  */
+static void
+do_mve_vabav (void)
+{
+  struct neon_type_el et;
+
+  if (neon_select_shape (NS_RQQ, NS_NULL) == NS_NULL
+      || !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    return;
+
+  et = neon_check_type (2, NS_NULL, N_EQK, N_SU_MVE | N_KEY);
+
+  inst.pred_insn_type = (inst.cond > COND_ALWAYS
+                        ? INSIDE_VPT_INSN : MVE_OUTSIDE_PRED_INSN);
+
+  mve_encode_rqq (et.type == NT_unsigned, et.size);
+}
+
+/* Encode the MVE vmladav and vmlsdav families (with their a, x and ax
+   variants).  Unsigned element types are rejected for every variant
+   except plain vmladav and vmladava.  Operand 2 must be a register
+   numbered 14 or lower.  */
+static void
+do_mve_vmladav (void)
+{
+  enum neon_shape rs = neon_select_shape (NS_RQQ, NS_NULL);
+  struct neon_type_el et = neon_check_type (3, rs,
+                                           N_EQK, N_EQK, N_SU_MVE | N_KEY);
+
+  /* inst.instruction is not modified until the size bit below, so the
+     mnemonic can be classified once.  */
+  const int is_vmlsdav = (inst.instruction == M_MNEM_vmlsdav
+                         || inst.instruction == M_MNEM_vmlsdava
+                         || inst.instruction == M_MNEM_vmlsdavx
+                         || inst.instruction == M_MNEM_vmlsdavax);
+  const int is_vmladav_x = (inst.instruction == M_MNEM_vmladavx
+                           || inst.instruction == M_MNEM_vmladavax);
+
+  if (et.type == NT_unsigned && (is_vmladav_x || is_vmlsdav))
+    first_error (BAD_SIMD_TYPE);
+
+  constraint (inst.operands[2].reg > 14,
+             _("MVE vector register in the range [Q0..Q7] expected"));
+
+  inst.pred_insn_type = (inst.cond > COND_ALWAYS
+                        ? INSIDE_VPT_INSN : MVE_OUTSIDE_PRED_INSN);
+
+  /* 8-bit elements are flagged in bit 28 for the vmlsdav family and in
+     bit 8 for the vmladav family.  */
+  inst.instruction |= (et.size == 8) << (is_vmlsdav ? 28 : 8);
+
+  mve_encode_rqq (et.type == NT_unsigned, 64);
+  /* 32-bit elements are flagged in bit 16.  */
+  inst.instruction |= (et.size == 32) << 16;
+}
+
  static void
  do_neon_qrdmlah (void)
  {
@@ -15249,12 +16158,13 @@ do_neon_abs_neg (void)
    if (try_vfp_nsyn (2, do_vfp_nsyn_abs_neg) == SUCCESS)
      return;
  
-  if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
-    return;
-
    rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
    et = neon_check_type (2, rs, N_EQK, N_S_32 | N_F_16_32 | N_KEY);
  
+  if (check_simd_pred_availability (et.type == NT_float,
+                                   NEON_CHECK_ARCH | NEON_CHECK_CC))
+    return;
+
    inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
    inst.instruction |= HI1 (inst.operands[0].reg) << 22;
    inst.instruction |= LOW4 (inst.operands[1].reg);
@@ -15632,7 +16542,7 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16),
                 _(BAD_FP16));
  
-  set_it_insn_type (OUTSIDE_IT_INSN);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
  
    switch (flavour)
      {
@@ -15787,7 +16697,7 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
        if (mode != neon_cvt_mode_x && mode != neon_cvt_mode_z)
         {
           NEON_ENCODE (FLOAT, inst);
-         set_it_insn_type (OUTSIDE_IT_INSN);
+         set_pred_insn_type (OUTSIDE_PRED_INSN);
  
           if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL)
             return;
@@ -15845,6 +16755,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
      /* Half-precision conversions for Advanced SIMD -- neon.  */
      case NS_QD:
      case NS_DQ:
+      if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+       return;
  
        if ((rs == NS_DQ)
           && (inst.vectype.el[0].size != 16 || inst.vectype.el[1].size != 32))
@@ -16086,10 +16998,49 @@ neon_mixed_length (struct neon_type_el et, unsigned size)
  static void
  do_neon_dyadic_long (void)
  {
-  /* FIXME: Type checking for lengthening op.  */
-  struct neon_type_el et = neon_check_type (3, NS_QDD,
-    N_EQK | N_DBL, N_EQK, N_SU_32 | N_KEY);
-  neon_mixed_length (et, et.size);
+  /* Shape NS_QDD is the Neon lengthening operation.  The other accepted
+     shapes are only valid with MVE, where the trailing "l" of the
+     mnemonic is reinterpreted as part of a condition (see below).  */
+  enum neon_shape rs = neon_select_shape (NS_QDD, NS_QQQ, NS_QQR, NS_NULL);
+  if (rs == NS_QDD)
+    {
+      if (vfp_or_neon_is_neon (NEON_CHECK_ARCH | NEON_CHECK_CC) == FAIL)
+       return;
+
+      NEON_ENCODE (INTEGER, inst);
+      /* FIXME: Type checking for lengthening op.  */
+      struct neon_type_el et = neon_check_type (3, NS_QDD,
+       N_EQK | N_DBL, N_EQK, N_SU_32 | N_KEY);
+      neon_mixed_length (et, et.size);
+    }
+  else if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+          && (inst.cond == 0xf || inst.cond == 0x10))
+    {
+      /* If parsing for MVE, vaddl/vsubl/vabdl{e,t} can only be vadd/vsub/vabd
+        in an IT block with le/lt conditions.  */
+
+      /* NOTE(review): 0xf and 0x10 are presumably the parsed "t" and
+        "e" suffixes; they are remapped to condition codes 0xb and 0xd
+        respectively -- confirm against the condition table.  */
+      if (inst.cond == 0xf)
+       inst.cond = 0xb;
+      else if (inst.cond == 0x10)
+       inst.cond = 0xd;
+
+      inst.pred_insn_type = INSIDE_IT_INSN;
+
+      /* Swap in the non-lengthening mnemonic and reuse its encoder.  */
+      if (inst.instruction == N_MNEM_vaddl)
+       {
+         inst.instruction = N_MNEM_vadd;
+         do_neon_addsub_if_i ();
+       }
+      else if (inst.instruction == N_MNEM_vsubl)
+       {
+         inst.instruction = N_MNEM_vsub;
+         do_neon_addsub_if_i ();
+       }
+      else if (inst.instruction == N_MNEM_vabdl)
+       {
+         inst.instruction = N_MNEM_vabd;
+         do_neon_dyadic_if_su ();
+       }
+    }
+  else
+    first_error (BAD_FPU);
  }
  
  static void
@@ -16125,6 +17076,133 @@ do_neon_mac_maybe_scalar_long (void)
    neon_mac_reg_scalar_long (N_S16 | N_S32 | N_U16 | N_U32, N_SU_32);
  }
  
+/* Like neon_scalar_for_mul, generate the Rm encoding from GAS's
+   internal SCALAR.  QUAD_P is 1 for the Q format and 0 otherwise.
+
+   The Q format accepts registers 0-7 with element index 0-3, the other
+   format registers 0-15 with element index 0-1.  Anything else reports
+   an error and yields 0.  */
+
+static unsigned
+neon_scalar_for_fmac_fp16_long (unsigned scalar, unsigned quad_p)
+{
+  const unsigned regno = NEON_SCALAR_REG (scalar);
+  const unsigned elno = NEON_SCALAR_INDEX (scalar);
+  const unsigned max_regno = quad_p ? 7 : 15;
+  const unsigned max_elno = quad_p ? 3 : 1;
+
+  if (regno > max_regno || elno > max_elno)
+    {
+      first_error (_("scalar out of range for multiply instruction"));
+      return 0;
+    }
+
+  if (quad_p)
+    /* Register in bits 0-2, index bit 0 in bit 3, index bit 1 in bit 5.  */
+    return ((regno & 0x7)
+           | ((elno & 0x1) << 3)
+           | (((elno >> 1) & 0x1) << 5));
+
+  /* Register bit 0 in bit 5, register bits 1-3 in bits 0-2, index in
+     bit 3.  */
+  return (((regno & 0x1) << 5)
+         | ((regno >> 1) & 0x7)
+         | ((elno & 0x1) << 3));
+}
+
+/* Encode vfmal (SUBTYPE == 0) or vfmsl (SUBTYPE != 0), either in the
+   three-register form or with a scalar indexed third operand.  Requires
+   the arm_ext_fp16_fml and fpu_neon_ext_armv8 features; a conditional
+   use only produces a warning.  */
+static void
+do_neon_fmac_maybe_scalar_long (int subtype)
+{
+  enum neon_shape rs;
+  /* Replacement for the top eight bits of the instruction.  Kept
+     unsigned so that a value with bit 31 set is neither converted to a
+     negative int nor sign-extended when OR-ed into inst.instruction.  */
+  unsigned high8;
+  /* NOTE: vfmal/vfmsl use slightly different NEON three-same encoding.  'size'
+     field (bits[21:20]) has different meaning.  For scalar index variant, it's
+     used to differentiate add and subtract, otherwise it's with fixed value
+     0x2.  */
+  int size = -1;
+
+  if (inst.cond != COND_ALWAYS)
+    as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the "
+              "behaviour is UNPREDICTABLE"));
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml),
+             _(BAD_FP16));
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
+             _(BAD_FPU));
+
+  /* vfmal/vfmsl are in three-same D/Q register format or the third operand can
+     be a scalar index register.  */
+  if (inst.operands[2].isscalar)
+    {
+      high8 = 0xfe000000;
+      if (subtype)
+       size = 16;
+      rs = neon_select_shape (NS_DHS, NS_QDS, NS_NULL);
+    }
+  else
+    {
+      high8 = 0xfc000000;
+      size = 32;
+      if (subtype)
+       inst.instruction |= (0x1 << 23);
+      rs = neon_select_shape (NS_DHH, NS_QDD, NS_NULL);
+    }
+
+  neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_F16);
+
+  /* "opcode" from template has included "ubit", so simply pass 0 here.  Also,
+     the "S" bit in size field has been reused to differentiate vfmal and vfmsl,
+     so the SIZE chosen above is passed instead of the element size.  */
+  unsigned quad_p = (rs == NS_QDD || rs == NS_QDS);
+  neon_three_same (quad_p, 0, size);
+
+  /* Undo neon_dp_fixup.  Redo the high eight bits.  */
+  inst.instruction &= 0x00ffffff;
+  inst.instruction |= high8;
+
+#define LOW1(R) ((R) & 0x1)
+#define HI4(R) (((R) >> 1) & 0xf)
+  /* Unlike usually NEON three-same, encoding for Vn and Vm will depend on
+     whether the instruction is in Q form and whether Vm is a scalar indexed
+     operand.  */
+  if (inst.operands[2].isscalar)
+    {
+      unsigned rm
+       = neon_scalar_for_fmac_fp16_long (inst.operands[2].reg, quad_p);
+      inst.instruction &= 0xffffffd0;
+      inst.instruction |= rm;
+
+      if (!quad_p)
+       {
+         /* Redo Rn as well.  */
+         inst.instruction &= 0xfff0ff7f;
+         inst.instruction |= HI4 (inst.operands[1].reg) << 16;
+         inst.instruction |= LOW1 (inst.operands[1].reg) << 7;
+       }
+    }
+  else if (!quad_p)
+    {
+      /* Redo Rn and Rm.  */
+      inst.instruction &= 0xfff0ff50;
+      inst.instruction |= HI4 (inst.operands[1].reg) << 16;
+      inst.instruction |= LOW1 (inst.operands[1].reg) << 7;
+      inst.instruction |= HI4 (inst.operands[2].reg);
+      inst.instruction |= LOW1 (inst.operands[2].reg) << 5;
+    }
+}
+
+/* Encode vfmal: the add variant (subtype 0) of the shared encoder.  */
+static void
+do_neon_vfmal (void)
+{
+  /* No "return <expr>;" here: ISO C forbids a return statement with an
+     expression in a function returning void.  */
+  do_neon_fmac_maybe_scalar_long (0);
+}
+
+/* Encode vfmsl: the subtract variant (subtype 1) of the shared
+   encoder.  */
+static void
+do_neon_vfmsl (void)
+{
+  /* No "return <expr>;" here: ISO C forbids a return statement with an
+     expression in a function returning void.  */
+  do_neon_fmac_maybe_scalar_long (1);
+}
+
  static void
  do_neon_dyadic_wide (void)
  {
@@ -16500,7 +17578,26 @@ do_neon_mov (void)
      case NS_HI:
      case NS_FI:  /* case 10 (fconsts).  */
        ldconst = "fconsts";
-      encode_fconstd:
+    encode_fconstd:
+      if (!inst.operands[1].immisfloat)
+       {
+         unsigned new_imm;
+         /* Immediate has to fit in 8 bits so float is enough.  */
+         float imm = (float) inst.operands[1].imm;
+         memcpy (&new_imm, &imm, sizeof (float));
+         /* But the assembly may have been written to provide an integer
+            bit pattern that equates to a float, so check that the
+            conversion has worked.  */
+         if (is_quarter_float (new_imm))
+           {
+             if (is_quarter_float (inst.operands[1].imm))
+               as_warn (_("immediate constant is valid both as a bit-pattern and a floating point value (using the fp value)"));
+
+             inst.operands[1].imm = new_imm;
+             inst.operands[1].immisfloat = 1;
+           }
+       }
+
        if (is_quarter_float (inst.operands[1].imm))
         {
           inst.operands[1].imm = neon_qfloat_bits (inst.operands[1].imm);
@@ -16589,8 +17686,19 @@ do_neon_movhf (void)
    enum neon_shape rs = neon_select_shape (NS_HH, NS_NULL);
    constraint (rs != NS_HH, _("invalid suffix"));
  
-  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
-             _(BAD_FPU));
+  if (inst.cond != COND_ALWAYS)
+    {
+      if (thumb_mode)
+       {
+         as_warn (_("ARMv8.2 scalar fp16 instruction cannot be conditional,"
+                    " the behaviour is UNPREDICTABLE"));
+       }
+      else
+       {
+         inst.error = BAD_COND;
+         return;
+       }
+    }
  
    do_vfp_sp_monadic ();
  
@@ -16788,6 +17896,55 @@ do_neon_ldr_str (void)
      }
  }
  
+/* Encode the system register form of VLDR/VSTR.  Operand 0 supplies a
+   4-bit register number, operand 1 the address.  The address checks all
+   run in sequence and each one overwrites inst.error, so the last
+   failing check determines the reported message.  */
+static void
+do_t_vldr_vstr_sysreg (void)
+{
+  /* Bit that marks a load in the incoming opcode and in the rebuilt
+     opcode respectively.  */
+  const int fp_load_bit = 20;
+  const int sysreg_load_bit = 20;
+  const bfd_boolean is_load
+    = ((inst.instruction & (1 << fp_load_bit)) != 0);
+
+  /* Use of PC is UNPREDICTABLE.  */
+  if (inst.operands[1].reg == REG_PC)
+    inst.error = _("Use of PC here is UNPREDICTABLE");
+
+  if (inst.operands[1].immisreg)
+    inst.error = _("instruction does not accept register index");
+
+  if (!inst.operands[1].isreg)
+    inst.error = _("instruction does not accept PC-relative addressing");
+
+  if (abs (inst.operands[1].imm) >= (1 << 7))
+    inst.error = _("immediate value out of range");
+
+  /* Start over from the fixed opcode and restore the load bit.  */
+  inst.instruction = 0xec000f80;
+  if (is_load)
+    inst.instruction |= 1 << sysreg_load_bit;
+
+  encode_arm_cp_address (1, TRUE, FALSE, BFD_RELOC_ARM_T32_VLDR_VSTR_OFF_IMM);
+
+  /* Register number: bits 0-2 go to bits 13-15, bit 3 to bit 22.  */
+  inst.instruction |= (((inst.operands[0].imm & 0x7) << 13)
+                      | ((inst.operands[0].imm & 0x8) << 19));
+}
+
+/* Dispatch VLDR/VSTR.  When operand 0 is a register this is the
+   ordinary form, which needs fpu_vfp_ext_v1xd; otherwise it is the
+   system register form, which needs arm_ext_v8_1m_main.  A missing
+   feature is reported but the encoder still runs.  */
+static void
+do_vldr_vstr (void)
+{
+  if (inst.operands[0].isreg)
+    {
+      /* VLDR/VSTR.  */
+      if (!mark_feature_used (&fpu_vfp_ext_v1xd))
+       as_bad (_("Instruction not permitted on this architecture"));
+      do_neon_ldr_str ();
+      return;
+    }
+
+  /* VLDR/VSTR (System Register).  */
+  if (!mark_feature_used (&arm_ext_v8_1m_main))
+    as_bad (_("Instruction not permitted on this architecture"));
+
+  do_t_vldr_vstr_sysreg ();
+}
+
  /* "interleave" version also handles non-interleaving register VLD1/VST1
     instructions.  */
  
@@ -17106,8 +18263,8 @@ do_neon_ldx_stx (void)
    else
      {
        constraint (inst.operands[1].immisreg, BAD_ADDR_MODE);
-      constraint (inst.reloc.exp.X_op != O_constant
-                 || inst.reloc.exp.X_add_number != 0,
+      constraint (inst.relocs[0].exp.X_op != O_constant
+                 || inst.relocs[0].exp.X_add_number != 0,
                   BAD_ADDR_MODE);
  
        if (inst.operands[1].writeback)
@@ -17156,7 +18313,7 @@ do_vfp_nsyn_fpv8 (enum neon_shape rs)
  static void
  do_vsel (void)
  {
-  set_it_insn_type (OUTSIDE_IT_INSN);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
  
    if (try_vfp_nsyn (3, do_vfp_nsyn_fpv8) != SUCCESS)
      first_error (_("invalid instruction shape"));
@@ -17165,7 +18322,7 @@ do_vsel (void)
  static void
  do_vmaxnm (void)
  {
-  set_it_insn_type (OUTSIDE_IT_INSN);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
  
    if (try_vfp_nsyn (3, do_vfp_nsyn_fpv8) == SUCCESS)
      return;
@@ -17198,7 +18355,7 @@ do_vrint_1 (enum neon_cvt_mode mode)
        /* VFP encodings.  */
        if (mode == neon_cvt_mode_a || mode == neon_cvt_mode_n
           || mode == neon_cvt_mode_p || mode == neon_cvt_mode_m)
-       set_it_insn_type (OUTSIDE_IT_INSN);
+       set_pred_insn_type (OUTSIDE_PRED_INSN);
  
        NEON_ENCODE (FPV8, inst);
        if (rs == NS_FF || rs == NS_HH)
@@ -17234,7 +18391,7 @@ do_vrint_1 (enum neon_cvt_mode mode)
        if (et.type == NT_invtype)
         return;
  
-      set_it_insn_type (OUTSIDE_IT_INSN);
+      set_pred_insn_type (OUTSIDE_PRED_INSN);
        NEON_ENCODE (FLOAT, inst);
  
        if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL)
@@ -17330,8 +18487,9 @@ do_vcmla (void)
  {
    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
               _(BAD_FPU));
-  constraint (inst.reloc.exp.X_op != O_constant, _("expression too complex"));
-  unsigned rot = inst.reloc.exp.X_add_number;
+  constraint (inst.relocs[0].exp.X_op != O_constant,
+             _("expression too complex"));
+  unsigned rot = inst.relocs[0].exp.X_add_number;
    constraint (rot != 0 && rot != 90 && rot != 180 && rot != 270,
               _("immediate out of range"));
    rot /= 90;
@@ -17371,8 +18529,9 @@ do_vcadd (void)
  {
    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
               _(BAD_FPU));
-  constraint (inst.reloc.exp.X_op != O_constant, _("expression too complex"));
-  unsigned rot = inst.reloc.exp.X_add_number;
+  constraint (inst.relocs[0].exp.X_op != O_constant,
+             _("expression too complex"));
+  unsigned rot = inst.relocs[0].exp.X_add_number;
    constraint (rot != 90 && rot != 270, _("immediate out of range"));
    enum neon_shape rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL);
    unsigned size = neon_check_type (3, rs, N_EQK, N_EQK,
@@ -17461,7 +18620,7 @@ do_neon_dotproduct_u (void)
  static void
  do_crypto_2op_1 (unsigned elttype, int op)
  {
-  set_it_insn_type (OUTSIDE_IT_INSN);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
  
    if (neon_check_type (2, NS_QQ, N_EQK | N_UNT, elttype | N_UNT | N_KEY).type
        == NT_invtype)
@@ -17486,7 +18645,7 @@ do_crypto_2op_1 (unsigned elttype, int op)
  static void
  do_crypto_3op_1 (int u, int op)
  {
-  set_it_insn_type (OUTSIDE_IT_INSN);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
  
    if (neon_check_type (3, NS_QQQ, N_EQK | N_UNT, N_EQK | N_UNT,
                        N_32 | N_UNT | N_KEY).type == NT_invtype)
@@ -17589,7 +18748,7 @@ do_crc32_1 (unsigned int poly, unsigned int sz)
    unsigned int Rn = inst.operands[1].reg;
    unsigned int Rm = inst.operands[2].reg;
  
-  set_it_insn_type (OUTSIDE_IT_INSN);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
    inst.instruction |= LOW4 (Rd) << (thumb_mode ? 8 : 12);
    inst.instruction |= LOW4 (Rn) << 16;
    inst.instruction |= LOW4 (Rm);
@@ -17722,18 +18881,18 @@ output_relax_insn (void)
       start of the instruction.  */
    dwarf2_emit_insn (0);
  
-  switch (inst.reloc.exp.X_op)
+  switch (inst.relocs[0].exp.X_op)
      {
      case O_symbol:
-      sym = inst.reloc.exp.X_add_symbol;
-      offset = inst.reloc.exp.X_add_number;
+      sym = inst.relocs[0].exp.X_add_symbol;
+      offset = inst.relocs[0].exp.X_add_number;
        break;
      case O_constant:
        sym = NULL;
-      offset = inst.reloc.exp.X_add_number;
+      offset = inst.relocs[0].exp.X_add_number;
        break;
      default:
-      sym = make_expr_symbol (&inst.reloc.exp);
+      sym = make_expr_symbol (&inst.relocs[0].exp);
        offset = 0;
        break;
    }
@@ -17789,10 +18948,14 @@ output_inst (const char * str)
    else
      md_number_to_chars (to, inst.instruction, inst.size);
  
-  if (inst.reloc.type != BFD_RELOC_UNUSED)
-    fix_new_arm (frag_now, to - frag_now->fr_literal,
-                inst.size, & inst.reloc.exp, inst.reloc.pc_rel,
-                inst.reloc.type);
+  int r;
+  for (r = 0; r < ARM_IT_MAX_RELOCS; r++)
+    {
+      if (inst.relocs[r].type != BFD_RELOC_UNUSED)
+       fix_new_arm (frag_now, to - frag_now->fr_literal,
+                    inst.size, & inst.relocs[r].exp, inst.relocs[r].pc_rel,
+                    inst.relocs[r].type);
+    }
  
    dwarf2_emit_insn (inst.size);
  }
@@ -17827,9 +18990,10 @@ enum opcode_tag
    OT_unconditionalF,   /* Instruction cannot be conditionalized
                            and carries 0xF in its ARM condition field.  */
    OT_csuffix,          /* Instruction takes a conditional suffix.  */
-  OT_csuffixF,         /* Some forms of the instruction take a conditional
-                          suffix, others place 0xF where the condition field
-                          would be.  */
+  OT_csuffixF,         /* Some forms of the instruction take a scalar
+                          conditional suffix, others place 0xF where the
+                          condition field would be, others take a vector
+                          conditional suffix.  */
    OT_cinfix3,          /* Instruction takes a conditional infix,
                            beginning at character index 3.  (In
                            unified mode, it becomes a suffix.)  */
@@ -17975,17 +19139,35 @@ opcode_lookup (char **str)
        inst.cond = cond->value;
        return opcode;
      }
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+   {
+    /* Cannot have a conditional suffix on a mnemonic of less than one
+       character.  */
+    if (end - base < 2)
+      return NULL;
+     affix = end - 1;
+     cond = (const struct asm_cond *) hash_find_n (arm_vcond_hsh, affix, 1);
+     opcode = (const struct asm_opcode *) hash_find_n (arm_ops_hsh, base,
+                                                     affix - base);
+     /* If this opcode can not be vector predicated then don't accept it with a
+       vector predication code.  */
+     if (opcode && !opcode->mayBeVecPred)
+       opcode = NULL;
+   }
+  if (!opcode || !cond)
+    {
+      /* Cannot have a conditional suffix on a mnemonic of less than two
+        characters.  */
+      if (end - base < 3)
+       return NULL;
  
-  /* Cannot have a conditional suffix on a mnemonic of less than two
-     characters.  */
-  if (end - base < 3)
-    return NULL;
+      /* Look for suffixed mnemonic.  */
+      affix = end - 2;
+      cond = (const struct asm_cond *) hash_find_n (arm_cond_hsh, affix, 2);
+      opcode = (const struct asm_opcode *) hash_find_n (arm_ops_hsh, base,
+                                                       affix - base);
+    }
  
-  /* Look for suffixed mnemonic.  */
-  affix = end - 2;
-  cond = (const struct asm_cond *) hash_find_n (arm_cond_hsh, affix, 2);
-  opcode = (const struct asm_opcode *) hash_find_n (arm_ops_hsh, base,
-                                                   affix - base);
    if (opcode && cond)
      {
        /* step CE */
@@ -17999,7 +19181,7 @@ opcode_lookup (char **str)
         case OT_cinfix3_deprecated:
         case OT_odd_infix_unc:
           if (!unified_syntax)
-           return 0;
+           return NULL;
           /* Fall through.  */
  
         case OT_csuffix:
@@ -18064,7 +19246,7 @@ opcode_lookup (char **str)
  
  /* This function generates an initial IT instruction, leaving its block
     virtually open for the new instructions. Eventually,
-   the mask will be updated by now_it_add_mask () each time
+   the mask will be updated by now_pred_add_mask () each time
     a new instruction needs to be included in the IT block.
     Finally, the block is closed with close_automatic_it_block ().
     The block closure can be requested either from md_assemble (),
@@ -18073,14 +19255,14 @@ opcode_lookup (char **str)
  static void
  new_automatic_it_block (int cond)
  {
-  now_it.state = AUTOMATIC_IT_BLOCK;
-  now_it.mask = 0x18;
-  now_it.cc = cond;
-  now_it.block_length = 1;
+  now_pred.state = AUTOMATIC_PRED_BLOCK;
+  now_pred.mask = 0x18;
+  now_pred.cc = cond;
+  now_pred.block_length = 1;
    mapping_state (MAP_THUMB);
-  now_it.insn = output_it_inst (cond, now_it.mask, NULL);
-  now_it.warn_deprecated = FALSE;
-  now_it.insn_cond = TRUE;
+  now_pred.insn = output_it_inst (cond, now_pred.mask, NULL);
+  now_pred.warn_deprecated = FALSE;
+  now_pred.insn_cond = TRUE;
  }
  
  /* Close an automatic IT block.
@@ -18089,29 +19271,29 @@ new_automatic_it_block (int cond)
  static void
  close_automatic_it_block (void)
  {
-  now_it.mask = 0x10;
-  now_it.block_length = 0;
+  now_pred.mask = 0x10;
+  now_pred.block_length = 0;
  }
  
  /* Update the mask of the current automatically-generated IT
     instruction. See comments in new_automatic_it_block ().  */
  
  static void
-now_it_add_mask (int cond)
+now_pred_add_mask (int cond)
  {
  #define CLEAR_BIT(value, nbit)  ((value) & ~(1 << (nbit)))
  #define SET_BIT_VALUE(value, bitvalue, nbit)  (CLEAR_BIT (value, nbit) \
                                               | ((bitvalue) << (nbit)))
    const int resulting_bit = (cond & 1);
  
-  now_it.mask &= 0xf;
-  now_it.mask = SET_BIT_VALUE (now_it.mask,
+  now_pred.mask &= 0xf;
+  now_pred.mask = SET_BIT_VALUE (now_pred.mask,
                                    resulting_bit,
-                                 (5 - now_it.block_length));
-  now_it.mask = SET_BIT_VALUE (now_it.mask,
+                                 (5 - now_pred.block_length));
+  now_pred.mask = SET_BIT_VALUE (now_pred.mask,
                                    1,
-                                  ((5 - now_it.block_length) - 1) );
-  output_it_inst (now_it.cc, now_it.mask, now_it.insn);
+                                  ((5 - now_pred.block_length) - 1));
+  output_it_inst (now_pred.cc, now_pred.mask, now_pred.insn);
  
  #undef CLEAR_BIT
  #undef SET_BIT_VALUE
@@ -18119,9 +19301,9 @@ now_it_add_mask (int cond)
  
  /* The IT blocks handling machinery is accessed through the these functions:
       it_fsm_pre_encode ()               from md_assemble ()
-     set_it_insn_type ()                optional, from the tencode functions
-     set_it_insn_type_last ()           ditto
-     in_it_block ()                     ditto
+     set_pred_insn_type ()             optional, from the tencode functions
+     set_pred_insn_type_last ()                ditto
+     in_pred_block ()                  ditto
       it_fsm_post_encode ()              from md_assemble ()
       force_automatic_it_block_close ()  from label handling functions
  
@@ -18131,37 +19313,38 @@ now_it_add_mask (int cond)
         on the inst.condition.
       2) During the tencode function, two things may happen:
         a) The tencode function overrides the IT insn type by
-          calling either set_it_insn_type (type) or set_it_insn_type_last ().
+          calling either set_pred_insn_type (type) or
+          set_pred_insn_type_last ().
         b) The tencode function queries the IT block state by
-          calling in_it_block () (i.e. to determine narrow/not narrow mode).
+          calling in_pred_block () (i.e. to determine narrow/not narrow mode).
  
-       Both set_it_insn_type and in_it_block run the internal FSM state
-       handling function (handle_it_state), because: a) setting the IT insn
+       Both set_pred_insn_type and in_pred_block run the internal FSM state
+       handling function (handle_pred_state), because: a) setting the IT insn
         type may incur in an invalid state (exiting the function),
         and b) querying the state requires the FSM to be updated.
         Specifically we want to avoid creating an IT block for conditional
         branches, so it_fsm_pre_encode is actually a guess and we can't
         determine whether an IT block is required until the tencode () routine
         has decided what type of instruction this actually it.
-       Because of this, if set_it_insn_type and in_it_block have to be used,
-       set_it_insn_type has to be called first.
+       Because of this, if set_pred_insn_type and in_pred_block have to be
+       used, set_pred_insn_type has to be called first.
  
-       set_it_insn_type_last () is a wrapper of set_it_insn_type (type), that
-       determines the insn IT type depending on the inst.cond code.
+       set_pred_insn_type_last () is a wrapper of set_pred_insn_type (type),
+       that determines the insn IT type depending on the inst.cond code.
         When a tencode () routine encodes an instruction that can be
         either outside an IT block, or, in the case of being inside, has to be
-       the last one, set_it_insn_type_last () will determine the proper
+       the last one, set_pred_insn_type_last () will determine the proper
         IT instruction type based on the inst.cond code. Otherwise,
-       set_it_insn_type can be called for overriding that logic or
+       set_pred_insn_type can be called for overriding that logic or
         for covering other cases.
  
-       Calling handle_it_state () may not transition the IT block state to
-       OUTSIDE_IT_BLOCK immediately, since the (current) state could be
+       Calling handle_pred_state () may not transition the IT block state to
+       OUTSIDE_PRED_BLOCK immediately, since the (current) state could be
         still queried. Instead, if the FSM determines that the state should
-       be transitioned to OUTSIDE_IT_BLOCK, a flag is marked to be closed
+       be transitioned to OUTSIDE_PRED_BLOCK, a flag is marked to be closed
         after the tencode () function: that's what it_fsm_post_encode () does.
  
-       Since in_it_block () calls the state handling function to get an
+       Since in_pred_block () calls the state handling function to get an
         updated state, an error may occur (due to invalid insns combination).
         In that case, inst.error is set.
         Therefore, inst.error has to be checked after the execution of
@@ -18169,74 +19352,150 @@ now_it_add_mask (int cond)
  
       3) Back in md_assemble(), it_fsm_post_encode () is called to commit
         any pending state change (if any) that didn't take place in
-       handle_it_state () as explained above.  */
+       handle_pred_state () as explained above.  */
  
  static void
  it_fsm_pre_encode (void)
  {
    if (inst.cond != COND_ALWAYS)
-    inst.it_insn_type = INSIDE_IT_INSN;
+    inst.pred_insn_type =  INSIDE_IT_INSN;
    else
-    inst.it_insn_type = OUTSIDE_IT_INSN;
+    inst.pred_insn_type = OUTSIDE_PRED_INSN;
  
-  now_it.state_handled = 0;
+  now_pred.state_handled = 0;
  }
  
  /* IT state FSM handling function.  */
+/* MVE instructions and non-MVE instructions are handled differently because of
+   the introduction of VPT blocks.
+   Specifications say that any non-MVE instruction inside a VPT block is
+   UNPREDICTABLE, with the exception of the BKPT instruction, whereas most MVE
+   instructions are deemed to be UNPREDICTABLE if inside an IT block.  The
+   few exceptions are handled in their respective handler functions.
+   The error messages provided depending on the different combinations possible
+   are described in the cases below:
+   For 'most' MVE instructions:
+   1) In an IT block, with an IT code: syntax error
+   2) In an IT block, with a VPT code: error: must be in a VPT block
+   3) In an IT block, with no code: warning: UNPREDICTABLE
+   4) In a VPT block, with an IT code: syntax error
+   5) In a VPT block, with a VPT code: OK!
+   6) In a VPT block, with no code: error: missing code
+   7) Outside a pred block, with an IT code: error: syntax error
+   8) Outside a pred block, with a VPT code: error: should be in a VPT block
+   9) Outside a pred block, with no code: OK!
+   For non-MVE instructions:
+   10) In an IT block, with an IT code: OK!
+   11) In an IT block, with a VPT code: syntax error
+   12) In an IT block, with no code: error: missing code
+   13) In a VPT block, with an IT code: error: should be in an IT block
+   14) In a VPT block, with a VPT code: syntax error
+   15) In a VPT block, with no code: UNPREDICTABLE
+   16) Outside a pred block, with an IT code: error: should be in an IT block
+   17) Outside a pred block, with a VPT code: syntax error
+   18) Outside a pred block, with no code: OK!
+ */
+
  
  static int
-handle_it_state (void)
+handle_pred_state (void)
  {
-  now_it.state_handled = 1;
-  now_it.insn_cond = FALSE;
+  now_pred.state_handled = 1;
+  now_pred.insn_cond = FALSE;
  
-  switch (now_it.state)
+  switch (now_pred.state)
      {
-    case OUTSIDE_IT_BLOCK:
-      switch (inst.it_insn_type)
+    case OUTSIDE_PRED_BLOCK:
+      switch (inst.pred_insn_type)
         {
-       case OUTSIDE_IT_INSN:
+       case MVE_OUTSIDE_PRED_INSN:
+         if (inst.cond < COND_ALWAYS)
+           {
+             /* Case 7: Outside a pred block, with an IT code: error: syntax
+                error.  */
+             inst.error = BAD_SYNTAX;
+             return FAIL;
+           }
+         /* Case 9:  Outside a pred block, with no code: OK!  */
+         break;
+       case OUTSIDE_PRED_INSN:
+         if (inst.cond > COND_ALWAYS)
+           {
+             /* Case 17:  Outside a pred block, with a VPT code: syntax error.
+              */
+             inst.error = BAD_SYNTAX;
+             return FAIL;
+           }
+         /* Case 18: Outside a pred block, with no code: OK!  */
           break;
  
+       case INSIDE_VPT_INSN:
+         /* Case 8: Outside a pred block, with a VPT code: error: should be in
+            a VPT block.  */
+         inst.error = BAD_OUT_VPT;
+         return FAIL;
+
         case INSIDE_IT_INSN:
         case INSIDE_IT_LAST_INSN:
-         if (thumb_mode == 0)
+         if (inst.cond < COND_ALWAYS)
             {
-             if (unified_syntax
-                 && !(implicit_it_mode & IMPLICIT_IT_MODE_ARM))
-               as_tsktsk (_("Warning: conditional outside an IT block"\
-                            " for Thumb."));
-           }
-         else
-           {
-             if ((implicit_it_mode & IMPLICIT_IT_MODE_THUMB)
-                 && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2))
+             /* Case 16: Outside a pred block, with an IT code: error: should
+                be in an IT block.  */
+             if (thumb_mode == 0)
                 {
-                 /* Automatically generate the IT instruction.  */
-                 new_automatic_it_block (inst.cond);
-                 if (inst.it_insn_type == INSIDE_IT_LAST_INSN)
-                   close_automatic_it_block ();
+                 if (unified_syntax
+                     && !(implicit_it_mode & IMPLICIT_IT_MODE_ARM))
+                   as_tsktsk (_("Warning: conditional outside an IT block"\
+                                " for Thumb."));
                 }
               else
                 {
-                 inst.error = BAD_OUT_IT;
-                 return FAIL;
+                 if ((implicit_it_mode & IMPLICIT_IT_MODE_THUMB)
+                     && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2))
+                   {
+                     /* Automatically generate the IT instruction.  */
+                     new_automatic_it_block (inst.cond);
+                     if (inst.pred_insn_type == INSIDE_IT_LAST_INSN)
+                       close_automatic_it_block ();
+                   }
+                 else
+                   {
+                     inst.error = BAD_OUT_IT;
+                     return FAIL;
+                   }
                 }
+             break;
             }
-         break;
-
+         else if (inst.cond > COND_ALWAYS)
+           {
+             /* Case 17: Outside a pred block, with a VPT code: syntax error.
+              */
+             inst.error = BAD_SYNTAX;
+             return FAIL;
+           }
+         else
+           gas_assert (0);
         case IF_INSIDE_IT_LAST_INSN:
         case NEUTRAL_IT_INSN:
           break;
  
+       case VPT_INSN:
+         if (inst.cond != COND_ALWAYS)
+           first_error (BAD_SYNTAX);
+         now_pred.state = MANUAL_PRED_BLOCK;
+         now_pred.block_length = 0;
+         now_pred.type = VECTOR_PRED;
+         now_pred.cc = 0;
+         break;
         case IT_INSN:
-         now_it.state = MANUAL_IT_BLOCK;
-         now_it.block_length = 0;
+         now_pred.state = MANUAL_PRED_BLOCK;
+         now_pred.block_length = 0;
+         now_pred.type = SCALAR_PRED;
           break;
         }
        break;
  
-    case AUTOMATIC_IT_BLOCK:
+    case AUTOMATIC_PRED_BLOCK:
        /* Three things may happen now:
          a) We should increment current it block size;
          b) We should close current it block (closing insn or 4 insns);
@@ -18244,82 +19503,211 @@ handle_it_state (void)
          to incompatible conditions or
          4 insns-length block reached).  */
  
-      switch (inst.it_insn_type)
+      switch (inst.pred_insn_type)
         {
-       case OUTSIDE_IT_INSN:
+       case INSIDE_VPT_INSN:
+       case VPT_INSN:
+       case MVE_OUTSIDE_PRED_INSN:
+         gas_assert (0);
+       case OUTSIDE_PRED_INSN:
           /* The closure of the block shall happen immediately,
-            so any in_it_block () call reports the block as closed.  */
+            so any in_pred_block () call reports the block as closed.  */
           force_automatic_it_block_close ();
           break;
  
         case INSIDE_IT_INSN:
         case INSIDE_IT_LAST_INSN:
         case IF_INSIDE_IT_LAST_INSN:
-         now_it.block_length++;
+         now_pred.block_length++;
  
-         if (now_it.block_length > 4
-             || !now_it_compatible (inst.cond))
+         if (now_pred.block_length > 4
+             || !now_pred_compatible (inst.cond))
             {
               force_automatic_it_block_close ();
-             if (inst.it_insn_type != IF_INSIDE_IT_LAST_INSN)
+             if (inst.pred_insn_type != IF_INSIDE_IT_LAST_INSN)
                 new_automatic_it_block (inst.cond);
             }
           else
             {
-             now_it.insn_cond = TRUE;
-             now_it_add_mask (inst.cond);
+             now_pred.insn_cond = TRUE;
+             now_pred_add_mask (inst.cond);
             }
  
-         if (now_it.state == AUTOMATIC_IT_BLOCK
-             && (inst.it_insn_type == INSIDE_IT_LAST_INSN
-                 || inst.it_insn_type == IF_INSIDE_IT_LAST_INSN))
+         if (now_pred.state == AUTOMATIC_PRED_BLOCK
+             && (inst.pred_insn_type == INSIDE_IT_LAST_INSN
+                 || inst.pred_insn_type == IF_INSIDE_IT_LAST_INSN))
             close_automatic_it_block ();
           break;
  
         case NEUTRAL_IT_INSN:
-         now_it.block_length++;
-         now_it.insn_cond = TRUE;
+         now_pred.block_length++;
+         now_pred.insn_cond = TRUE;
  
-         if (now_it.block_length > 4)
+         if (now_pred.block_length > 4)
             force_automatic_it_block_close ();
           else
-           now_it_add_mask (now_it.cc & 1);
+           now_pred_add_mask (now_pred.cc & 1);
           break;
  
         case IT_INSN:
           close_automatic_it_block ();
-         now_it.state = MANUAL_IT_BLOCK;
+         now_pred.state = MANUAL_PRED_BLOCK;
           break;
         }
        break;
  
-    case MANUAL_IT_BLOCK:
+    case MANUAL_PRED_BLOCK:
        {
-       /* Check conditional suffixes.  */
-       const int cond = now_it.cc ^ ((now_it.mask >> 4) & 1) ^ 1;
-       int is_last;
-       now_it.mask <<= 1;
-       now_it.mask &= 0x1f;
-       is_last = (now_it.mask == 0x10);
-       now_it.insn_cond = TRUE;
-
-       switch (inst.it_insn_type)
+       int cond, is_last;
+       if (now_pred.type == SCALAR_PRED)
           {
-         case OUTSIDE_IT_INSN:
-           inst.error = BAD_NOT_IT;
-           return FAIL;
+           /* Check conditional suffixes.  */
+           cond = now_pred.cc ^ ((now_pred.mask >> 4) & 1) ^ 1;
+           now_pred.mask <<= 1;
+           now_pred.mask &= 0x1f;
+           is_last = (now_pred.mask == 0x10);
+         }
+       else
+         {
+           now_pred.cc ^= (now_pred.mask >> 4);
+           cond = now_pred.cc + 0xf;
+           now_pred.mask <<= 1;
+           now_pred.mask &= 0x1f;
+           is_last = now_pred.mask == 0x10;
+         }
+       now_pred.insn_cond = TRUE;
  
+       switch (inst.pred_insn_type)
+         {
+         case OUTSIDE_PRED_INSN:
+           if (now_pred.type == SCALAR_PRED)
+             {
+               if (inst.cond == COND_ALWAYS)
+                 {
+                   /* Case 12: In an IT block, with no code: error: missing
+                      code.  */
+                   inst.error = BAD_NOT_IT;
+                   return FAIL;
+                 }
+               else if (inst.cond > COND_ALWAYS)
+                 {
+                   /* Case 11: In an IT block, with a VPT code: syntax error.
+                    */
+                   inst.error = BAD_SYNTAX;
+                   return FAIL;
+                 }
+               else if (thumb_mode)
+                 {
+                   /* This is for some special cases where a non-MVE
+                      instruction is not allowed in an IT block, such as cbz,
+                      but are put into one with a condition code.
+                      You could argue this should be a syntax error, but we
+                      gave the 'not allowed in IT block' diagnostic in the
+                      past so we will keep doing so.  */
+                   inst.error = BAD_NOT_IT;
+                   return FAIL;
+                 }
+               break;
+             }
+           else
+             {
+               /* Case 15: In a VPT block, with no code: UNPREDICTABLE.  */
+               as_tsktsk (MVE_NOT_VPT);
+               return SUCCESS;
+             }
+         case MVE_OUTSIDE_PRED_INSN:
+           if (now_pred.type == SCALAR_PRED)
+             {
+               if (inst.cond == COND_ALWAYS)
+                 {
+                   /* Case 3: In an IT block, with no code: warning:
+                      UNPREDICTABLE.  */
+                   as_tsktsk (MVE_NOT_IT);
+                   return SUCCESS;
+                 }
+               else if (inst.cond < COND_ALWAYS)
+                 {
+                   /* Case 1: In an IT block, with an IT code: syntax error.
+                    */
+                   inst.error = BAD_SYNTAX;
+                   return FAIL;
+                 }
+               else
+                 gas_assert (0);
+             }
+           else
+             {
+               if (inst.cond < COND_ALWAYS)
+                 {
+                   /* Case 4: In a VPT block, with an IT code: syntax error.
+                    */
+                   inst.error = BAD_SYNTAX;
+                   return FAIL;
+                 }
+               else if (inst.cond == COND_ALWAYS)
+                 {
+                   /* Case 6: In a VPT block, with no code: error: missing
+                      code.  */
+                   inst.error = BAD_NOT_VPT;
+                   return FAIL;
+                 }
+               else
+                 {
+                   gas_assert (0);
+                 }
+             }
           case INSIDE_IT_INSN:
-           if (cond != inst.cond)
+           if (inst.cond > COND_ALWAYS)
               {
-               inst.error = BAD_IT_COND;
+               /* Case 11: In an IT block, with a VPT code: syntax error.  */
+               /* Case 14: In a VPT block, with a VPT code: syntax error.  */
+               inst.error = BAD_SYNTAX;
+               return FAIL;
+             }
+           else if (now_pred.type == SCALAR_PRED)
+             {
+               /* Case 10: In an IT block, with an IT code: OK!  */
+               if (cond != inst.cond)
+                 {
+                   inst.error = now_pred.type == SCALAR_PRED ? BAD_IT_COND :
+                     BAD_VPT_COND;
+                   return FAIL;
+                 }
+             }
+           else
+             {
+               /* Case 13: In a VPT block, with an IT code: error: should be
+                  in an IT block.  */
+               inst.error = BAD_OUT_IT;
                 return FAIL;
               }
             break;
  
+         case INSIDE_VPT_INSN:
+           if (now_pred.type == SCALAR_PRED)
+             {
+               /* Case 2: In an IT block, with a VPT code: error: must be in a
+                  VPT block.  */
+               inst.error = BAD_OUT_VPT;
+               return FAIL;
+             }
+           /* Case 5:  In a VPT block, with a VPT code: OK!  */
+           else if (cond != inst.cond)
+             {
+               inst.error = BAD_VPT_COND;
+               return FAIL;
+             }
+           break;
           case INSIDE_IT_LAST_INSN:
           case IF_INSIDE_IT_LAST_INSN:
-           if (cond != inst.cond)
+           if (now_pred.type == VECTOR_PRED || inst.cond > COND_ALWAYS)
+             {
+               /* Case 4: In a VPT block, with an IT code: syntax error.  */
+               /* Case 11: In an IT block, with a VPT code: syntax error.  */
+               inst.error = BAD_SYNTAX;
+               return FAIL;
+             }
+           else if (cond != inst.cond)
               {
                 inst.error = BAD_IT_COND;
                 return FAIL;
@@ -18332,14 +19720,37 @@ handle_it_state (void)
             break;
  
           case NEUTRAL_IT_INSN:
-           /* The BKPT instruction is unconditional even in an IT block.  */
+           /* The BKPT instruction is unconditional even in an IT or VPT
+              block.  */
             break;
  
           case IT_INSN:
-           inst.error = BAD_IT_IT;
-           return FAIL;
+           if (now_pred.type == SCALAR_PRED)
+             {
+               inst.error = BAD_IT_IT;
+               return FAIL;
+             }
+           /* fall through.  */
+         case VPT_INSN:
+           if (inst.cond == COND_ALWAYS)
+             {
+               /* Executing a VPT/VPST instruction inside an IT block or a
+                  VPT/VPST/IT instruction inside a VPT block is UNPREDICTABLE.
+                */
+               if (now_pred.type == SCALAR_PRED)
+                 as_tsktsk (MVE_NOT_IT);
+               else
+                 as_tsktsk (MVE_NOT_VPT);
+               return SUCCESS;
+             }
+           else
+             {
+               /* VPT/VPST do not accept condition codes.  */
+               inst.error = BAD_SYNTAX;
+               return FAIL;
+             }
           }
-      }
+       }
        break;
      }
  
@@ -18373,19 +19784,20 @@ it_fsm_post_encode (void)
  {
    int is_last;
  
-  if (!now_it.state_handled)
-    handle_it_state ();
+  if (!now_pred.state_handled)
+    handle_pred_state ();
  
-  if (now_it.insn_cond
-      && !now_it.warn_deprecated
+  if (now_pred.insn_cond
+      && !now_pred.warn_deprecated
        && warn_on_deprecated
-      && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8))
+      && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8)
+      && !ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_m))
      {
        if (inst.instruction >= 0x10000)
         {
           as_tsktsk (_("IT blocks containing 32-bit Thumb instructions are "
-                    "deprecated in ARMv8"));
-         now_it.warn_deprecated = TRUE;
+                    "performance deprecated in ARMv8-A and ARMv8-R"));
+         now_pred.warn_deprecated = TRUE;
         }
        else
         {
@@ -18395,10 +19807,11 @@ it_fsm_post_encode (void)
             {
               if ((inst.instruction & p->mask) == p->pattern)
                 {
-                 as_tsktsk (_("IT blocks containing 16-bit Thumb instructions "
-                            "of the following class are deprecated in ARMv8: "
-                            "%s"), p->description);
-                 now_it.warn_deprecated = TRUE;
+                 as_tsktsk (_("IT blocks containing 16-bit Thumb "
+                              "instructions of the following class are "
+                              "performance deprecated in ARMv8-A and "
+                              "ARMv8-R: %s"), p->description);
+                 now_pred.warn_deprecated = TRUE;
                   break;
                 }
  
@@ -18406,40 +19819,41 @@ it_fsm_post_encode (void)
             }
         }
  
-      if (now_it.block_length > 1)
+      if (now_pred.block_length > 1)
         {
           as_tsktsk (_("IT blocks containing more than one conditional "
-                    "instruction are deprecated in ARMv8"));
-         now_it.warn_deprecated = TRUE;
+                    "instruction are performance deprecated in ARMv8-A and "
+                    "ARMv8-R"));
+         now_pred.warn_deprecated = TRUE;
         }
      }
  
-  is_last = (now_it.mask == 0x10);
-  if (is_last)
-    {
-      now_it.state = OUTSIDE_IT_BLOCK;
-      now_it.mask = 0;
-    }
+    is_last = (now_pred.mask == 0x10);
+    if (is_last)
+      {
+       now_pred.state = OUTSIDE_PRED_BLOCK;
+       now_pred.mask = 0;
+      }
  }
  
  static void
  force_automatic_it_block_close (void)
  {
-  if (now_it.state == AUTOMATIC_IT_BLOCK)
+  if (now_pred.state == AUTOMATIC_PRED_BLOCK)
      {
        close_automatic_it_block ();
-      now_it.state = OUTSIDE_IT_BLOCK;
-      now_it.mask = 0;
+      now_pred.state = OUTSIDE_PRED_BLOCK;
+      now_pred.mask = 0;
      }
  }
  
  static int
-in_it_block (void)
+in_pred_block (void)
  {
-  if (!now_it.state_handled)
-    handle_it_state ();
+  if (!now_pred.state_handled)
+    handle_pred_state ();
  
-  return now_it.state != OUTSIDE_IT_BLOCK;
+  return now_pred.state != OUTSIDE_PRED_BLOCK;
  }
  
  /* Whether OPCODE only has T32 encoding.  Since this function is only used by
@@ -18514,7 +19928,9 @@ md_assemble (char *str)
      }
  
    memset (&inst, '\0', sizeof (inst));
-  inst.reloc.type = BFD_RELOC_UNUSED;
+  int r;
+  for (r = 0; r < ARM_IT_MAX_RELOCS; r++)
+    inst.relocs[r].type = BFD_RELOC_UNUSED;
  
    opcode = opcode_lookup (&p);
    if (!opcode)
@@ -18590,7 +20006,7 @@ md_assemble (char *str)
  
        if (!parse_operands (p, opcode->operands, /*thumb=*/TRUE))
         {
-         /* Prepare the it_insn_type for those encodings that don't set
+         /* Prepare the pred_insn_type for those encodings that don't set
              it.  */
           it_fsm_pre_encode ();
  
@@ -18693,21 +20109,30 @@ md_assemble (char *str)
  }
  
  static void
-check_it_blocks_finished (void)
+check_pred_blocks_finished (void)
  {
  #ifdef OBJ_ELF
    asection *sect;
  
    for (sect = stdoutput->sections; sect != NULL; sect = sect->next)
-    if (seg_info (sect)->tc_segment_info_data.current_it.state
-       == MANUAL_IT_BLOCK)
+    if (seg_info (sect)->tc_segment_info_data.current_pred.state
+       == MANUAL_PRED_BLOCK)
        {
-       as_warn (_("section '%s' finished with an open IT block."),
-                sect->name);
+       if (now_pred.type == SCALAR_PRED)
+         as_warn (_("section '%s' finished with an open IT block."),
+                  sect->name);
+       else
+         as_warn (_("section '%s' finished with an open VPT/VPST block."),
+                  sect->name);
        }
  #else
-  if (now_it.state == MANUAL_IT_BLOCK)
-    as_warn (_("file finished with an open IT block."));
+  if (now_pred.state == MANUAL_PRED_BLOCK)
+    {
+      if (now_pred.type == SCALAR_PRED)
+       as_warn (_("file finished with an open IT block."));
+      else
+       as_warn (_("file finished with an open VPT/VPST block."));
+    }
  #endif
  }
  
@@ -19096,11 +20521,20 @@ static struct reloc_entry reloc_names[] =
    { "tlscall", BFD_RELOC_ARM_TLS_CALL},
         { "TLSCALL", BFD_RELOC_ARM_TLS_CALL},
    { "tlsdescseq", BFD_RELOC_ARM_TLS_DESCSEQ},
-       { "TLSDESCSEQ", BFD_RELOC_ARM_TLS_DESCSEQ}
+       { "TLSDESCSEQ", BFD_RELOC_ARM_TLS_DESCSEQ},
+  { "gotfuncdesc", BFD_RELOC_ARM_GOTFUNCDESC },
+       { "GOTFUNCDESC", BFD_RELOC_ARM_GOTFUNCDESC },
+  { "gotofffuncdesc", BFD_RELOC_ARM_GOTOFFFUNCDESC },
+       { "GOTOFFFUNCDESC", BFD_RELOC_ARM_GOTOFFFUNCDESC },
+  { "funcdesc", BFD_RELOC_ARM_FUNCDESC },
+       { "FUNCDESC", BFD_RELOC_ARM_FUNCDESC },
+   { "tlsgd_fdpic", BFD_RELOC_ARM_TLS_GD32_FDPIC },      { "TLSGD_FDPIC", BFD_RELOC_ARM_TLS_GD32_FDPIC },
+   { "tlsldm_fdpic", BFD_RELOC_ARM_TLS_LDM32_FDPIC },    { "TLSLDM_FDPIC", BFD_RELOC_ARM_TLS_LDM32_FDPIC },
+   { "gottpoff_fdpic", BFD_RELOC_ARM_TLS_IE32_FDPIC },   { "GOTTPOFF_FDPIC", BFD_RELOC_ARM_TLS_IE32_FDPIC },
  };
  #endif
  
-/* Table of all conditional affixes.  0xF is not defined as a condition code.  */
+/* Table of all conditional affixes.  */
  static const struct asm_cond conds[] =
  {
    {"eq", 0x0},
@@ -19119,6 +20553,11 @@ static const struct asm_cond conds[] =
    {"le", 0xd},
    {"al", 0xe}
  };
+static const struct asm_cond vconds[] =
+{
+    {"t", 0xf},	/* NOTE(review): presumably the VPT-block "then" affix.  */
+    {"e", 0x10}	/* NOTE(review): presumably the VPT-block "else" affix.  */
+};
  
  #define UL_BARRIER(L,U,CODE,FEAT) \
    { L, CODE, ARM_FEATURE_CORE_LOW (FEAT) }, \
@@ -19177,7 +20616,7 @@ static struct asm_barrier_opt barrier_opt_names[] =
  /* The normal sort of mnemonic; has a Thumb variant; takes a conditional suffix.  */
  #define TxCE(mnem, op, top, nops, ops, ae, te) \
    { mnem, OPS##nops ops, OT_csuffix, 0x##op, top, ARM_VARIANT, \
-    THUMB_VARIANT, do_##ae, do_##te }
+    THUMB_VARIANT, do_##ae, do_##te, 0 }
  
  /* Two variants of the above - TCE for a numeric Thumb opcode, tCE for
     a T_MNEM_xyz enumerator.  */
@@ -19190,10 +20629,10 @@ static struct asm_barrier_opt barrier_opt_names[] =
     infix after the third character.  */
  #define TxC3(mnem, op, top, nops, ops, ae, te) \
    { mnem, OPS##nops ops, OT_cinfix3, 0x##op, top, ARM_VARIANT, \
-    THUMB_VARIANT, do_##ae, do_##te }
+    THUMB_VARIANT, do_##ae, do_##te, 0 }
  #define TxC3w(mnem, op, top, nops, ops, ae, te) \
    { mnem, OPS##nops ops, OT_cinfix3_deprecated, 0x##op, top, ARM_VARIANT, \
-    THUMB_VARIANT, do_##ae, do_##te }
+    THUMB_VARIANT, do_##ae, do_##te, 0 }
  #define TC3(mnem, aop, top, nops, ops, ae, te) \
        TxC3 (mnem, aop, 0x##top, nops, ops, ae, te)
  #define TC3w(mnem, aop, top, nops, ops, ae, te) \
@@ -19208,55 +20647,74 @@ static struct asm_barrier_opt barrier_opt_names[] =
     conditionally, so this is checked separately.  */
  #define TUE(mnem, op, top, nops, ops, ae, te)                          \
    { mnem, OPS##nops ops, OT_unconditional, 0x##op, 0x##top, ARM_VARIANT, \
-    THUMB_VARIANT, do_##ae, do_##te }
+    THUMB_VARIANT, do_##ae, do_##te, 0 }
  
  /* Same as TUE but the encoding function for ARM and Thumb modes is the same.
     Used by mnemonics that have very minimal differences in the encoding for
     ARM and Thumb variants and can be handled in a common function.  */
  #define TUEc(mnem, op, top, nops, ops, en) \
    { mnem, OPS##nops ops, OT_unconditional, 0x##op, 0x##top, ARM_VARIANT, \
-    THUMB_VARIANT, do_##en, do_##en }
+    THUMB_VARIANT, do_##en, do_##en, 0 }
  
  /* Mnemonic that cannot be conditionalized, and bears 0xF in its ARM
     condition code field.  */
  #define TUF(mnem, op, top, nops, ops, ae, te)                          \
    { mnem, OPS##nops ops, OT_unconditionalF, 0x##op, 0x##top, ARM_VARIANT, \
-    THUMB_VARIANT, do_##ae, do_##te }
+    THUMB_VARIANT, do_##ae, do_##te, 0 }
  
  /* ARM-only variants of all the above.  */
  #define CE(mnem,  op, nops, ops, ae)   \
-  { mnem, OPS##nops ops, OT_csuffix, 0x##op, 0x0, ARM_VARIANT, 0, do_##ae, NULL }
+  { mnem, OPS##nops ops, OT_csuffix, 0x##op, 0x0, ARM_VARIANT, 0, do_##ae, NULL, 0 }
  
  #define C3(mnem, op, nops, ops, ae)    \
-  { #mnem, OPS##nops ops, OT_cinfix3, 0x##op, 0x0, ARM_VARIANT, 0, do_##ae, NULL }
+  { #mnem, OPS##nops ops, OT_cinfix3, 0x##op, 0x0, ARM_VARIANT, 0, do_##ae, NULL, 0 }
+
+/* Thumb-only variants of TCE and TUE.  */
+#define ToC(mnem, top, nops, ops, te) \
+  { mnem, OPS##nops ops, OT_csuffix, 0x0, 0x##top, 0, THUMB_VARIANT, NULL, \
+    do_##te, 0 }
+
+#define ToU(mnem, top, nops, ops, te) \
+  { mnem, OPS##nops ops, OT_unconditional, 0x0, 0x##top, 0, THUMB_VARIANT, \
+    NULL, do_##te, 0 }
+
+/* T_MNEM_xyz enumerator variants of ToC.  */
+#define toC(mnem, top, nops, ops, te) \
+  { mnem, OPS##nops ops, OT_csuffix, 0x0, T_MNEM##top, 0, THUMB_VARIANT, NULL, \
+    do_##te, 0 }
+
+/* T_MNEM_xyz enumerator variants of ToU.  */
+#define toU(mnem, top, nops, ops, te) \
+  { mnem, OPS##nops ops, OT_unconditional, 0x0, T_MNEM##top, 0, THUMB_VARIANT, \
+    NULL, do_##te, 0 }
  
  /* Legacy mnemonics that always have conditional infix after the third
     character.  */
  #define CL(mnem, op, nops, ops, ae)    \
    { mnem, OPS##nops ops, OT_cinfix3_legacy, \
-    0x##op, 0x0, ARM_VARIANT, 0, do_##ae, NULL }
+    0x##op, 0x0, ARM_VARIANT, 0, do_##ae, NULL, 0 }
  
  /* Coprocessor instructions.  Isomorphic between Arm and Thumb-2.  */
  #define cCE(mnem,  op, nops, ops, ae)  \
-  { mnem, OPS##nops ops, OT_csuffix, 0x##op, 0xe##op, ARM_VARIANT, ARM_VARIANT, do_##ae, do_##ae }
+  { mnem, OPS##nops ops, OT_csuffix, 0x##op, 0xe##op, ARM_VARIANT, ARM_VARIANT, do_##ae, do_##ae, 0 }
  
  /* Legacy coprocessor instructions where conditional infix and conditional
     suffix are ambiguous.  For consistency this includes all FPA instructions,
     not just the potentially ambiguous ones.  */
  #define cCL(mnem, op, nops, ops, ae)   \
    { mnem, OPS##nops ops, OT_cinfix3_legacy, \
-    0x##op, 0xe##op, ARM_VARIANT, ARM_VARIANT, do_##ae, do_##ae }
+    0x##op, 0xe##op, ARM_VARIANT, ARM_VARIANT, do_##ae, do_##ae, 0 }
  
  /* Coprocessor, takes either a suffix or a position-3 infix
     (for an FPA corner case). */
  #define C3E(mnem, op, nops, ops, ae) \
    { mnem, OPS##nops ops, OT_csuf_or_in3, \
-    0x##op, 0xe##op, ARM_VARIANT, ARM_VARIANT, do_##ae, do_##ae }
+    0x##op, 0xe##op, ARM_VARIANT, ARM_VARIANT, do_##ae, do_##ae, 0 }
  
  #define xCM_(m1, m2, m3, op, nops, ops, ae)    \
    { m1 #m2 m3, OPS##nops ops, \
      sizeof (#m2) == 1 ? OT_odd_infix_unc : OT_odd_infix_0 + sizeof (m1) - 1, \
-    0x##op, 0x0, ARM_VARIANT, 0, do_##ae, NULL }
+    0x##op, 0x0, ARM_VARIANT, 0, do_##ae, NULL, 0 }
  
  #define CM(m1, m2, op, nops, ops, ae)  \
    xCM_ (m1,   , m2, op, nops, ops, ae),        \
@@ -19280,47 +20738,83 @@ static struct asm_barrier_opt barrier_opt_names[] =
    xCM_ (m1, al, m2, op, nops, ops, ae)
  
  #define UE(mnem, op, nops, ops, ae)    \
-  { #mnem, OPS##nops ops, OT_unconditional, 0x##op, 0, ARM_VARIANT, 0, do_##ae, NULL }
+  { #mnem, OPS##nops ops, OT_unconditional, 0x##op, 0, ARM_VARIANT, 0, do_##ae, NULL, 0 }
  
  #define UF(mnem, op, nops, ops, ae)    \
-  { #mnem, OPS##nops ops, OT_unconditionalF, 0x##op, 0, ARM_VARIANT, 0, do_##ae, NULL }
+  { #mnem, OPS##nops ops, OT_unconditionalF, 0x##op, 0, ARM_VARIANT, 0, do_##ae, NULL, 0 }
  
  /* Neon data-processing. ARM versions are unconditional with cond=0xf.
     The Thumb and ARM variants are mostly the same (bits 0-23 and 24/28), so we
     use the same encoding function for each.  */
  #define NUF(mnem, op, nops, ops, enc)                                  \
    { #mnem, OPS##nops ops, OT_unconditionalF, 0x##op, 0x##op,           \
-    ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc }
+    ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc, 0 }
  
  /* Neon data processing, version which indirects through neon_enc_tab for
     the various overloaded versions of opcodes.  */
  #define nUF(mnem, op, nops, ops, enc)                                  \
    { #mnem, OPS##nops ops, OT_unconditionalF, N_MNEM##op, N_MNEM##op,   \
-    ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc }
+    ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc, 0 }
  
  /* Neon insn with conditional suffix for the ARM version, non-overloaded
     version.  */
-#define NCE_tag(mnem, op, nops, ops, enc, tag)                         \
+#define NCE_tag(mnem, op, nops, ops, enc, tag, mve_p)                          \
    { #mnem, OPS##nops ops, tag, 0x##op, 0x##op, ARM_VARIANT,            \
-    THUMB_VARIANT, do_##enc, do_##enc }
+    THUMB_VARIANT, do_##enc, do_##enc, mve_p }
  
  #define NCE(mnem, op, nops, ops, enc)                                  \
-   NCE_tag (mnem, op, nops, ops, enc, OT_csuffix)
+   NCE_tag (mnem, op, nops, ops, enc, OT_csuffix, 0)
  
  #define NCEF(mnem, op, nops, ops, enc)                                 \
-    NCE_tag (mnem, op, nops, ops, enc, OT_csuffixF)
+    NCE_tag (mnem, op, nops, ops, enc, OT_csuffixF, 0)
  
  /* Neon insn with conditional suffix for the ARM version, overloaded types.  */
-#define nCE_tag(mnem, op, nops, ops, enc, tag)                         \
+#define nCE_tag(mnem, op, nops, ops, enc, tag, mve_p)                          \
    { #mnem, OPS##nops ops, tag, N_MNEM##op, N_MNEM##op,         \
-    ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc }
+    ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc, mve_p }
  
  #define nCE(mnem, op, nops, ops, enc)                                  \
-   nCE_tag (mnem, op, nops, ops, enc, OT_csuffix)
+   nCE_tag (mnem, op, nops, ops, enc, OT_csuffix, 0)
  
  #define nCEF(mnem, op, nops, ops, enc)                                 \
-    nCE_tag (mnem, op, nops, ops, enc, OT_csuffixF)
+    nCE_tag (mnem, op, nops, ops, enc, OT_csuffixF, 0)
+
+/* As nCEF, but takes an M_MNEM_xyz enumerator and sets the mve_p field.  */
+#define mCEF(mnem, op, nops, ops, enc)                         \
+  { #mnem, OPS##nops ops, OT_csuffixF, M_MNEM##op, M_MNEM##op, \
+    ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc, 1 }
+
+
+/* nCEF but for MVE predicated instructions.  */
+#define mnCEF(mnem, op, nops, ops, enc)                                        \
+    nCE_tag (mnem, op, nops, ops, enc, OT_csuffixF, 1)
+
+/* nCE but for MVE predicated instructions.  */
+#define mnCE(mnem, op, nops, ops, enc)                                 \
+   nCE_tag (mnem, op, nops, ops, enc, OT_csuffix, 1)
+
+/* NUF but for potentially MVE predicated instructions.  */
+#define MNUF(mnem, op, nops, ops, enc)                                 \
+  { #mnem, OPS##nops ops, OT_unconditionalF, 0x##op, 0x##op,           \
+    ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc, 1 }
+
+/* nUF but for potentially MVE predicated instructions.  */
+#define mnUF(mnem, op, nops, ops, enc)                                 \
+  { #mnem, OPS##nops ops, OT_unconditionalF, N_MNEM##op, N_MNEM##op,   \
+    ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc, 1 }
+
+/* ToC but for potentially MVE predicated instructions.  */
+#define mToC(mnem, top, nops, ops, te) \
+  { mnem, OPS##nops ops, OT_csuffix, 0x0, 0x##top, 0, THUMB_VARIANT, NULL, \
+    do_##te, 1 }
+
+/* NCE but for MVE predicated instructions.  */
+#define MNCE(mnem, op, nops, ops, enc)                                 \
+   NCE_tag (mnem, op, nops, ops, enc, OT_csuffix, 1)
  
+/* NCEF but for MVE predicated instructions.  */
+#define MNCEF(mnem, op, nops, ops, enc)                                        \
+    NCE_tag (mnem, op, nops, ops, enc, OT_csuffixF, 1)
  #define do_0 0
  
  static const struct asm_opcode insns[] =
@@ -19742,9 +21236,9 @@ static const struct asm_opcode insns[] =
   TCE("usat16", 6e00f30, f3a00000, 3, (RRnpc, I15, RRnpc),         usat16, t_usat16),
  
  #undef  ARM_VARIANT
-#define ARM_VARIANT   & arm_ext_v6k
+#define ARM_VARIANT   & arm_ext_v6k_v6t2
  #undef  THUMB_VARIANT
-#define THUMB_VARIANT & arm_ext_v6k
+#define THUMB_VARIANT & arm_ext_v6k_v6t2
  
   tCE("yield",  320f001, _yield,    0, (), noargs, t_hint),
   tCE("wfe",    320f002, _wfe,      0, (), noargs, t_hint),
@@ -19810,6 +21304,17 @@ static const struct asm_opcode insns[] =
   TC3("ldrsbt", 03000d0, f9100e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
   TC3("strht",  02000b0, f8200e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
  
+#undef  ARM_VARIANT
+#define ARM_VARIANT    & arm_ext_v3
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2
+
+ TUE("csdb",   320f014, f3af8014, 0, (), noargs, t_csdb),
+ TUF("ssbb",   57ff040, f3bf8f40, 0, (), noargs, t_csdb),
+ TUF("pssbb",  57ff044, f3bf8f44, 0, (), noargs, t_csdb),
+
+#undef  ARM_VARIANT
+#define ARM_VARIANT    & arm_ext_v6t2
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6t2_v8m
   TCE("movw",   3000000, f2400000, 2, (RRnpc, HALF),                mov16, t_mov16),
@@ -19921,11 +21426,22 @@ static const struct asm_opcode insns[] =
  #define THUMB_VARIANT & arm_ext_v8
  
   tCE("sevl",   320f005, _sevl,    0, (),               noargs, t_hint),
- TUE("hlt",    1000070, ba80,     1, (oIffffb),        bkpt,   t_hlt),
   TCE("ldaexd", 1b00e9f, e8d000ff, 3, (RRnpc, oRRnpc, RRnpcb),
                                                         ldrexd, t_ldrexd),
   TCE("stlexd", 1a00e90, e8c000f0, 4, (RRnpc, RRnpc, oRRnpc, RRnpcb),
                                                         strexd, t_strexd),
+
+/* Defined in V8 but is in undefined encoding space for earlier
+   architectures.  However earlier architectures are required to treat
+   this instruction as a semihosting trap as well.  Hence while not explicitly
+   defined as such, it is in fact correct to define the instruction for all
+   architectures.  */
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v1
+#undef  ARM_VARIANT
+#define ARM_VARIANT  & arm_ext_v1
+ TUE("hlt",    1000070, ba80,     1, (oIffffb),        bkpt,   t_hlt),
+
   /* ARMv8 T32 only.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT  NULL
@@ -20602,20 +22118,12 @@ static const struct asm_opcode insns[] =
   nCEF(vmla,     _vmla,    3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar),
   nCEF(vmls,     _vmls,    3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar),
  
- nCEF(vadd,     _vadd,    3, (RNSDQ, oRNSDQ, RNSDQ), neon_addsub_if_i),
- nCEF(vsub,     _vsub,    3, (RNSDQ, oRNSDQ, RNSDQ), neon_addsub_if_i),
-
- NCEF(vabs,     1b10300, 2, (RNSDQ, RNSDQ), neon_abs_neg),
- NCEF(vneg,     1b10380, 2, (RNSDQ, RNSDQ), neon_abs_neg),
-
   NCE(vldm,      c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
   NCE(vldmia,    c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
   NCE(vldmdb,    d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
   NCE(vstm,      c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
   NCE(vstmia,    c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
   NCE(vstmdb,    d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vldr,      d100b00, 2, (RVSD, ADDRGLDC), neon_ldr_str),
- NCE(vstr,      d000b00, 2, (RVSD, ADDRGLDC), neon_ldr_str),
  
   nCEF(vcvt,     _vcvt,   3, (RNSDQ, RNSDQ, oI32z), neon_cvt),
   nCEF(vcvtr,    _vcvt,   2, (RNSDQ, RNSDQ), neon_cvtr),
@@ -20627,6 +22135,15 @@ static const struct asm_opcode insns[] =
   NCE(vmov,      0,       1, (VMOV), neon_mov),
   NCE(vmovq,     0,       1, (VMOV), neon_mov),
  
+#undef  THUMB_VARIANT
+/* Could be either VLDR/VSTR or VLDR/VSTR (system register) which are guarded
+   by different feature bits.  Since we are setting the Thumb guard, we can
+   require Thumb-1 which makes it a nop guard and set the right feature bit in
+   do_vldr_vstr ().  */
+#define THUMB_VARIANT  & arm_ext_v4t
+ NCE(vldr,      d100b00, 2, (VLDR, ADDRGLDC), vldr_vstr),
+ NCE(vstr,      d000b00, 2, (VLDR, ADDRGLDC), vldr_vstr),
+
  #undef  ARM_VARIANT
  #define ARM_VARIANT    & arm_ext_fp16
  #undef  THUMB_VARIANT
@@ -20636,6 +22153,10 @@ static const struct asm_opcode insns[] =
   NCE (vmovx,     eb00a40,       2, (RVS, RVS), neon_movhf),
   NCE (vins,      eb00ac0,       2, (RVS, RVS), neon_movhf),
  
+ /* New backported fma/fms instructions optional in v8.2.  */
+ NCE (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal),
+ NCE (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl),
+
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_neon_ext_v1
  #undef  ARM_VARIANT
@@ -20686,7 +22207,6 @@ static const struct asm_opcode insns[] =
   NUF(vbif,      1300110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield),
   NUF(vbifq,     1300110, 3, (RNQ,  RNQ,  RNQ),  neon_bitfield),
    /* Int and float variants, types S8 S16 S32 U8 U16 U32 F16 F32.  */
- nUF(vabd,      _vabd,    3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
   nUF(vabdq,     _vabd,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
   nUF(vmax,      _vmax,    3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
   nUF(vmaxq,     _vmax,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
@@ -20787,9 +22307,6 @@ static const struct asm_opcode insns[] =
    /* Data processing, three registers of different lengths.  */
    /* Dyadic, long insns. Types S8 S16 S32 U8 U16 U32.  */
   NUF(vabal,     0800500, 3, (RNQ, RND, RND),  neon_abal),
- NUF(vabdl,     0800700, 3, (RNQ, RND, RND),  neon_dyadic_long),
- NUF(vaddl,     0800000, 3, (RNQ, RND, RND),  neon_dyadic_long),
- NUF(vsubl,     0800200, 3, (RNQ, RND, RND),  neon_dyadic_long),
    /* If not scalar, fall back to neon_dyadic_long.
       Vector types as above, scalar types S16 S32 U16 U32.  */
   nUF(vmlal,     _vmlal,   3, (RNQ, RND, RND_RNSC), neon_mac_maybe_scalar_long),
@@ -21255,25 +22772,107 @@ static const struct asm_opcode insns[] =
   cCE("cfmadda32", e200600, 4, (RMAX, RMAX, RMFX, RMFX), mav_quad),
   cCE("cfmsuba32", e300600, 4, (RMAX, RMAX, RMFX, RMFX), mav_quad),
  
+ /* ARMv8.5-A instructions.  */
+#undef  ARM_VARIANT
+#define ARM_VARIANT   & arm_ext_sb
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_sb
+ TUF("sb", 57ff070, f3bf8f70, 0, (), noargs, noargs),
+
+#undef  ARM_VARIANT
+#define ARM_VARIANT   & arm_ext_predres
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_predres
+ CE("cfprctx", e070f93, 1, (RRnpc), rd),
+ CE("dvprctx", e070fb3, 1, (RRnpc), rd),
+ CE("cpprctx", e070ff3, 1, (RRnpc), rd),
+
   /* ARMv8-M instructions.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT NULL
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT & arm_ext_v8m
- TUE("sg", 0, e97fe97f, 0, (), 0, noargs),
- TUE("blxns", 0, 4784, 1, (RRnpc), 0, t_blx),
- TUE("bxns", 0, 4704, 1, (RRnpc), 0, t_bx),
- TUE("tt", 0, e840f000, 2, (RRnpc, RRnpc), 0, tt),
- TUE("ttt", 0, e840f040, 2, (RRnpc, RRnpc), 0, tt),
- TUE("tta", 0, e840f080, 2, (RRnpc, RRnpc), 0, tt),
- TUE("ttat", 0, e840f0c0, 2, (RRnpc, RRnpc), 0, tt),
+ ToU("sg",    e97fe97f,        0, (),             noargs),
+ ToC("blxns", 4784,    1, (RRnpc),        t_blx),
+ ToC("bxns",  4704,    1, (RRnpc),        t_bx),
+ ToC("tt",    e840f000,        2, (RRnpc, RRnpc), tt),
+ ToC("ttt",   e840f040,        2, (RRnpc, RRnpc), tt),
+ ToC("tta",   e840f080,        2, (RRnpc, RRnpc), tt),
+ ToC("ttat",  e840f0c0,        2, (RRnpc, RRnpc), tt),
  
   /* FP for ARMv8-M Mainline.  Enabled for ARMv8-M Mainline because the
      instructions behave as nop if no VFP is present.  */
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT & arm_ext_v8m_main
- TUEc("vlldm", 0,       ec300a00, 1, (RRnpc),  rn),
- TUEc("vlstm", 0,       ec200a00, 1, (RRnpc),  rn),
+ ToC("vlldm", ec300a00, 1, (RRnpc), rn),
+ ToC("vlstm", ec200a00, 1, (RRnpc), rn),
+
+ /* Armv8.1-M Mainline instructions.  */
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_v8_1m_main
+ toC("bf",     _bf,    2, (EXPs, EXPs),             t_branch_future),
+ toU("bfcsel", _bfcsel,        4, (EXPs, EXPs, EXPs, COND), t_branch_future),
+ toC("bfx",    _bfx,   2, (EXPs, RRnpcsp),          t_branch_future),
+ toC("bfl",    _bfl,   2, (EXPs, EXPs),             t_branch_future),
+ toC("bflx",   _bflx,  2, (EXPs, RRnpcsp),          t_branch_future),
+
+ toU("dls", _dls, 2, (LR, RRnpcsp),     t_loloop),
+ toU("wls", _wls, 3, (LR, RRnpcsp, EXP), t_loloop),
+ toU("le",  _le,  2, (oLR, EXP),        t_loloop),
+
+ ToC("clrm",   e89f0000, 1, (CLRMLST),  t_clrm),
+ ToC("vscclrm",        ec9f0a00, 1, (VRSDVLST), t_vscclrm),
+
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT & mve_ext
+ ToC("vpst",   fe710f4d, 0, (), mve_vpt),
+ ToC("vpstt",  fe318f4d, 0, (), mve_vpt),
+ ToC("vpste",  fe718f4d, 0, (), mve_vpt),
+ ToC("vpsttt", fe314f4d, 0, (), mve_vpt),
+ ToC("vpstte", fe31cf4d, 0, (), mve_vpt),
+ ToC("vpstet", fe71cf4d, 0, (), mve_vpt),
+ ToC("vpstee", fe714f4d, 0, (), mve_vpt),
+ ToC("vpstttt",        fe312f4d, 0, (), mve_vpt),
+ ToC("vpsttte", fe316f4d, 0, (), mve_vpt),
+ ToC("vpsttet",        fe31ef4d, 0, (), mve_vpt),
+ ToC("vpsttee",        fe31af4d, 0, (), mve_vpt),
+ ToC("vpstett",        fe71af4d, 0, (), mve_vpt),
+ ToC("vpstete",        fe71ef4d, 0, (), mve_vpt),
+ ToC("vpsteet",        fe716f4d, 0, (), mve_vpt),
+ ToC("vpsteee",        fe712f4d, 0, (), mve_vpt),
+
+ /* MVE and MVE FP only.  */
+ mCEF(vmullb,  _vmullb,    3, (RMQ, RMQ, RMQ),                   mve_vmull),
+ mCEF(vabav,   _vabav,     3, (RRnpcsp, RMQ, RMQ),               mve_vabav),
+ mCEF(vmladav,   _vmladav,     3, (RRe, RMQ, RMQ),             mve_vmladav),
+ mCEF(vmladava,          _vmladava,    3, (RRe, RMQ, RMQ),             mve_vmladav),
+ mCEF(vmladavx,          _vmladavx,    3, (RRe, RMQ, RMQ),             mve_vmladav),
+ mCEF(vmladavax,  _vmladavax,  3, (RRe, RMQ, RMQ),             mve_vmladav),
+ mCEF(vmlav,     _vmladav,     3, (RRe, RMQ, RMQ),             mve_vmladav),
+ mCEF(vmlava,    _vmladava,    3, (RRe, RMQ, RMQ),             mve_vmladav),
+ mCEF(vmlsdav,   _vmlsdav,     3, (RRe, RMQ, RMQ),             mve_vmladav),
+ mCEF(vmlsdava,          _vmlsdava,    3, (RRe, RMQ, RMQ),             mve_vmladav),
+ mCEF(vmlsdavx,          _vmlsdavx,    3, (RRe, RMQ, RMQ),             mve_vmladav),
+ mCEF(vmlsdavax,  _vmlsdavax,  3, (RRe, RMQ, RMQ),             mve_vmladav),
+
+#undef  ARM_VARIANT
+#define ARM_VARIANT    & fpu_vfp_ext_v1xd
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2
+
+ mCEF(vmullt, _vmullt, 3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ),  mve_vmull),
+ mnCEF(vadd,  _vadd,   3, (RNSDQMQ, oRNSDQMQ, RNSDQMQR),       neon_addsub_if_i),
+ mnCEF(vsub,  _vsub,   3, (RNSDQMQ, oRNSDQMQ, RNSDQMQR),       neon_addsub_if_i),
+
+ MNCEF(vabs,  1b10300, 2, (RNSDQMQ, RNSDQMQ),  neon_abs_neg),
+ MNCEF(vneg,  1b10380, 2, (RNSDQMQ, RNSDQMQ),  neon_abs_neg),
+
+#undef ARM_VARIANT
+#define ARM_VARIANT & fpu_neon_ext_v1
+ mnUF(vabd,      _vabd,    3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_if_su),
+ mnUF(vabdl,     _vabdl,         3, (RNQMQ, RNDMQ, RNDMQ),   neon_dyadic_long),
+ mnUF(vaddl,     _vaddl,         3, (RNQMQ, RNDMQ, RNDMQR),  neon_dyadic_long),
+ mnUF(vsubl,     _vsubl,         3, (RNQMQ, RNDMQ, RNDMQR),  neon_dyadic_long),
  };
  #undef ARM_VARIANT
  #undef THUMB_VARIANT
@@ -21284,8 +22883,10 @@ static const struct asm_opcode insns[] =
  #undef cCE
  #undef cCL
  #undef C3E
+#undef C3
  #undef CE
  #undef CM
+#undef CL
  #undef UE
  #undef UF
  #undef UT
@@ -21301,6 +22902,10 @@ static const struct asm_opcode insns[] =
  #undef OPS5
  #undef OPS6
  #undef do_0
+#undef ToC
+#undef toC
+#undef ToU
+#undef toU
  \f
  /* MD interface: bits in the object file.  */
  
@@ -21817,21 +23422,6 @@ valueT
  md_section_align (segT  segment ATTRIBUTE_UNUSED,
                   valueT size)
  {
-#if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
-  if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
-    {
-      /* For a.out, force the section size to be aligned.  If we don't do
-        this, BFD will align it for us, but it will not write out the
-        final bytes of the section.  This may be a bug in BFD, but it is
-        easier to fix it here since that is how the other a.out targets
-        work.  */
-      int align;
-
-      align = bfd_get_section_alignment (stdoutput, segment);
-      size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
-    }
-#endif
-
    return size;
  }
  
@@ -22165,6 +23755,7 @@ add_unwind_adjustsp (offsetT offset)
  }
  
  /* Finish the list of unwind opcodes for this function.         */
+
  static void
  finish_unwind_opcodes (void)
  {
@@ -22451,7 +24042,7 @@ tc_arm_regname_to_dw2regnum (char *regname)
    if (reg != FAIL)
      return reg + 256;
  
-  return -1;
+  return FAIL;
  }
  
  #ifdef TE_PE
@@ -22512,11 +24103,17 @@ md_pcrel_from_section (fixS * fixP, segT seg)
        return (base + 4) & ~3;
  
        /* Thumb branches are simply offset by +4.  */
+    case BFD_RELOC_THUMB_PCREL_BRANCH5:
      case BFD_RELOC_THUMB_PCREL_BRANCH7:
      case BFD_RELOC_THUMB_PCREL_BRANCH9:
      case BFD_RELOC_THUMB_PCREL_BRANCH12:
      case BFD_RELOC_THUMB_PCREL_BRANCH20:
      case BFD_RELOC_THUMB_PCREL_BRANCH25:
+    case BFD_RELOC_THUMB_PCREL_BFCSEL:
+    case BFD_RELOC_ARM_THUMB_BF17:
+    case BFD_RELOC_ARM_THUMB_BF19:
+    case BFD_RELOC_ARM_THUMB_BF13:
+    case BFD_RELOC_ARM_THUMB_LOOP12:
        return base + 4;
  
      case BFD_RELOC_THUMB_PCREL_BRANCH23:
@@ -22632,7 +24229,7 @@ arm_tc_equal_in_insn (int c ATTRIBUTE_UNUSED, char * name)
             already_warned = hash_new ();
           /* Only warn about the symbol once.  To keep the code
              simple we let hash_insert do the lookup for us.  */
-         if (hash_insert (already_warned, name, NULL) == NULL)
+         if (hash_insert (already_warned, nbuf, NULL) == NULL)
             as_warn (_("[-mwarn-syms]: Assignment makes a symbol match an ARM instruction: %s"), name);
         }
        else
@@ -22881,6 +24478,7 @@ thumb32_negate_data_op (offsetT *instruction, unsigned int value)
  }
  
  /* Read a 32-bit thumb instruction from buf.  */
+
  static unsigned long
  get_thumb32_insn (char * buf)
  {
@@ -22891,7 +24489,6 @@ get_thumb32_insn (char * buf)
    return insn;
  }
  
-
  /* We usually want to set the low bit on the address of thumb function
     symbols.  In particular .word foo - . should have the low bit set.
     Generic code tries to fold the difference of two symbols to
@@ -23390,12 +24987,14 @@ md_apply_fix (fixS *  fixP,
               /* MOV accepts both Thumb2 modified immediate (T2 encoding) and
                  UINT16 (T3 encoding), MOVW only accepts UINT16.  When
                  disassembling, MOV is preferred when there is no encoding
-                overlap.
-                NOTE: MOV is using ORR opcode under Thumb 2 mode.  */
+                overlap.  */
               if (((newval >> T2_DATA_OP_SHIFT) & 0xf) == T2_OPCODE_ORR
+                 /* NOTE: MOV uses the ORR opcode in Thumb 2 mode
+                    but with the Rn field [19:16] set to 1111.  */
+                 && (((newval >> 16) & 0xf) == 0xf)
                   && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2_v8m)
                   && !((newval >> T2_SBIT_SHIFT) & 0x1)
-                 && value >= 0 && value <=0xffff)
+                 && value >= 0 && value <= 0xffff)
                 {
                   /* Toggle bit[25] to change encoding from T2 to T3.  */
                   newval ^= 1 << 25;
@@ -23759,6 +25358,21 @@ md_apply_fix (fixS *   fixP,
        S_SET_THREAD_LOCAL (fixP->fx_addsy);
        break;
  
+      /* Same handling as above, but with the arm_fdpic guard.  */
+    case BFD_RELOC_ARM_TLS_GD32_FDPIC:
+    case BFD_RELOC_ARM_TLS_IE32_FDPIC:
+    case BFD_RELOC_ARM_TLS_LDM32_FDPIC:
+      if (arm_fdpic)
+       {
+         S_SET_THREAD_LOCAL (fixP->fx_addsy);
+       }
+      else
+       {
+         as_bad_where (fixP->fx_file, fixP->fx_line,
+                       _("Relocation supported only in FDPIC mode"));
+       }
+      break;
+
      case BFD_RELOC_ARM_GOT32:
      case BFD_RELOC_ARM_GOTOFF:
        break;
@@ -23775,6 +25389,22 @@ md_apply_fix (fixS *   fixP,
        if (fixP->fx_done || !seg->use_rela_p)
         md_number_to_chars (buf, fixP->fx_offset, 4);
        break;
+
+      /* Relocations for FDPIC.  */
+    case BFD_RELOC_ARM_GOTFUNCDESC:
+    case BFD_RELOC_ARM_GOTOFFFUNCDESC:
+    case BFD_RELOC_ARM_FUNCDESC:
+      if (arm_fdpic)
+       {
+         if (fixP->fx_done || !seg->use_rela_p)
+           md_number_to_chars (buf, 0, 4);
+       }
+      else
+       {
+         as_bad_where (fixP->fx_file, fixP->fx_line,
+                       _("Relocation supported only in FDPIC mode"));
+      }
+      break;
  #endif
  
      case BFD_RELOC_RVA:
@@ -23812,6 +25442,7 @@ md_apply_fix (fixS *    fixP,
  
      case BFD_RELOC_ARM_CP_OFF_IMM:
      case BFD_RELOC_ARM_T32_CP_OFF_IMM:
+    case BFD_RELOC_ARM_T32_VLDR_VSTR_OFF_IMM:
        if (fixP->fx_r_type == BFD_RELOC_ARM_CP_OFF_IMM)
         newval = md_chars_to_number (buf, INSN_SIZE);
        else
@@ -23825,6 +25456,12 @@ md_apply_fix (fixS *   fixP,
             as_bad_where (fixP->fx_file, fixP->fx_line,
                           _("co-processor offset out of range"));
         }
+      else if ((newval & 0xfe001f80) == 0xec000f80)
+       {
+         if (value < -511 || value > 512 || (value & 3))
+           as_bad_where (fixP->fx_file, fixP->fx_line,
+                         _("co-processor offset out of range"));
+       }
        else if (value < -1023 || value > 1023 || (value & 3))
         as_bad_where (fixP->fx_file, fixP->fx_line,
                       _("co-processor offset out of range"));
@@ -23838,10 +25475,18 @@ md_apply_fix (fixS *  fixP,
        else
         newval = get_thumb32_insn (buf);
        if (value == 0)
-       newval &= 0xffffff00;
+       {
+         if (fixP->fx_r_type == BFD_RELOC_ARM_T32_VLDR_VSTR_OFF_IMM)
+           newval &= 0xffffff80;
+         else
+           newval &= 0xffffff00;
+       }
        else
         {
-         newval &= 0xff7fff00;
+         if (fixP->fx_r_type == BFD_RELOC_ARM_T32_VLDR_VSTR_OFF_IMM)
+           newval &= 0xff7fff80;
+         else
+           newval &= 0xff7fff00;
           if ((newval & 0x0f200f00) == 0x0d000900)
             {
               /* This is a fp16 vstr/vldr.
@@ -24203,7 +25848,7 @@ md_apply_fix (fixS *    fixP,
         {
          bfd_vma insn;
          bfd_vma encoded_addend;
-        bfd_vma addend_abs = abs (value);
+        bfd_vma addend_abs = llabs (value);
  
          /* Check that the absolute value of the addend can be
             expressed as an 8-bit constant plus a rotation.  */
@@ -24244,7 +25889,7 @@ md_apply_fix (fixS *    fixP,
        if (!seg->use_rela_p)
         {
           bfd_vma insn;
-         bfd_vma addend_abs = abs (value);
+         bfd_vma addend_abs = llabs (value);
  
           /* Check that the absolute value of the addend can be
              encoded in 12 bits.  */
@@ -24283,7 +25928,7 @@ md_apply_fix (fixS *    fixP,
        if (!seg->use_rela_p)
         {
           bfd_vma insn;
-         bfd_vma addend_abs = abs (value);
+         bfd_vma addend_abs = llabs (value);
  
           /* Check that the absolute value of the addend can be
              encoded in 8 bits.  */
@@ -24323,7 +25968,7 @@ md_apply_fix (fixS *    fixP,
        if (!seg->use_rela_p)
         {
           bfd_vma insn;
-         bfd_vma addend_abs = abs (value);
+         bfd_vma addend_abs = llabs (value);
  
           /* Check that the absolute value of the addend is a multiple of
              four and, when divided by four, fits in 8 bits.  */
@@ -24357,6 +26002,195 @@ md_apply_fix (fixS *  fixP,
         }
        break;
  
+    case BFD_RELOC_THUMB_PCREL_BRANCH5:
+      if (fixP->fx_addsy
+         && (S_GET_SEGMENT (fixP->fx_addsy) == seg)
+         && !S_FORCE_RELOC (fixP->fx_addsy, TRUE)
+         && ARM_IS_FUNC (fixP->fx_addsy)
+         && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v8_1m_main))
+       {
+         /* Force a relocation for a branch 5 bits wide.  */
+         fixP->fx_done = 0;
+       }
+      if (v8_1_branch_value_check (value, 5, FALSE) == FAIL)
+       as_bad_where (fixP->fx_file, fixP->fx_line,
+                     BAD_BRANCH_OFF);
+
+      if (fixP->fx_done || !seg->use_rela_p)
+       {
+         addressT boff = value >> 1;
+
+         newval  = md_chars_to_number (buf, THUMB_SIZE);
+         newval |= (boff << 7);
+         md_number_to_chars (buf, newval, THUMB_SIZE);
+       }
+      break;
+
+    case BFD_RELOC_THUMB_PCREL_BFCSEL:
+      if (fixP->fx_addsy
+         && (S_GET_SEGMENT (fixP->fx_addsy) == seg)
+         && !S_FORCE_RELOC (fixP->fx_addsy, TRUE)
+         && ARM_IS_FUNC (fixP->fx_addsy)
+         && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v8_1m_main))
+       {
+         fixP->fx_done = 0;
+       }
+      if ((value & ~0x7f) && ((value & ~0x3f) != ~0x3f))
+       as_bad_where (fixP->fx_file, fixP->fx_line,
+                     _("branch out of range"));
+
+      if (fixP->fx_done || !seg->use_rela_p)
+       {
+         newval  = md_chars_to_number (buf, THUMB_SIZE);
+
+         addressT boff = ((newval & 0x0780) >> 7) << 1;
+         addressT diff = value - boff;
+
+         if (diff == 4)
+           {
+             newval |= 1 << 1; /* T bit.  */
+           }
+         else if (diff != 2)
+           {
+             as_bad_where (fixP->fx_file, fixP->fx_line,
+                           _("out of range label-relative fixup value"));
+           }
+         md_number_to_chars (buf, newval, THUMB_SIZE);
+       }
+      break;
+
+    case BFD_RELOC_ARM_THUMB_BF17:
+      if (fixP->fx_addsy
+         && (S_GET_SEGMENT (fixP->fx_addsy) == seg)
+         && !S_FORCE_RELOC (fixP->fx_addsy, TRUE)
+         && ARM_IS_FUNC (fixP->fx_addsy)
+         && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v8_1m_main))
+       {
+         /* Force a relocation for a branch 17 bits wide.  */
+         fixP->fx_done = 0;
+       }
+
+      if (v8_1_branch_value_check (value, 17, TRUE) == FAIL)
+       as_bad_where (fixP->fx_file, fixP->fx_line,
+                     BAD_BRANCH_OFF);
+
+      if (fixP->fx_done || !seg->use_rela_p)
+       {
+         offsetT newval2;
+         addressT immA, immB, immC;
+
+         immA = (value & 0x0001f000) >> 12;
+         immB = (value & 0x00000ffc) >> 2;
+         immC = (value & 0x00000002) >> 1;
+
+         newval   = md_chars_to_number (buf, THUMB_SIZE);
+         newval2  = md_chars_to_number (buf + THUMB_SIZE, THUMB_SIZE);
+         newval  |= immA;
+         newval2 |= (immC << 11) | (immB << 1);
+         md_number_to_chars (buf, newval, THUMB_SIZE);
+         md_number_to_chars (buf + THUMB_SIZE, newval2, THUMB_SIZE);
+       }
+      break;
+
+    case BFD_RELOC_ARM_THUMB_BF19:
+      if (fixP->fx_addsy
+         && (S_GET_SEGMENT (fixP->fx_addsy) == seg)
+         && !S_FORCE_RELOC (fixP->fx_addsy, TRUE)
+         && ARM_IS_FUNC (fixP->fx_addsy)
+         && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v8_1m_main))
+       {
+         /* Force a relocation for a branch 19 bits wide.  */
+         fixP->fx_done = 0;
+       }
+
+      if (v8_1_branch_value_check (value, 19, TRUE) == FAIL)
+       as_bad_where (fixP->fx_file, fixP->fx_line,
+                     BAD_BRANCH_OFF);
+
+      if (fixP->fx_done || !seg->use_rela_p)
+       {
+         offsetT newval2;
+         addressT immA, immB, immC;
+
+         immA = (value & 0x0007f000) >> 12;
+         immB = (value & 0x00000ffc) >> 2;
+         immC = (value & 0x00000002) >> 1;
+
+         newval   = md_chars_to_number (buf, THUMB_SIZE);
+         newval2  = md_chars_to_number (buf + THUMB_SIZE, THUMB_SIZE);
+         newval  |= immA;
+         newval2 |= (immC << 11) | (immB << 1);
+         md_number_to_chars (buf, newval, THUMB_SIZE);
+         md_number_to_chars (buf + THUMB_SIZE, newval2, THUMB_SIZE);
+       }
+      break;
+
+    case BFD_RELOC_ARM_THUMB_BF13:
+      if (fixP->fx_addsy
+         && (S_GET_SEGMENT (fixP->fx_addsy) == seg)
+         && !S_FORCE_RELOC (fixP->fx_addsy, TRUE)
+         && ARM_IS_FUNC (fixP->fx_addsy)
+         && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v8_1m_main))
+       {
+         /* Force a relocation for a branch 13 bits wide.  */
+         fixP->fx_done = 0;
+       }
+
+      if (v8_1_branch_value_check (value, 13, TRUE) == FAIL)
+       as_bad_where (fixP->fx_file, fixP->fx_line,
+                     BAD_BRANCH_OFF);
+
+      if (fixP->fx_done || !seg->use_rela_p)
+       {
+         offsetT newval2;
+         addressT immA, immB, immC;
+
+         immA = (value & 0x00001000) >> 12;
+         immB = (value & 0x00000ffc) >> 2;
+         immC = (value & 0x00000002) >> 1;
+
+         newval   = md_chars_to_number (buf, THUMB_SIZE);
+         newval2  = md_chars_to_number (buf + THUMB_SIZE, THUMB_SIZE);
+         newval  |= immA;
+         newval2 |= (immC << 11) | (immB << 1);
+         md_number_to_chars (buf, newval, THUMB_SIZE);
+         md_number_to_chars (buf + THUMB_SIZE, newval2, THUMB_SIZE);
+       }
+      break;
+
+    case BFD_RELOC_ARM_THUMB_LOOP12:
+      if (fixP->fx_addsy
+         && (S_GET_SEGMENT (fixP->fx_addsy) == seg)
+         && !S_FORCE_RELOC (fixP->fx_addsy, TRUE)
+         && ARM_IS_FUNC (fixP->fx_addsy)
+         && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v8_1m_main))
+       {
+         /* Force a relocation for a branch 12 bits wide.  */
+         fixP->fx_done = 0;
+       }
+
+      bfd_vma insn = get_thumb32_insn (buf);
+      /* le lr, <label> or le <label>.  */
+      if (((insn & 0xffffffff) == 0xf00fc001)
+         || ((insn & 0xffffffff) == 0xf02fc001))
+       value = -value;
+
+      if (v8_1_branch_value_check (value, 12, FALSE) == FAIL)
+       as_bad_where (fixP->fx_file, fixP->fx_line,
+                     BAD_BRANCH_OFF);
+      if (fixP->fx_done || !seg->use_rela_p)
+       {
+         addressT imml, immh;
+
+         immh = (value & 0x00000ffc) >> 2;
+         imml = (value & 0x00000002) >> 1;
+
+         newval  = md_chars_to_number (buf + THUMB_SIZE, THUMB_SIZE);
+         newval |= (imml << 11) | (immh << 1);
+         md_number_to_chars (buf + THUMB_SIZE, newval, THUMB_SIZE);
+       }
+      break;
+
      case BFD_RELOC_ARM_V4BX:
        /* This will need to go in the object file.  */
        fixP->fx_done = 0;
@@ -24536,14 +26370,23 @@ tc_gen_reloc (asection *section, fixS *fixp)
      case BFD_RELOC_ARM_THUMB_ALU_ABS_G1_NC:
      case BFD_RELOC_ARM_THUMB_ALU_ABS_G2_NC:
      case BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC:
+    case BFD_RELOC_ARM_GOTFUNCDESC:
+    case BFD_RELOC_ARM_GOTOFFFUNCDESC:
+    case BFD_RELOC_ARM_FUNCDESC:
+    case BFD_RELOC_ARM_THUMB_BF17:
+    case BFD_RELOC_ARM_THUMB_BF19:
+    case BFD_RELOC_ARM_THUMB_BF13:
        code = fixp->fx_r_type;
        break;
  
      case BFD_RELOC_ARM_TLS_GOTDESC:
      case BFD_RELOC_ARM_TLS_GD32:
+    case BFD_RELOC_ARM_TLS_GD32_FDPIC:
      case BFD_RELOC_ARM_TLS_LE32:
      case BFD_RELOC_ARM_TLS_IE32:
+    case BFD_RELOC_ARM_TLS_IE32_FDPIC:
      case BFD_RELOC_ARM_TLS_LDM32:
+    case BFD_RELOC_ARM_TLS_LDM32_FDPIC:
        /* BFD will include the symbol's address in the addend.
          But we don't want that, so subtract it out again here.  */
        if (!S_IS_COMMON (fixp->fx_addsy))
@@ -24562,6 +26405,14 @@ tc_gen_reloc (asection *section, fixS *fixp)
                     _("ADRL used for a symbol not defined in the same file"));
        return NULL;
  
+    case BFD_RELOC_THUMB_PCREL_BRANCH5:
+    case BFD_RELOC_THUMB_PCREL_BFCSEL:
+    case BFD_RELOC_ARM_THUMB_LOOP12:
+      as_bad_where (fixp->fx_file, fixp->fx_line,
+                   _("%s used for a symbol not defined in the same file"),
+                   bfd_get_reloc_code_name (fixp->fx_r_type));
+      return NULL;
+
      case BFD_RELOC_ARM_OFFSET_IMM:
        if (section->use_rela_p)
         {
@@ -24809,9 +26660,12 @@ arm_fix_adjustable (fixS * fixP)
        || fixP->fx_r_type == BFD_RELOC_ARM_GOT32
        || fixP->fx_r_type == BFD_RELOC_ARM_GOTOFF
        || fixP->fx_r_type == BFD_RELOC_ARM_TLS_GD32
+      || fixP->fx_r_type == BFD_RELOC_ARM_TLS_GD32_FDPIC
        || fixP->fx_r_type == BFD_RELOC_ARM_TLS_LE32
        || fixP->fx_r_type == BFD_RELOC_ARM_TLS_IE32
+      || fixP->fx_r_type == BFD_RELOC_ARM_TLS_IE32_FDPIC
        || fixP->fx_r_type == BFD_RELOC_ARM_TLS_LDM32
+      || fixP->fx_r_type == BFD_RELOC_ARM_TLS_LDM32_FDPIC
        || fixP->fx_r_type == BFD_RELOC_ARM_TLS_LDO32
        || fixP->fx_r_type == BFD_RELOC_ARM_TLS_GOTDESC
        || fixP->fx_r_type == BFD_RELOC_ARM_TLS_CALL
@@ -24865,10 +26719,20 @@ elf32_arm_target_format (void)
           ? "elf32-bigarm-nacl"
           : "elf32-littlearm-nacl");
  #else
-  if (target_big_endian)
-    return "elf32-bigarm";
+  if (arm_fdpic)
+    {
+      if (target_big_endian)
+       return "elf32-bigarm-fdpic";
+      else
+       return "elf32-littlearm-fdpic";
+    }
    else
-    return "elf32-littlearm";
+    {
+      if (target_big_endian)
+       return "elf32-bigarm";
+      else
+       return "elf32-littlearm";
+    }
  #endif
  }
  
@@ -24887,8 +26751,8 @@ arm_cleanup (void)
  {
    literal_pool * pool;
  
-  /* Ensure that all the IT blocks are properly closed.  */
-  check_it_blocks_finished ();
+  /* Ensure that all the predication blocks are properly closed.  */
+  check_pred_blocks_finished ();
  
    for (pool = list_of_pools; pool; pool = pool->next)
      {
@@ -25080,6 +26944,7 @@ md_begin (void)
  
    if (  (arm_ops_hsh = hash_new ()) == NULL
        || (arm_cond_hsh = hash_new ()) == NULL
+      || (arm_vcond_hsh = hash_new ()) == NULL
        || (arm_shift_hsh = hash_new ()) == NULL
        || (arm_psr_hsh = hash_new ()) == NULL
        || (arm_v7m_psr_hsh = hash_new ()) == NULL
@@ -25092,6 +26957,8 @@ md_begin (void)
      hash_insert (arm_ops_hsh, insns[i].template_name, (void *) (insns + i));
    for (i = 0; i < sizeof (conds) / sizeof (struct asm_cond); i++)
      hash_insert (arm_cond_hsh, conds[i].template_name, (void *) (conds + i));
+  for (i = 0; i < sizeof (vconds) / sizeof (struct asm_cond); i++)
+    hash_insert (arm_vcond_hsh, vconds[i].template_name, (void *) (vconds + i));
    for (i = 0; i < sizeof (shift_names) / sizeof (struct asm_shift_name); i++)
      hash_insert (arm_shift_hsh, shift_names[i].name, (void *) (shift_names + i));
    for (i = 0; i < sizeof (psrs) / sizeof (struct asm_psr); i++)
@@ -25130,71 +26997,70 @@ md_begin (void)
        if (mcpu_cpu_opt || march_cpu_opt)
         as_bad (_("use of old and new-style options to set CPU type"));
  
-      mcpu_cpu_opt = legacy_cpu;
+      selected_arch = *legacy_cpu;
      }
-  else if (!mcpu_cpu_opt)
+  else if (mcpu_cpu_opt)
      {
-      mcpu_cpu_opt = march_cpu_opt;
-      dyn_mcpu_ext_opt = dyn_march_ext_opt;
-      /* Avoid double free in arm_md_end.  */
-      dyn_march_ext_opt = NULL;
+      selected_arch = *mcpu_cpu_opt;
+      selected_ext = *mcpu_ext_opt;
      }
+  else if (march_cpu_opt)
+    {
+      selected_arch = *march_cpu_opt;
+      selected_ext = *march_ext_opt;
+    }
+  ARM_MERGE_FEATURE_SETS (selected_cpu, selected_arch, selected_ext);
  
    if (legacy_fpu)
      {
        if (mfpu_opt)
         as_bad (_("use of old and new-style options to set FPU type"));
  
-      mfpu_opt = legacy_fpu;
+      selected_fpu = *legacy_fpu;
      }
-  else if (!mfpu_opt)
+  else if (mfpu_opt)
+    selected_fpu = *mfpu_opt;
+  else
      {
  #if !(defined (EABI_DEFAULT) || defined (TE_LINUX) \
         || defined (TE_NetBSD) || defined (TE_VXWORKS))
        /* Some environments specify a default FPU.  If they don't, infer it
          from the processor.  */
        if (mcpu_fpu_opt)
-       mfpu_opt = mcpu_fpu_opt;
-      else
-       mfpu_opt = march_fpu_opt;
+       selected_fpu = *mcpu_fpu_opt;
+      else if (march_fpu_opt)
+       selected_fpu = *march_fpu_opt;
  #else
-      mfpu_opt = &fpu_default;
+      selected_fpu = fpu_default;
  #endif
      }
  
-  if (!mfpu_opt)
+  if (ARM_FEATURE_ZERO (selected_fpu))
      {
-      if (mcpu_cpu_opt != NULL)
-       mfpu_opt = &fpu_default;
-      else if (mcpu_fpu_opt != NULL && ARM_CPU_HAS_FEATURE (*mcpu_fpu_opt, arm_ext_v5))
-       mfpu_opt = &fpu_arch_vfp_v2;
+      if (!no_cpu_selected ())
+       selected_fpu = fpu_default;
        else
-       mfpu_opt = &fpu_arch_fpa;
+       selected_fpu = fpu_arch_fpa;
      }
  
  #ifdef CPU_DEFAULT
-  if (!mcpu_cpu_opt)
+  if (ARM_FEATURE_ZERO (selected_arch))
      {
-      mcpu_cpu_opt = &cpu_default;
-      selected_cpu = cpu_default;
+      selected_arch = cpu_default;
+      selected_cpu = selected_arch;
      }
-  else if (dyn_mcpu_ext_opt)
-    ARM_MERGE_FEATURE_SETS (selected_cpu, *mcpu_cpu_opt, *dyn_mcpu_ext_opt);
-  else
-    selected_cpu = *mcpu_cpu_opt;
+  ARM_MERGE_FEATURE_SETS (cpu_variant, selected_cpu, selected_fpu);
  #else
-  if (mcpu_cpu_opt && dyn_mcpu_ext_opt)
-    ARM_MERGE_FEATURE_SETS (selected_cpu, *mcpu_cpu_opt, *dyn_mcpu_ext_opt);
-  else if (mcpu_cpu_opt)
-    selected_cpu = *mcpu_cpu_opt;
+  /*  Autodetection of feature mode: allow all features in cpu_variant but leave
+      selected_cpu unset.  It will be set in aeabi_set_public_attributes ()
+      after all instructions have been processed and we can decide what CPU
+      should be selected.  */
+  if (ARM_FEATURE_ZERO (selected_arch))
+    ARM_MERGE_FEATURE_SETS (cpu_variant, arm_arch_any, selected_fpu);
    else
-    mcpu_cpu_opt = &arm_arch_any;
+    ARM_MERGE_FEATURE_SETS (cpu_variant, selected_cpu, selected_fpu);
  #endif
  
-  ARM_MERGE_FEATURE_SETS (cpu_variant, *mcpu_cpu_opt, *mfpu_opt);
-  if (dyn_mcpu_ext_opt)
-    ARM_MERGE_FEATURE_SETS (cpu_variant, cpu_variant, *dyn_mcpu_ext_opt);
-
    autoselect_thumb_from_cpu_variant ();
  
    arm_arch_used = thumb_arch_used = arm_arch_none;
@@ -25389,6 +27255,7 @@ const char * md_shortopts = "m:k";
  #endif
  #endif
  #define OPTION_FIX_V4BX (OPTION_MD_BASE + 2)
+#define OPTION_FDPIC (OPTION_MD_BASE + 3)
  
  struct option md_longopts[] =
  {
@@ -25399,19 +27266,21 @@ struct option md_longopts[] =
    {"EL", no_argument, NULL, OPTION_EL},
  #endif
    {"fix-v4bx", no_argument, NULL, OPTION_FIX_V4BX},
+#ifdef OBJ_ELF
+  {"fdpic", no_argument, NULL, OPTION_FDPIC},
+#endif
    {NULL, no_argument, NULL, 0}
  };
  
-
  size_t md_longopts_size = sizeof (md_longopts);
  
  struct arm_option_table
  {
-  const char *option;          /* Option name to match.  */
-  const char *help;            /* Help information.  */
-  int  *var;           /* Variable to change.  */
-  int  value;          /* What to change it to.  */
-  const char *deprecated;      /* If non-null, print this message.  */
+  const char *  option;                /* Option name to match.  */
+  const char *  help;          /* Help information.  */
+  int *         var;           /* Variable to change.  */
+  int          value;          /* What to change it to.  */
+  const char *  deprecated;    /* If non-null, print this message.  */
  };
  
  struct arm_option_table arm_opts[] =
@@ -25444,10 +27313,10 @@ struct arm_option_table arm_opts[] =
  
  struct arm_legacy_option_table
  {
-  const char *option;                          /* Option name to match.  */
-  const arm_feature_set        **var;          /* Variable to change.  */
-  const arm_feature_set        value;          /* What to change it to.  */
-  const char *deprecated;                      /* If non-null, print this message.  */
+  const char *              option;            /* Option name to match.  */
+  const arm_feature_set        **  var;                /* Variable to change.  */
+  const arm_feature_set            value;              /* What to change it to.  */
+  const char *              deprecated;                /* If non-null, print this message.  */
  };
  
  const struct arm_legacy_option_table arm_legacy_opts[] =
@@ -25554,10 +27423,10 @@ const struct arm_legacy_option_table arm_legacy_opts[] =
    {"marmv5e",   &legacy_cpu, ARM_ARCH_V5TE, N_("use -march=armv5te")},
  
    /* Floating point variants -- don't add any more to this list either.         */
-  {"mfpe-old", &legacy_fpu, FPU_ARCH_FPE, N_("use -mfpu=fpe")},
-  {"mfpa10",   &legacy_fpu, FPU_ARCH_FPA, N_("use -mfpu=fpa10")},
-  {"mfpa11",   &legacy_fpu, FPU_ARCH_FPA, N_("use -mfpu=fpa11")},
-  {"mno-fpu",  &legacy_fpu, ARM_ARCH_NONE,
+  {"mfpe-old",   &legacy_fpu, FPU_ARCH_FPE, N_("use -mfpu=fpe")},
+  {"mfpa10",     &legacy_fpu, FPU_ARCH_FPA, N_("use -mfpu=fpa10")},
+  {"mfpa11",     &legacy_fpu, FPU_ARCH_FPA, N_("use -mfpu=fpa11")},
+  {"mno-fpu",    &legacy_fpu, ARM_ARCH_NONE,
     N_("use either -mfpu=softfpa or -mfpu=softvfp")},
  
    {NULL, NULL, ARM_ARCH_NONE, NULL}
@@ -25565,21 +27434,22 @@ const struct arm_legacy_option_table arm_legacy_opts[] =
  
  struct arm_cpu_option_table
  {
-  const char *name;
-  size_t name_len;
-  const arm_feature_set        value;
-  const arm_feature_set        ext;
+  const char *           name;
+  size_t                 name_len;
+  const arm_feature_set         value;
+  const arm_feature_set         ext;
    /* For some CPUs we assume an FPU unless the user explicitly sets
       -mfpu=... */
-  const arm_feature_set        default_fpu;
+  const arm_feature_set         default_fpu;
    /* The canonical name of the CPU, or NULL to use NAME converted to upper
       case.  */
-  const char *canonical_name;
+  const char *           canonical_name;
  };
  
  /* This list should, at a minimum, contain all the cpu names
     recognized by GCC.  */
  #define ARM_CPU_OPT(N, CN, V, E, DF) { N, sizeof (N) - 1, V, E, DF, CN }
+
  static const struct arm_cpu_option_table arm_cpus[] =
  {
    ARM_CPU_OPT ("all",            NULL,                ARM_ANY,
@@ -25863,7 +27733,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a55",    "Cortex-A55",         ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
-              FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
+              FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
    ARM_CPU_OPT ("cortex-a57",     "Cortex-A57",        ARM_ARCH_V8A,
                ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
@@ -25875,7 +27745,13 @@ static const struct arm_cpu_option_table arm_cpus[] =
               FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a75",    "Cortex-A75",         ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
-              FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
+              FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
+  ARM_CPU_OPT ("cortex-a76",    "Cortex-A76",         ARM_ARCH_V8_2A,
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+              FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
+  ARM_CPU_OPT ("ares",    "Ares",             ARM_ARCH_V8_2A,
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+              FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
    ARM_CPU_OPT ("cortex-r4",      "Cortex-R4",         ARM_ARCH_V7R,
                ARM_ARCH_NONE,
                FPU_NONE),
@@ -25921,7 +27797,9 @@ static const struct arm_cpu_option_table arm_cpus[] =
    ARM_CPU_OPT ("exynos-m1",      "Samsung Exynos M1", ARM_ARCH_V8A,
                ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
-
+  ARM_CPU_OPT ("neoverse-n1",    "Neoverse N1",               ARM_ARCH_V8_2A,
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+              FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
    /* ??? XSCALE is really an architecture.  */
    ARM_CPU_OPT ("xscale",         NULL,                ARM_ARCH_XSCALE,
                ARM_ARCH_NONE,
@@ -25938,7 +27816,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
                ARM_ARCH_NONE,
                FPU_ARCH_VFP_V2),
  
-  /* Maverick */
+  /* Maverick.  */
    ARM_CPU_OPT ("ep9312",         "ARM920T",
                ARM_FEATURE_LOW (ARM_AEXT_V4T, ARM_CEXT_MAVERICK),
                ARM_ARCH_NONE, FPU_ARCH_MAVERICK),
@@ -25963,95 +27841,325 @@ static const struct arm_cpu_option_table arm_cpus[] =
  };
  #undef ARM_CPU_OPT
  
+struct arm_ext_table
+{
+  const char *           name;          /* Extension name (the E in +E / +noE).  */
+  size_t                 name_len;      /* Length of NAME, excluding the NUL.  */
+  const arm_feature_set          merge; /* Features given for +E; ARM_ARCH_NONE in ARM_REMOVE entries.  */
+  const arm_feature_set          clear; /* Features given for +noE; ARM_ARCH_NONE in ARM_ADD entries.  */
+};
+
  struct arm_arch_option_table
  {
-  const char *name;
-  size_t name_len;
-  const arm_feature_set        value;
-  const arm_feature_set        default_fpu;
+  const char *                 name;
+  size_t                       name_len;
+  const arm_feature_set                value;
+  const arm_feature_set                default_fpu;
+  const struct arm_ext_table * ext_table; /* Per-architecture extension table, or NULL (see ARM_ARCH_OPT).  */
+};
+
+/* Entry for an extension E with both forms: M is merged for +E, C is cleared for +noE.  */
+#define ARM_EXT(E, M, C) { E, sizeof (E) - 1, M, C }
+/* Entry for a +E-only extension: M is merged, the clear set is ARM_ARCH_NONE.  */
+#define ARM_ADD(E, M) { E, sizeof(E) - 1, M, ARM_ARCH_NONE }
+/* Entry for a +noE-only extension: C is cleared, the merge set is ARM_ARCH_NONE.  */
+#define ARM_REMOVE(E, C) { E, sizeof(E) -1, ARM_ARCH_NONE, C }
+
+#define ALL_FP ARM_FEATURE (0, ARM_EXT2_FP16_INST | ARM_EXT2_FP16_FML, \
+                           ~0 & ~FPU_ENDIAN_PURE)
+
+static const struct arm_ext_table armv5te_ext_table[] =
+{
+  ARM_EXT ("fp", FPU_ARCH_VFP_V2, ALL_FP),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+static const struct arm_ext_table armv7_ext_table[] =
+{
+  ARM_EXT ("fp", FPU_ARCH_VFP_V3D16, ALL_FP),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+static const struct arm_ext_table armv7ve_ext_table[] =
+{
+  ARM_EXT ("fp", FPU_ARCH_VFP_V4D16, ALL_FP),
+  ARM_ADD ("vfpv3-d16", FPU_ARCH_VFP_V3D16),
+  ARM_ADD ("vfpv3", FPU_ARCH_VFP_V3),
+  ARM_ADD ("vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16),
+  ARM_ADD ("vfpv3-fp16", FPU_ARCH_VFP_V3_FP16),
+  ARM_ADD ("vfpv4-d16", FPU_ARCH_VFP_V4D16),  /* Alias for +fp.  */
+  ARM_ADD ("vfpv4", FPU_ARCH_VFP_V4),
+
+  ARM_EXT ("simd", FPU_ARCH_NEON_VFP_V4,
+          ARM_FEATURE_COPROC (FPU_NEON_EXT_V1 | FPU_NEON_EXT_FMA)),
+
+  /* Aliases for +simd.  */
+  ARM_ADD ("neon-vfpv4", FPU_ARCH_NEON_VFP_V4),
+
+  ARM_ADD ("neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1),
+  ARM_ADD ("neon-vfpv3", FPU_ARCH_VFP_V3_PLUS_NEON_V1),
+  ARM_ADD ("neon-fp16", FPU_ARCH_NEON_FP16),
+
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+static const struct arm_ext_table armv7a_ext_table[] =
+{
+  ARM_EXT ("fp", FPU_ARCH_VFP_V3D16, ALL_FP),
+  ARM_ADD ("vfpv3-d16", FPU_ARCH_VFP_V3D16), /* Alias for +fp.  */
+  ARM_ADD ("vfpv3", FPU_ARCH_VFP_V3),
+  ARM_ADD ("vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16),
+  ARM_ADD ("vfpv3-fp16", FPU_ARCH_VFP_V3_FP16),
+  ARM_ADD ("vfpv4-d16", FPU_ARCH_VFP_V4D16),
+  ARM_ADD ("vfpv4", FPU_ARCH_VFP_V4),
+
+  ARM_EXT ("simd", FPU_ARCH_VFP_V3_PLUS_NEON_V1,
+          ARM_FEATURE_COPROC (FPU_NEON_EXT_V1 | FPU_NEON_EXT_FMA)),
+
+  /* Aliases for +simd.  */
+  ARM_ADD ("neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1),
+  ARM_ADD ("neon-vfpv3", FPU_ARCH_VFP_V3_PLUS_NEON_V1),
+
+  ARM_ADD ("neon-fp16", FPU_ARCH_NEON_FP16),
+  ARM_ADD ("neon-vfpv4", FPU_ARCH_NEON_VFP_V4),
+
+  ARM_ADD ("mp", ARM_FEATURE_CORE_LOW (ARM_EXT_MP)),
+  ARM_ADD ("sec", ARM_FEATURE_CORE_LOW (ARM_EXT_SEC)),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+static const struct arm_ext_table armv7r_ext_table[] =
+{
+  ARM_ADD ("fp.sp", FPU_ARCH_VFP_V3xD),
+  ARM_ADD ("vfpv3xd", FPU_ARCH_VFP_V3xD), /* Alias for +fp.sp.  */
+  ARM_EXT ("fp", FPU_ARCH_VFP_V3D16, ALL_FP),
+  ARM_ADD ("vfpv3-d16", FPU_ARCH_VFP_V3D16), /* Alias for +fp.  */
+  ARM_ADD ("vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16),
+  ARM_ADD ("vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16),
+  ARM_EXT ("idiv", ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
+          ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV)),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+static const struct arm_ext_table armv7em_ext_table[] =
+{
+  ARM_EXT ("fp", FPU_ARCH_VFP_V4_SP_D16, ALL_FP),
+  /* Alias for +fp, used to be known as fpv4-sp-d16.  */
+  ARM_ADD ("vfpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16),
+  ARM_ADD ("fpv5", FPU_ARCH_VFP_V5_SP_D16),
+  ARM_ADD ("fp.dp", FPU_ARCH_VFP_V5D16),
+  ARM_ADD ("fpv5-d16", FPU_ARCH_VFP_V5D16),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+static const struct arm_ext_table armv8a_ext_table[] =
+{
+  ARM_ADD ("crc", ARCH_CRC_ARMV8),
+  ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8),
+  ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+          ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
+
+  /* Armv8-a does not allow an FP implementation without SIMD, so the user
+     should use the +simd option to turn on FP.  */
+  ARM_REMOVE ("fp", ALL_FP),
+  ARM_ADD ("sb", ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB)),
+  ARM_ADD ("predres", ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES)),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+
+static const struct arm_ext_table armv81a_ext_table[] =
+{
+  ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8_1),
+  ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_1,
+          ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
+
+  /* Armv8-a does not allow an FP implementation without SIMD, so the user
+     should use the +simd option to turn on FP.  */
+  ARM_REMOVE ("fp", ALL_FP),
+  ARM_ADD ("sb", ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB)),
+  ARM_ADD ("predres", ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES)),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+static const struct arm_ext_table armv82a_ext_table[] =
+{
+  ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8_1),
+  ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_2_FP16),
+  ARM_ADD ("fp16fml", FPU_ARCH_NEON_VFP_ARMV8_2_FP16FML),
+  ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_1,
+          ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
+  ARM_ADD ("dotprod", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
+
+  /* Armv8-a does not allow an FP implementation without SIMD, so the user
+     should use the +simd option to turn on FP.  */
+  ARM_REMOVE ("fp", ALL_FP),
+  ARM_ADD ("sb", ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB)),
+  ARM_ADD ("predres", ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES)),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+static const struct arm_ext_table armv84a_ext_table[] =
+{
+  ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
+  ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
+  ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
+          ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
+
+  /* Armv8-a does not allow an FP implementation without SIMD, so the user
+     should use the +simd option to turn on FP.  */
+  ARM_REMOVE ("fp", ALL_FP),
+  ARM_ADD ("sb", ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB)),
+  ARM_ADD ("predres", ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES)),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+static const struct arm_ext_table armv85a_ext_table[] =
+{
+  ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
+  ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
+  ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
+          ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
+
+  /* Armv8-a does not allow an FP implementation without SIMD, so the user
+     should use the +simd option to turn on FP.  */
+  ARM_REMOVE ("fp", ALL_FP),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+static const struct arm_ext_table armv8m_main_ext_table[] =
+{
+  ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
+                 ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP)),
+  ARM_EXT ("fp", FPU_ARCH_VFP_V5_SP_D16, ALL_FP),
+  ARM_ADD ("fp.dp", FPU_ARCH_VFP_V5D16),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+static const struct arm_ext_table armv8_1m_main_ext_table[] =
+{
+  ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
+                 ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP)),
+  ARM_EXT ("fp",
+          ARM_FEATURE (0, ARM_EXT2_FP16_INST,
+                       FPU_VFP_V5_SP_D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA),
+          ALL_FP),
+  ARM_ADD ("fp.dp",
+          ARM_FEATURE (0, ARM_EXT2_FP16_INST,
+                       FPU_VFP_V5D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)),
+  ARM_EXT ("mve", ARM_FEATURE_COPROC (FPU_MVE),
+          ARM_FEATURE_COPROC (FPU_MVE | FPU_MVE_FP)),
+  ARM_ADD ("mve.fp",
+          ARM_FEATURE (0, ARM_EXT2_FP16_INST,
+                       FPU_MVE | FPU_MVE_FP | FPU_VFP_V5_SP_D16 |
+                       FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+static const struct arm_ext_table armv8r_ext_table[] =
+{
+  ARM_ADD ("crc", ARCH_CRC_ARMV8),
+  ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8),
+  ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+          ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
+  ARM_REMOVE ("fp", ALL_FP),
+  ARM_ADD ("fp.sp", FPU_ARCH_VFP_V5_SP_D16),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
  };
  
  /* This list should, at a minimum, contain all the architecture names
     recognized by GCC.  */
-#define ARM_ARCH_OPT(N, V, DF) { N, sizeof (N) - 1, V, DF }
+#define ARM_ARCH_OPT(N, V, DF) { N, sizeof (N) - 1, V, DF, NULL }
+#define ARM_ARCH_OPT2(N, V, DF, ext) \
+  { N, sizeof (N) - 1, V, DF, ext##_ext_table }
+
  static const struct arm_arch_option_table arm_archs[] =
  {
-  ARM_ARCH_OPT ("all",         ARM_ANY,         FPU_ARCH_FPA),
-  ARM_ARCH_OPT ("armv1",       ARM_ARCH_V1,     FPU_ARCH_FPA),
-  ARM_ARCH_OPT ("armv2",       ARM_ARCH_V2,     FPU_ARCH_FPA),
-  ARM_ARCH_OPT ("armv2a",      ARM_ARCH_V2S,    FPU_ARCH_FPA),
-  ARM_ARCH_OPT ("armv2s",      ARM_ARCH_V2S,    FPU_ARCH_FPA),
-  ARM_ARCH_OPT ("armv3",       ARM_ARCH_V3,     FPU_ARCH_FPA),
-  ARM_ARCH_OPT ("armv3m",      ARM_ARCH_V3M,    FPU_ARCH_FPA),
-  ARM_ARCH_OPT ("armv4",       ARM_ARCH_V4,     FPU_ARCH_FPA),
-  ARM_ARCH_OPT ("armv4xm",     ARM_ARCH_V4xM,   FPU_ARCH_FPA),
-  ARM_ARCH_OPT ("armv4t",      ARM_ARCH_V4T,    FPU_ARCH_FPA),
-  ARM_ARCH_OPT ("armv4txm",    ARM_ARCH_V4TxM,  FPU_ARCH_FPA),
-  ARM_ARCH_OPT ("armv5",       ARM_ARCH_V5,     FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv5t",      ARM_ARCH_V5T,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv5txm",    ARM_ARCH_V5TxM,  FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv5te",     ARM_ARCH_V5TE,   FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv5texp",   ARM_ARCH_V5TExP, FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv5tej",    ARM_ARCH_V5TEJ,  FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6",       ARM_ARCH_V6,     FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6j",      ARM_ARCH_V6,     FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6k",      ARM_ARCH_V6K,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6z",      ARM_ARCH_V6Z,    FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("all",           ARM_ANY,              FPU_ARCH_FPA),
+  ARM_ARCH_OPT ("armv1",         ARM_ARCH_V1,          FPU_ARCH_FPA),
+  ARM_ARCH_OPT ("armv2",         ARM_ARCH_V2,          FPU_ARCH_FPA),
+  ARM_ARCH_OPT ("armv2a",        ARM_ARCH_V2S,         FPU_ARCH_FPA),
+  ARM_ARCH_OPT ("armv2s",        ARM_ARCH_V2S,         FPU_ARCH_FPA),
+  ARM_ARCH_OPT ("armv3",         ARM_ARCH_V3,          FPU_ARCH_FPA),
+  ARM_ARCH_OPT ("armv3m",        ARM_ARCH_V3M,         FPU_ARCH_FPA),
+  ARM_ARCH_OPT ("armv4",         ARM_ARCH_V4,          FPU_ARCH_FPA),
+  ARM_ARCH_OPT ("armv4xm",       ARM_ARCH_V4xM,        FPU_ARCH_FPA),
+  ARM_ARCH_OPT ("armv4t",        ARM_ARCH_V4T,         FPU_ARCH_FPA),
+  ARM_ARCH_OPT ("armv4txm",      ARM_ARCH_V4TxM,       FPU_ARCH_FPA),
+  ARM_ARCH_OPT ("armv5",         ARM_ARCH_V5,          FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv5t",        ARM_ARCH_V5T,         FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv5txm",      ARM_ARCH_V5TxM,       FPU_ARCH_VFP),
+  ARM_ARCH_OPT2 ("armv5te",      ARM_ARCH_V5TE,        FPU_ARCH_VFP,   armv5te),
+  ARM_ARCH_OPT2 ("armv5texp",    ARM_ARCH_V5TExP,      FPU_ARCH_VFP, armv5te),
+  ARM_ARCH_OPT2 ("armv5tej",     ARM_ARCH_V5TEJ,       FPU_ARCH_VFP,   armv5te),
+  ARM_ARCH_OPT2 ("armv6",        ARM_ARCH_V6,          FPU_ARCH_VFP,   armv5te),
+  ARM_ARCH_OPT2 ("armv6j",       ARM_ARCH_V6,          FPU_ARCH_VFP,   armv5te),
+  ARM_ARCH_OPT2 ("armv6k",       ARM_ARCH_V6K,         FPU_ARCH_VFP,   armv5te),
+  ARM_ARCH_OPT2 ("armv6z",       ARM_ARCH_V6Z,         FPU_ARCH_VFP,   armv5te),
    /* The official spelling of this variant is ARMv6KZ, the name "armv6zk" is
       kept to preserve existing behaviour.  */
-  ARM_ARCH_OPT ("armv6kz",     ARM_ARCH_V6KZ,   FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6zk",     ARM_ARCH_V6KZ,   FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6t2",     ARM_ARCH_V6T2,   FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6kt2",    ARM_ARCH_V6KT2,  FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6zt2",    ARM_ARCH_V6ZT2,  FPU_ARCH_VFP),
+  ARM_ARCH_OPT2 ("armv6kz",      ARM_ARCH_V6KZ,        FPU_ARCH_VFP,   armv5te),
+  ARM_ARCH_OPT2 ("armv6zk",      ARM_ARCH_V6KZ,        FPU_ARCH_VFP,   armv5te),
+  ARM_ARCH_OPT2 ("armv6t2",      ARM_ARCH_V6T2,        FPU_ARCH_VFP,   armv5te),
+  ARM_ARCH_OPT2 ("armv6kt2",     ARM_ARCH_V6KT2,       FPU_ARCH_VFP,   armv5te),
+  ARM_ARCH_OPT2 ("armv6zt2",     ARM_ARCH_V6ZT2,       FPU_ARCH_VFP,   armv5te),
    /* The official spelling of this variant is ARMv6KZ, the name "armv6zkt2" is
       kept to preserve existing behaviour.  */
-  ARM_ARCH_OPT ("armv6kzt2",   ARM_ARCH_V6KZT2, FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6zkt2",   ARM_ARCH_V6KZT2, FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6-m",     ARM_ARCH_V6M,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6s-m",    ARM_ARCH_V6SM,   FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv7",       ARM_ARCH_V7,     FPU_ARCH_VFP),
+  ARM_ARCH_OPT2 ("armv6kzt2",    ARM_ARCH_V6KZT2,      FPU_ARCH_VFP,   armv5te),
+  ARM_ARCH_OPT2 ("armv6zkt2",    ARM_ARCH_V6KZT2,      FPU_ARCH_VFP,   armv5te),
+  ARM_ARCH_OPT ("armv6-m",       ARM_ARCH_V6M,         FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv6s-m",      ARM_ARCH_V6SM,        FPU_ARCH_VFP),
+  ARM_ARCH_OPT2 ("armv7",        ARM_ARCH_V7,          FPU_ARCH_VFP, armv7),
    /* The official spelling of the ARMv7 profile variants is the dashed form.
       Accept the non-dashed form for compatibility with old toolchains.  */
-  ARM_ARCH_OPT ("armv7a",      ARM_ARCH_V7A,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv7ve",     ARM_ARCH_V7VE,   FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv7r",      ARM_ARCH_V7R,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv7m",      ARM_ARCH_V7M,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv7-a",     ARM_ARCH_V7A,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv7-r",     ARM_ARCH_V7R,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv7-m",     ARM_ARCH_V7M,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv7e-m",    ARM_ARCH_V7EM,   FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv8-m.base",        ARM_ARCH_V8M_BASE, FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv8-m.main",        ARM_ARCH_V8M_MAIN, FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv8-a",     ARM_ARCH_V8A,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv8.1-a",   ARM_ARCH_V8_1A,  FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv8.2-a",   ARM_ARCH_V8_2A,  FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv8.3-a",   ARM_ARCH_V8_3A,  FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv8-r",     ARM_ARCH_V8R,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("xscale",      ARM_ARCH_XSCALE, FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("iwmmxt",      ARM_ARCH_IWMMXT, FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("iwmmxt2",     ARM_ARCH_IWMMXT2,FPU_ARCH_VFP),
-  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+  ARM_ARCH_OPT2 ("armv7a",       ARM_ARCH_V7A,         FPU_ARCH_VFP, armv7a),
+  ARM_ARCH_OPT2 ("armv7ve",      ARM_ARCH_V7VE,        FPU_ARCH_VFP, armv7ve),
+  ARM_ARCH_OPT2 ("armv7r",       ARM_ARCH_V7R,         FPU_ARCH_VFP, armv7r),
+  ARM_ARCH_OPT ("armv7m",        ARM_ARCH_V7M,         FPU_ARCH_VFP),
+  ARM_ARCH_OPT2 ("armv7-a",      ARM_ARCH_V7A,         FPU_ARCH_VFP, armv7a),
+  ARM_ARCH_OPT2 ("armv7-r",      ARM_ARCH_V7R,         FPU_ARCH_VFP, armv7r),
+  ARM_ARCH_OPT ("armv7-m",       ARM_ARCH_V7M,         FPU_ARCH_VFP),
+  ARM_ARCH_OPT2 ("armv7e-m",     ARM_ARCH_V7EM,        FPU_ARCH_VFP, armv7em),
+  ARM_ARCH_OPT ("armv8-m.base",          ARM_ARCH_V8M_BASE,    FPU_ARCH_VFP),
+  ARM_ARCH_OPT2 ("armv8-m.main",  ARM_ARCH_V8M_MAIN,   FPU_ARCH_VFP,
+                armv8m_main),
+  ARM_ARCH_OPT2 ("armv8.1-m.main", ARM_ARCH_V8_1M_MAIN,        FPU_ARCH_VFP,
+                armv8_1m_main),
+  ARM_ARCH_OPT2 ("armv8-a",      ARM_ARCH_V8A,         FPU_ARCH_VFP, armv8a),
+  ARM_ARCH_OPT2 ("armv8.1-a",    ARM_ARCH_V8_1A,       FPU_ARCH_VFP, armv81a),
+  ARM_ARCH_OPT2 ("armv8.2-a",    ARM_ARCH_V8_2A,       FPU_ARCH_VFP, armv82a),
+  ARM_ARCH_OPT2 ("armv8.3-a",    ARM_ARCH_V8_3A,       FPU_ARCH_VFP, armv82a),
+  ARM_ARCH_OPT2 ("armv8-r",      ARM_ARCH_V8R,         FPU_ARCH_VFP, armv8r),
+  ARM_ARCH_OPT2 ("armv8.4-a",    ARM_ARCH_V8_4A,       FPU_ARCH_VFP, armv84a),
+  ARM_ARCH_OPT2 ("armv8.5-a",    ARM_ARCH_V8_5A,       FPU_ARCH_VFP, armv85a),
+  ARM_ARCH_OPT ("xscale",        ARM_ARCH_XSCALE,      FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("iwmmxt",        ARM_ARCH_IWMMXT,      FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("iwmmxt2",       ARM_ARCH_IWMMXT2,     FPU_ARCH_VFP),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, NULL }
  };
  #undef ARM_ARCH_OPT
  
  /* ISA extensions in the co-processor and main instruction set space.  */
+
  struct arm_option_extension_value_table
  {
-  const char *name;
-  size_t name_len;
-  const arm_feature_set merge_value;
-  const arm_feature_set clear_value;
+  const char *           name;
+  size_t                 name_len;
+  const arm_feature_set  merge_value;
+  const arm_feature_set  clear_value;
    /* List of architectures for which an extension is available.  ARM_ARCH_NONE
       indicates that an extension is available for all architectures while
       ARM_ANY marks an empty entry.  */
-  const arm_feature_set allowed_archs[2];
+  const arm_feature_set  allowed_archs[2];
  };
  
-/* The following table must be in alphabetical order with a NULL last entry.
-   */
+/* The following table must be in alphabetical order with a NULL last entry.  */
+
  #define ARM_EXT_OPT(N, M, C, AA) { N, sizeof (N) - 1, M, C, { AA, ARM_ANY } }
  #define ARM_EXT_OPT2(N, M, C, AA1, AA2) { N, sizeof (N) - 1, M, C, {AA1, AA2} }
+
+/* DEPRECATED: Do not add new extensions to this table.  Use the
+   context-sensitive, per-architecture arm_ext_table arrays instead.  */
  static const struct arm_option_extension_value_table arm_extensions[] =
  {
    ARM_EXT_OPT ("crc",  ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
@@ -26070,6 +28178,11 @@ static const struct arm_option_extension_value_table arm_extensions[] =
    ARM_EXT_OPT ("fp16",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
                         ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
                         ARM_ARCH_V8_2A),
+  ARM_EXT_OPT ("fp16fml",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST
+                                                 | ARM_EXT2_FP16_FML),
+                          ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST
+                                                 | ARM_EXT2_FP16_FML),
+                          ARM_ARCH_V8_2A),
    ARM_EXT_OPT2 ("idiv",        ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_V7A),
@@ -26097,12 +28210,18 @@ static const struct arm_option_extension_value_table arm_extensions[] =
    ARM_EXT_OPT ("pan",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_PAN),
                         ARM_FEATURE (ARM_EXT_V8, ARM_EXT2_PAN, 0),
                         ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8A)),
+  ARM_EXT_OPT ("predres", ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES),
+                       ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES),
+                       ARM_ARCH_V8A),
    ARM_EXT_OPT ("ras",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_RAS),
                         ARM_FEATURE (ARM_EXT_V8, ARM_EXT2_RAS, 0),
                         ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8A)),
    ARM_EXT_OPT ("rdma",  FPU_ARCH_NEON_VFP_ARMV8_1,
                         ARM_FEATURE_COPROC (FPU_NEON_ARMV8 | FPU_NEON_EXT_RDMA),
                         ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8A)),
+  ARM_EXT_OPT ("sb",   ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB),
+                       ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB),
+                       ARM_ARCH_V8A),
    ARM_EXT_OPT2 ("sec", ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_V6K),
@@ -26123,8 +28242,8 @@ static const struct arm_option_extension_value_table arm_extensions[] =
  /* ISA floating-point and Advanced SIMD extensions.  */
  struct arm_option_fpu_value_table
  {
-  const char *name;
-  const arm_feature_set value;
+  const char *           name;
+  const arm_feature_set  value;
  };
  
  /* This list should, at a minimum, contain all the fpu names
@@ -26205,7 +28324,7 @@ static const struct arm_option_value_table arm_eabis[] =
  
  struct arm_long_option_table
  {
-  const char * option;         /* Substring to match.  */
+  const char * option;                 /* Substring to match.  */
    const char * help;                   /* Help information.  */
    int (* func) (const char * subopt);  /* Function to decode sub-option.  */
    const char * deprecated;             /* If non-null, print this message.  */
@@ -26213,7 +28332,8 @@ struct arm_long_option_table
  
  static bfd_boolean
  arm_parse_extension (const char *str, const arm_feature_set *opt_set,
-                    arm_feature_set **ext_set_p)
+                    arm_feature_set *ext_set,
+                    const struct arm_ext_table *ext_table)
  {
    /* We insist on extensions being specified in alphabetical order, and with
       extensions being added before being removed.  We achieve this by having
@@ -26225,12 +28345,6 @@ arm_parse_extension (const char *str, const arm_feature_set *opt_set,
    const arm_feature_set arm_any = ARM_ANY;
    int adding_value = -1;
  
-  if (!*ext_set_p)
-    {
-      *ext_set_p = XNEW (arm_feature_set);
-      **ext_set_p = arm_arch_none;
-    }
-
    while (str != NULL && *str != 0)
      {
        const char *ext;
@@ -26285,6 +28399,41 @@ arm_parse_extension (const char *str, const arm_feature_set *opt_set,
        gas_assert (adding_value != -1);
        gas_assert (opt != NULL);
  
+      if (ext_table != NULL)
+       {
+         const struct arm_ext_table * ext_opt = ext_table;
+         bfd_boolean found = FALSE;
+         for (; ext_opt->name != NULL; ext_opt++)
+           if (ext_opt->name_len == len
+               && strncmp (ext_opt->name, str, len) == 0)
+             {
+               if (adding_value)
+                 {
+                   if (ARM_FEATURE_ZERO (ext_opt->merge))
+                       /* TODO: Unsupported here, so the legacy table is tried.
+                          This should error out once that table is removed.  */
+                       continue;
+
+                   ARM_MERGE_FEATURE_SETS (*ext_set, *ext_set, ext_opt->merge);
+                 }
+               else
+                 {
+                   if (ARM_FEATURE_ZERO (ext_opt->clear))
+                       /* TODO: Unsupported here, so the legacy table is tried.
+                          This should error out once that table is removed.  */
+                       continue;
+                   ARM_CLEAR_FEATURE (*ext_set, *ext_set, ext_opt->clear);
+                 }
+               found = TRUE;
+               break;
+             }
+         if (found)
+           {
+             str = ext;
+             continue;
+           }
+       }
+
        /* Scan over the options table trying to find an exact match. */
        for (; opt->name != NULL; opt++)
         if (opt->name_len == len && strncmp (opt->name, str, len) == 0)
@@ -26308,10 +28457,9 @@ arm_parse_extension (const char *str, const arm_feature_set *opt_set,
  
             /* Add or remove the extension.  */
             if (adding_value)
-             ARM_MERGE_FEATURE_SETS (**ext_set_p, **ext_set_p,
-                                     opt->merge_value);
+             ARM_MERGE_FEATURE_SETS (*ext_set, *ext_set, opt->merge_value);
             else
-             ARM_CLEAR_FEATURE (**ext_set_p, **ext_set_p, opt->clear_value);
+             ARM_CLEAR_FEATURE (*ext_set, *ext_set, opt->clear_value);
  
             /* Allowing Thumb division instructions for ARMv7 in autodetection
                rely on this break so that duplicate extensions (extensions
@@ -26372,9 +28520,9 @@ arm_parse_cpu (const char *str)
      if (opt->name_len == len && strncmp (opt->name, str, len) == 0)
        {
         mcpu_cpu_opt = &opt->value;
-       if (!dyn_mcpu_ext_opt)
-         dyn_mcpu_ext_opt = XNEW (arm_feature_set);
-       *dyn_mcpu_ext_opt = opt->ext;
+       if (mcpu_ext_opt == NULL)
+         mcpu_ext_opt = XNEW (arm_feature_set);
+       *mcpu_ext_opt = opt->ext;
         mcpu_fpu_opt = &opt->default_fpu;
         if (opt->canonical_name)
           {
@@ -26394,7 +28542,7 @@ arm_parse_cpu (const char *str)
           }
  
         if (ext != NULL)
-         return arm_parse_extension (ext, mcpu_cpu_opt, &dyn_mcpu_ext_opt);
+         return arm_parse_extension (ext, mcpu_cpu_opt, mcpu_ext_opt, NULL);
  
         return TRUE;
        }
@@ -26425,11 +28573,15 @@ arm_parse_arch (const char *str)
      if (opt->name_len == len && strncmp (opt->name, str, len) == 0)
        {
         march_cpu_opt = &opt->value;
+       if (march_ext_opt == NULL)
+         march_ext_opt = XNEW (arm_feature_set);
+       *march_ext_opt = arm_arch_none;
         march_fpu_opt = &opt->default_fpu;
         strcpy (selected_cpu_name, opt->name);
  
         if (ext != NULL)
-         return arm_parse_extension (ext, march_cpu_opt, &dyn_march_ext_opt);
+         return arm_parse_extension (ext, march_cpu_opt, march_ext_opt,
+                                     opt->ext_table);
  
         return TRUE;
        }
@@ -26565,6 +28717,12 @@ md_parse_option (int c, const char * arg)
        fix_v4bx = TRUE;
        break;
  
+#ifdef OBJ_ELF
+    case OPTION_FDPIC:
+      arm_fdpic = TRUE;
+      break;
+#endif /* OBJ_ELF */
+
      case 'a':
        /* Listing option.  Just ignore these, we don't support additional
          ones.  */
@@ -26659,10 +28817,15 @@ md_show_usage (FILE * fp)
  
    fprintf (fp, _("\
    --fix-v4bx              Allow BX in ARMv4 code\n"));
-}
  
+#ifdef OBJ_ELF
+  fprintf (fp, _("\
+  --fdpic                 generate an FDPIC object file\n"));
+#endif /* OBJ_ELF */
+}
  
  #ifdef OBJ_ELF
+
  typedef struct
  {
    int val;
@@ -26676,30 +28839,30 @@ typedef struct
     stable when new architectures are added.  */
  static const cpu_arch_ver_table cpu_arch_ver[] =
  {
-    {0, ARM_ARCH_V1},
-    {0, ARM_ARCH_V2},
-    {0, ARM_ARCH_V2S},
-    {0, ARM_ARCH_V3},
-    {0, ARM_ARCH_V3M},
-    {1, ARM_ARCH_V4xM},
-    {1, ARM_ARCH_V4},
-    {2, ARM_ARCH_V4TxM},
-    {2, ARM_ARCH_V4T},
-    {3, ARM_ARCH_V5xM},
-    {3, ARM_ARCH_V5},
-    {3, ARM_ARCH_V5TxM},
-    {3, ARM_ARCH_V5T},
-    {4, ARM_ARCH_V5TExP},
-    {4, ARM_ARCH_V5TE},
-    {5, ARM_ARCH_V5TEJ},
-    {6, ARM_ARCH_V6},
-    {7, ARM_ARCH_V6Z},
-    {7, ARM_ARCH_V6KZ},
-    {9, ARM_ARCH_V6K},
-    {8, ARM_ARCH_V6T2},
-    {8, ARM_ARCH_V6KT2},
-    {8, ARM_ARCH_V6ZT2},
-    {8, ARM_ARCH_V6KZT2},
+    {TAG_CPU_ARCH_PRE_V4,     ARM_ARCH_V1},
+    {TAG_CPU_ARCH_PRE_V4,     ARM_ARCH_V2},
+    {TAG_CPU_ARCH_PRE_V4,     ARM_ARCH_V2S},
+    {TAG_CPU_ARCH_PRE_V4,     ARM_ARCH_V3},
+    {TAG_CPU_ARCH_PRE_V4,     ARM_ARCH_V3M},
+    {TAG_CPU_ARCH_V4,        ARM_ARCH_V4xM},
+    {TAG_CPU_ARCH_V4,        ARM_ARCH_V4},
+    {TAG_CPU_ARCH_V4T,       ARM_ARCH_V4TxM},
+    {TAG_CPU_ARCH_V4T,       ARM_ARCH_V4T},
+    {TAG_CPU_ARCH_V5T,       ARM_ARCH_V5xM},
+    {TAG_CPU_ARCH_V5T,       ARM_ARCH_V5},
+    {TAG_CPU_ARCH_V5T,       ARM_ARCH_V5TxM},
+    {TAG_CPU_ARCH_V5T,       ARM_ARCH_V5T},
+    {TAG_CPU_ARCH_V5TE,              ARM_ARCH_V5TExP},
+    {TAG_CPU_ARCH_V5TE,              ARM_ARCH_V5TE},
+    {TAG_CPU_ARCH_V5TEJ,      ARM_ARCH_V5TEJ},
+    {TAG_CPU_ARCH_V6,        ARM_ARCH_V6},
+    {TAG_CPU_ARCH_V6KZ,              ARM_ARCH_V6Z},
+    {TAG_CPU_ARCH_V6KZ,              ARM_ARCH_V6KZ},
+    {TAG_CPU_ARCH_V6K,       ARM_ARCH_V6K},
+    {TAG_CPU_ARCH_V6T2,              ARM_ARCH_V6T2},
+    {TAG_CPU_ARCH_V6T2,              ARM_ARCH_V6KT2},
+    {TAG_CPU_ARCH_V6T2,              ARM_ARCH_V6ZT2},
+    {TAG_CPU_ARCH_V6T2,              ARM_ARCH_V6KZT2},
  
      /* When assembling a file with only ARMv6-M or ARMv6S-M instruction, GNU as
         always selected build attributes to match those of ARMv6-M
@@ -26708,26 +28871,30 @@ static const cpu_arch_ver_table cpu_arch_ver[] =
         would be selected when fully respecting chronology of architectures.
         It is thus necessary to make a special case of ARMv6-M and ARMv6S-M and
         move them before ARMv7 architectures.  */
-    {11, ARM_ARCH_V6M},
-    {12, ARM_ARCH_V6SM},
-
-    {10, ARM_ARCH_V7},
-    {10, ARM_ARCH_V7A},
-    {10, ARM_ARCH_V7R},
-    {10, ARM_ARCH_V7M},
-    {10, ARM_ARCH_V7VE},
-    {13, ARM_ARCH_V7EM},
-    {14, ARM_ARCH_V8A},
-    {14, ARM_ARCH_V8_1A},
-    {14, ARM_ARCH_V8_2A},
-    {14, ARM_ARCH_V8_3A},
-    {16, ARM_ARCH_V8M_BASE},
-    {17, ARM_ARCH_V8M_MAIN},
-    {15, ARM_ARCH_V8R},
-    {-1, ARM_ARCH_NONE}
+    {TAG_CPU_ARCH_V6_M,              ARM_ARCH_V6M},
+    {TAG_CPU_ARCH_V6S_M,      ARM_ARCH_V6SM},
+
+    {TAG_CPU_ARCH_V7,        ARM_ARCH_V7},
+    {TAG_CPU_ARCH_V7,        ARM_ARCH_V7A},
+    {TAG_CPU_ARCH_V7,        ARM_ARCH_V7R},
+    {TAG_CPU_ARCH_V7,        ARM_ARCH_V7M},
+    {TAG_CPU_ARCH_V7,        ARM_ARCH_V7VE},
+    {TAG_CPU_ARCH_V7E_M,      ARM_ARCH_V7EM},
+    {TAG_CPU_ARCH_V8,        ARM_ARCH_V8A},
+    {TAG_CPU_ARCH_V8,        ARM_ARCH_V8_1A},
+    {TAG_CPU_ARCH_V8,        ARM_ARCH_V8_2A},
+    {TAG_CPU_ARCH_V8,        ARM_ARCH_V8_3A},
+    {TAG_CPU_ARCH_V8M_BASE,   ARM_ARCH_V8M_BASE},
+    {TAG_CPU_ARCH_V8M_MAIN,   ARM_ARCH_V8M_MAIN},
+    {TAG_CPU_ARCH_V8R,       ARM_ARCH_V8R},
+    {TAG_CPU_ARCH_V8,        ARM_ARCH_V8_4A},
+    {TAG_CPU_ARCH_V8,        ARM_ARCH_V8_5A},
+    {TAG_CPU_ARCH_V8_1M_MAIN, ARM_ARCH_V8_1M_MAIN},
+    {-1,                     ARM_ARCH_NONE}
  };
  
  /* Set an attribute if it has not already been set by the user.  */
+
  static void
  aeabi_set_attribute_int (int tag, int value)
  {
@@ -26748,6 +28915,7 @@ aeabi_set_attribute_string (int tag, const char *value)
  
  /* Return whether features in the *NEEDED feature set are available via
     extensions for the architecture whose feature set is *ARCH_FSET.  */
+
  static bfd_boolean
  have_ext_for_needed_feat_p (const arm_feature_set *arch_fset,
                             const arm_feature_set *needed)
@@ -26791,6 +28959,7 @@ have_ext_for_needed_feat_p (const arm_feature_set *arch_fset,
     For -march/-mcpu=all the build attribute value of the most featureful
     architecture is returned.  Tag_CPU_arch_profile result is returned in
     PROFILE.  */
+
  static int
  get_aeabi_cpu_arch_from_fset (const arm_feature_set *arch_ext_fset,
                               const arm_feature_set *ext_fset,
@@ -26804,7 +28973,7 @@ get_aeabi_cpu_arch_from_fset (const arm_feature_set *arch_ext_fset,
    if (ARM_FEATURE_EQUAL (*arch_ext_fset, arm_arch_any))
      {
        /* Force revisiting of decision for each new architecture.  */
-      gas_assert (MAX_TAG_CPU_ARCH <= TAG_CPU_ARCH_V8M_MAIN);
+      gas_assert (MAX_TAG_CPU_ARCH <= TAG_CPU_ARCH_V8_1M_MAIN);
        *profile = 'A';
        return TAG_CPU_ARCH_V8;
      }
@@ -26893,10 +29062,11 @@ found:
  }
  
  /* Set the public EABI object attributes.  */
+
  static void
  aeabi_set_public_attributes (void)
  {
-  char profile;
+  char profile = '\0';
    int arch = -1;
    int virt_sec = 0;
    int fp16_optional = 0;
@@ -26916,26 +29086,31 @@ aeabi_set_public_attributes (void)
         ARM_MERGE_FEATURE_SETS (flags, flags, arm_ext_v4t);
  
        /* Code run during relaxation relies on selected_cpu being set.  */
+      ARM_CLEAR_FEATURE (flags_arch, flags, fpu_any);
+      flags_ext = arm_arch_none;
+      ARM_CLEAR_FEATURE (selected_arch, flags_arch, flags_ext);
+      selected_ext = flags_ext;
        selected_cpu = flags;
      }
    /* Otherwise, choose the architecture based on the capabilities of the
       requested cpu.  */
    else
-    flags = selected_cpu;
-  ARM_MERGE_FEATURE_SETS (flags, flags, *mfpu_opt);
+    {
+      ARM_MERGE_FEATURE_SETS (flags_arch, selected_arch, selected_ext);
+      ARM_CLEAR_FEATURE (flags_arch, flags_arch, fpu_any);
+      flags_ext = selected_ext;
+      flags = selected_cpu;
+    }
+  ARM_MERGE_FEATURE_SETS (flags, flags, selected_fpu);
  
    /* Allow the user to override the reported architecture.  */
-  if (object_arch)
+  if (!ARM_FEATURE_ZERO (selected_object_arch))
      {
-      ARM_CLEAR_FEATURE (flags_arch, *object_arch, fpu_any);
+      ARM_CLEAR_FEATURE (flags_arch, selected_object_arch, fpu_any);
        flags_ext = arm_arch_none;
      }
    else
-    {
-      ARM_CLEAR_FEATURE (flags_arch, flags, fpu_any);
-      flags_ext = dyn_mcpu_ext_opt ? *dyn_mcpu_ext_opt : arm_arch_none;
-      skip_exact_match = ARM_FEATURE_EQUAL (selected_cpu, arm_arch_any);
-    }
+    skip_exact_match = ARM_FEATURE_EQUAL (selected_cpu, arm_arch_any);
  
    /* When this function is run again after relaxation has happened there is no
       way to determine whether an architecture or CPU was specified by the user:
@@ -26976,7 +29151,7 @@ aeabi_set_public_attributes (void)
      aeabi_set_attribute_int (Tag_CPU_arch_profile, profile);
  
    /* Tag_DSP_extension.  */
-  if (dyn_mcpu_ext_opt && ARM_CPU_HAS_FEATURE (*dyn_mcpu_ext_opt, arm_ext_dsp))
+  if (ARM_CPU_HAS_FEATURE (selected_ext, arm_ext_dsp))
      aeabi_set_attribute_int (Tag_DSP_extension, 1);
  
    ARM_CLEAR_FEATURE (flags_arch, flags, fpu_any);
@@ -27055,6 +29230,11 @@ aeabi_set_public_attributes (void)
         }
      }
  
+  if (ARM_CPU_HAS_FEATURE (flags, mve_fp_ext))
+    aeabi_set_attribute_int (Tag_MVE_arch, 2);
+  else if (ARM_CPU_HAS_FEATURE (flags, mve_ext))
+    aeabi_set_attribute_int (Tag_MVE_arch, 1);
+
    /* Tag_VFP_HP_extension (formerly Tag_NEON_FP16_arch).  */
    if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_fp16) && fp16_optional)
      aeabi_set_attribute_int (Tag_VFP_HP_extension, 1);
@@ -27069,7 +29249,7 @@ aeabi_set_public_attributes (void)
       by the base architecture.
  
       For new architectures we will have to check these tests.  */
-  gas_assert (arch <= TAG_CPU_ARCH_V8M_MAIN);
+  gas_assert (arch <= TAG_CPU_ARCH_V8_1M_MAIN);
    if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v8)
        || ARM_CPU_HAS_FEATURE (flags, arm_ext_v8m))
      aeabi_set_attribute_int (Tag_DIV_use, 0);
@@ -27094,17 +29274,19 @@ aeabi_set_public_attributes (void)
  
  /* Post relaxation hook.  Recompute ARM attributes now that relaxation is
     finished and free extension feature bits which will not be used anymore.  */
+
  void
  arm_md_post_relax (void)
  {
    aeabi_set_public_attributes ();
-  XDELETE (dyn_mcpu_ext_opt);
-  dyn_mcpu_ext_opt = NULL;
-  XDELETE (dyn_march_ext_opt);
-  dyn_march_ext_opt = NULL;
+  XDELETE (mcpu_ext_opt);
+  mcpu_ext_opt = NULL;
+  XDELETE (march_ext_opt);
+  march_ext_opt = NULL;
  }
  
  /* Add the default contents for the .ARM.attributes section.  */
+
  void
  arm_md_end (void)
  {
@@ -27115,7 +29297,6 @@ arm_md_end (void)
  }
  #endif /* OBJ_ELF */
  
-
  /* Parse a .cpu directive.  */
  
  static void
@@ -27135,11 +29316,9 @@ s_arm_cpu (int ignored ATTRIBUTE_UNUSED)
    for (opt = arm_cpus + 1; opt->name != NULL; opt++)
      if (streq (opt->name, name))
        {
-       mcpu_cpu_opt = &opt->value;
-       if (!dyn_mcpu_ext_opt)
-         dyn_mcpu_ext_opt = XNEW (arm_feature_set);
-       *dyn_mcpu_ext_opt = opt->ext;
-       ARM_MERGE_FEATURE_SETS (selected_cpu, *mcpu_cpu_opt, *dyn_mcpu_ext_opt);
+       selected_arch = opt->value;
+       selected_ext = opt->ext;
+       ARM_MERGE_FEATURE_SETS (selected_cpu, selected_arch, selected_ext);
         if (opt->canonical_name)
           strcpy (selected_cpu_name, opt->canonical_name);
         else
@@ -27150,9 +29329,8 @@ s_arm_cpu (int ignored ATTRIBUTE_UNUSED)
  
             selected_cpu_name[i] = 0;
           }
-       ARM_MERGE_FEATURE_SETS (cpu_variant, *mcpu_cpu_opt, *mfpu_opt);
-       if (dyn_mcpu_ext_opt)
-         ARM_MERGE_FEATURE_SETS (cpu_variant, cpu_variant, *dyn_mcpu_ext_opt);
+       ARM_MERGE_FEATURE_SETS (cpu_variant, selected_cpu, selected_fpu);
+
         *input_line_pointer = saved_char;
         demand_empty_rest_of_line ();
         return;
@@ -27162,7 +29340,6 @@ s_arm_cpu (int ignored ATTRIBUTE_UNUSED)
    ignore_rest_of_line ();
  }
  
-
  /* Parse a .arch directive.  */
  
  static void
@@ -27182,12 +29359,11 @@ s_arm_arch (int ignored ATTRIBUTE_UNUSED)
    for (opt = arm_archs + 1; opt->name != NULL; opt++)
      if (streq (opt->name, name))
        {
-       mcpu_cpu_opt = &opt->value;
-       XDELETE (dyn_mcpu_ext_opt);
-       dyn_mcpu_ext_opt = NULL;
-       selected_cpu = *mcpu_cpu_opt;
+       selected_arch = opt->value;
+       selected_ext = arm_arch_none;
+       selected_cpu = selected_arch;
         strcpy (selected_cpu_name, opt->name);
-       ARM_MERGE_FEATURE_SETS (cpu_variant, selected_cpu, *mfpu_opt);
+       ARM_MERGE_FEATURE_SETS (cpu_variant, selected_cpu, selected_fpu);
         *input_line_pointer = saved_char;
         demand_empty_rest_of_line ();
         return;
@@ -27198,7 +29374,6 @@ s_arm_arch (int ignored ATTRIBUTE_UNUSED)
    ignore_rest_of_line ();
  }
  
-
  /* Parse a .object_arch directive.  */
  
  static void
@@ -27218,7 +29393,7 @@ s_arm_object_arch (int ignored ATTRIBUTE_UNUSED)
    for (opt = arm_archs + 1; opt->name != NULL; opt++)
      if (streq (opt->name, name))
        {
-       object_arch = &opt->value;
+       selected_object_arch = opt->value;
         *input_line_pointer = saved_char;
         demand_empty_rest_of_line ();
         return;
@@ -27235,7 +29410,6 @@ static void
  s_arm_arch_extension (int ignored ATTRIBUTE_UNUSED)
  {
    const struct arm_option_extension_value_table *opt;
-  const arm_feature_set arm_any = ARM_ANY;
    char saved_char;
    char *name;
    int adding_value = 1;
@@ -27261,9 +29435,9 @@ s_arm_arch_extension (int ignored ATTRIBUTE_UNUSED)
         for (i = 0; i < nb_allowed_archs; i++)
           {
             /* Empty entry.  */
-           if (ARM_FEATURE_EQUAL (opt->allowed_archs[i], arm_any))
+           if (ARM_CPU_IS_ANY (opt->allowed_archs[i]))
               continue;
-           if (ARM_FSET_CPU_SUBSET (opt->allowed_archs[i], *mcpu_cpu_opt))
+           if (ARM_FSET_CPU_SUBSET (opt->allowed_archs[i], selected_arch))
               break;
           }
  
@@ -27274,20 +29448,14 @@ s_arm_arch_extension (int ignored ATTRIBUTE_UNUSED)
             break;
           }
  
-       if (!dyn_mcpu_ext_opt)
-         {
-           dyn_mcpu_ext_opt = XNEW (arm_feature_set);
-           *dyn_mcpu_ext_opt = arm_arch_none;
-         }
         if (adding_value)
-         ARM_MERGE_FEATURE_SETS (*dyn_mcpu_ext_opt, *dyn_mcpu_ext_opt,
+         ARM_MERGE_FEATURE_SETS (selected_ext, selected_ext,
                                   opt->merge_value);
         else
-         ARM_CLEAR_FEATURE (*dyn_mcpu_ext_opt, *dyn_mcpu_ext_opt,
-                            opt->clear_value);
+         ARM_CLEAR_FEATURE (selected_ext, selected_ext, opt->clear_value);
  
-       ARM_MERGE_FEATURE_SETS (selected_cpu, *mcpu_cpu_opt, *dyn_mcpu_ext_opt);
-       ARM_MERGE_FEATURE_SETS (cpu_variant, selected_cpu, *mfpu_opt);
+       ARM_MERGE_FEATURE_SETS (selected_cpu, selected_arch, selected_ext);
+       ARM_MERGE_FEATURE_SETS (cpu_variant, selected_cpu, selected_fpu);
         *input_line_pointer = saved_char;
         demand_empty_rest_of_line ();
         /* Allowing Thumb division instructions for ARMv7 in autodetection rely
@@ -27322,10 +29490,13 @@ s_arm_fpu (int ignored ATTRIBUTE_UNUSED)
    for (opt = arm_fpus; opt->name != NULL; opt++)
      if (streq (opt->name, name))
        {
-       mfpu_opt = &opt->value;
-       ARM_MERGE_FEATURE_SETS (cpu_variant, *mcpu_cpu_opt, *mfpu_opt);
-       if (dyn_mcpu_ext_opt)
-         ARM_MERGE_FEATURE_SETS (cpu_variant, cpu_variant, *dyn_mcpu_ext_opt);
+       selected_fpu = opt->value;
+#ifndef CPU_DEFAULT
+       if (no_cpu_selected ())
+         ARM_MERGE_FEATURE_SETS (cpu_variant, arm_arch_any, selected_fpu);
+       else
+#endif
+         ARM_MERGE_FEATURE_SETS (cpu_variant, selected_cpu, selected_fpu);
         *input_line_pointer = saved_char;
         demand_empty_rest_of_line ();
         return;
@@ -27405,6 +29576,7 @@ arm_convert_symbolic_attribute (const char *name)
        T (Tag_T2EE_use),
        T (Tag_Virtualization_use),
        T (Tag_DSP_extension),
+      T (Tag_MVE_arch),
        /* We deliberately do not include Tag_MPextension_use_legacy.  */
  #undef T
      };
@@ -27420,10 +29592,10 @@ arm_convert_symbolic_attribute (const char *name)
    return -1;
  }
  
-
  /* Apply sym value for relocations only in the case that they are for
     local symbols in the same segment as the fixup and you have the
     respective architectural feature for blx and simple switches.  */
+
  int
  arm_apply_sym_value (struct fix * fixP, segT this_seg)
  {