/* tc-arm.c -- Assemble for the ARM
- Copyright (C) 1994-2019 Free Software Foundation, Inc.
+ Copyright (C) 1994-2020 Free Software Foundation, Inc.
Contributed by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
Modified by David Taylor (dtaylor@armltd.co.uk)
Cirrus coprocessor mods by Aldy Hernandez (aldyh@redhat.com)
#include "obstack.h"
#include "libiberty.h"
#include "opcode/arm.h"
+#include "cpu-arm.h"
#ifdef OBJ_ELF
#include "elf/arm.h"
should define CPU_DEFAULT here. */
#endif
+/* Perform range checks on positive and negative overflows by checking if the
+ VALUE given fits within the range of a BITS-sized immediate. */
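+/* For example, with BITS == 8 the accepted range is -256..255: a VALUE is
+ out of range when its bits above bit 7 are neither all clear nor all
+ set. */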
+static bfd_boolean
+out_of_range_p (offsetT value, offsetT bits)
+{
+ gas_assert (bits < (offsetT) (sizeof (value) * 8));
+ return (value & ~(((offsetT) 1 << bits) - 1))
+ && ((value & ~(((offsetT) 1 << bits) - 1))
+ != ~(((offsetT) 1 << bits) - 1));
+}
+
#ifndef FPU_DEFAULT
# ifdef TE_LINUX
# define FPU_DEFAULT FPU_ARCH_FPA
static int fix_v4bx = FALSE;
/* Warn on using deprecated features. */
static int warn_on_deprecated = TRUE;
+static int warn_on_restrict_it = FALSE;
/* Understand CodeComposer Studio assembly syntax. */
bfd_boolean codecomposer_syntax = FALSE;
static const arm_feature_set arm_ext_v7 = ARM_FEATURE_CORE_LOW (ARM_EXT_V7);
static const arm_feature_set arm_ext_v7a = ARM_FEATURE_CORE_LOW (ARM_EXT_V7A);
static const arm_feature_set arm_ext_v7r = ARM_FEATURE_CORE_LOW (ARM_EXT_V7R);
+static const arm_feature_set arm_ext_v8r = ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8R);
#ifdef OBJ_ELF
static const arm_feature_set ATTRIBUTE_UNUSED arm_ext_v7m = ARM_FEATURE_CORE_LOW (ARM_EXT_V7M);
#endif
ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB);
static const arm_feature_set arm_ext_predres =
ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES);
+static const arm_feature_set arm_ext_bf16 =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16);
+static const arm_feature_set arm_ext_i8mm =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM);
+static const arm_feature_set arm_ext_crc =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC);
+static const arm_feature_set arm_ext_cde =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE);
+static const arm_feature_set arm_ext_cde0 =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE0);
+static const arm_feature_set arm_ext_cde1 =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE1);
+static const arm_feature_set arm_ext_cde2 =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE2);
+static const arm_feature_set arm_ext_cde3 =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE3);
+static const arm_feature_set arm_ext_cde4 =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE4);
+static const arm_feature_set arm_ext_cde5 =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE5);
+static const arm_feature_set arm_ext_cde6 =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE6);
+static const arm_feature_set arm_ext_cde7 =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE7);
static const arm_feature_set arm_arch_any = ARM_ANY;
-#ifdef OBJ_ELF
static const arm_feature_set fpu_any = FPU_ANY;
-#endif
static const arm_feature_set arm_arch_full ATTRIBUTE_UNUSED = ARM_FEATURE (-1, -1, -1);
static const arm_feature_set arm_arch_t2 = ARM_ARCH_THUMB2;
static const arm_feature_set arm_arch_none = ARM_ARCH_NONE;
static const arm_feature_set fpu_vfp_v3_or_neon_ext =
ARM_FEATURE_COPROC (FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3);
static const arm_feature_set mve_ext =
- ARM_FEATURE_COPROC (FPU_MVE);
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE);
static const arm_feature_set mve_fp_ext =
- ARM_FEATURE_COPROC (FPU_MVE_FP);
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE_FP);
+/* Note: This has more than one bit set, which means using it with
+ mark_feature_used (which returns TRUE if *any* of the bits are set in
+ the current cpu variant) can give surprising results. */
+static const arm_feature_set armv8m_fp =
+ ARM_FEATURE_COPROC (FPU_VFP_V5_SP_D16);
#ifdef OBJ_ELF
static const arm_feature_set fpu_vfp_fp16 =
ARM_FEATURE_COPROC (FPU_VFP_EXT_FP16);
ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8);
static const arm_feature_set fpu_crypto_ext_armv8 =
ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8);
-static const arm_feature_set crc_ext_armv8 =
- ARM_FEATURE_COPROC (CRC_EXT_ARMV8);
static const arm_feature_set fpu_neon_ext_v8_1 =
ARM_FEATURE_COPROC (FPU_NEON_EXT_RDMA);
static const arm_feature_set fpu_neon_ext_dotprod =
/* Feature bits selected by the last .object_arch directive. */
static arm_feature_set selected_object_arch = ARM_ARCH_NONE;
/* Must be long enough to hold any of the names in arm_cpus. */
+static const struct arm_ext_table * selected_ctx_ext_table = NULL;
static char selected_cpu_name[20];
extern FLONUM_TYPE generic_floating_point_number;
NT_float,
NT_poly,
NT_signed,
+ NT_bfloat,
NT_unsigned
};
unsigned size;
};
-#define NEON_MAX_TYPE_ELS 4
+#define NEON_MAX_TYPE_ELS 5
struct neon_type
{
VPT_INSN, /* The VPT/VPST insn has been parsed. */
MVE_OUTSIDE_PRED_INSN , /* Instruction to indicate a MVE instruction without
a predication code. */
- MVE_UNPREDICABLE_INSN /* MVE instruction that is non-predicable. */
+ MVE_UNPREDICABLE_INSN, /* MVE instruction that is non-predicable. */
};
/* The maximum number of operands we need. */
unsigned isreg : 1; /* Operand was a register. */
unsigned immisreg : 2; /* .imm field is a second register.
0: imm, 1: gpr, 2: MVE Q-register. */
- unsigned isscalar : 1; /* Operand is a (Neon) scalar. */
+ unsigned isscalar : 2; /* Operand is a (SIMD) scalar:
+ 0) not scalar,
+ 1) Neon scalar,
+ 2) MVE scalar. */
unsigned immisalign : 1; /* Immediate is an alignment specifier. */
unsigned immisfloat : 1; /* Immediate was parsed as a float. */
/* Note: we abuse "regisimm" to mean "is Neon register" in VMOV
unsigned isvec : 1; /* Is a single, double or quad VFP/Neon reg. */
unsigned isquad : 1; /* Operand is SIMD quad register. */
unsigned issingle : 1; /* Operand is VFP single-precision register. */
+ unsigned iszr : 1; /* Operand is ZR register. */
unsigned hasreloc : 1; /* Operand has relocation suffix. */
unsigned writeback : 1; /* Operand has trailing ! */
unsigned preind : 1; /* Preindexed address. */
REG_TYPE_MMXWCG,
REG_TYPE_XSCALE,
REG_TYPE_RNB,
+ REG_TYPE_ZR
};
/* Structure for a hash table entry for a register.
[REG_TYPE_MMXWCG] = N_("iWMMXt scalar register expected"),
[REG_TYPE_XSCALE] = N_("XScale accumulator register expected"),
[REG_TYPE_MQ] = N_("MVE vector register expected"),
- [REG_TYPE_RNB] = N_("")
+ [REG_TYPE_RNB] = ""
};
/* Some well known registers that we refer to directly elsewhere. */
#define BAD_ADDR_MODE _("instruction does not accept this addressing mode")
#define BAD_BRANCH _("branch must be last instruction in IT block")
#define BAD_BRANCH_OFF _("branch out of range or not a multiple of 2")
+#define BAD_NO_VPT _("instruction not allowed in VPT block")
#define BAD_NOT_IT _("instruction not allowed in IT block")
#define BAD_NOT_VPT _("instruction missing MVE vector predication code")
#define BAD_FPU _("selected FPU does not support instruction")
_("cannot use writeback with PC-relative addressing")
#define BAD_RANGE _("branch out of range")
#define BAD_FP16 _("selected processor does not support fp16 instruction")
+#define BAD_BF16 _("selected processor does not support bf16 instruction")
+#define BAD_CDE _("selected processor does not support cde instruction")
+#define BAD_CDE_COPROC _("coprocessor for insn is not enabled for cde")
#define UNPRED_REG(R) _("using " R " results in unpredictable behaviour")
#define THUMB1_RELOC_ONLY _("relocation valid in thumb1 code only")
#define MVE_NOT_IT _("Warning: instruction is UNPREDICTABLE in an IT " \
#define BAD_MVE_SRCDEST _("Warning: 32-bit element size and same destination "\
"and source operands makes instruction UNPREDICTABLE")
#define BAD_EL_TYPE _("bad element type for instruction")
+#define MVE_BAD_QREG _("MVE vector register Q[0..7] expected")
static struct hash_control * arm_ops_hsh;
static struct hash_control * arm_cond_hsh;
} \
while (0)
+/* Toggle value[pos]. */
+#define TOGGLE_BIT(value, pos) ((value) ^ (1 << (pos)))
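+/* E.g. TOGGLE_BIT (cond, 0) inverts an Arm condition code, since a
+ condition and its inverse differ only in bit 0 of their encoding. */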
+
/* Pure syntax. */
/* This array holds the chars that always start a comment. If the
/* As in 0f12.456 */
/* or 0d1.2345e12 */
-const char FLT_CHARS[] = "rRsSfFdDxXeEpP";
+const char FLT_CHARS[] = "rRsSfFdDxXeEpPHh";
/* Prefix characters that indicate the start of an immediate
value. */
#define skip_whitespace(str) do { if (*(str) == ' ') ++(str); } while (0)
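+/* IEEE half-precision uses the maximum exponent for Inf/NaN; the Arm
+ alternative format instead treats it as a normal exponent, trading
+ Inf/NaN for extra range. DEFAULT means no .float16_format directive
+ has been seen yet. */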
+enum fp_16bit_format
+{
+ ARM_FP16_FORMAT_IEEE = 0x1,
+ ARM_FP16_FORMAT_ALTERNATIVE = 0x2,
+ ARM_FP16_FORMAT_DEFAULT = 0x3
+};
+
+static enum fp_16bit_format fp16_format = ARM_FP16_FORMAT_DEFAULT;
+
+
static inline int
skip_past_char (char ** str, char c)
{
switch (type)
{
+ case 'H':
+ case 'h':
+ prec = 1;
+ break;
+
+ /* If this is a bfloat16, then parse it slightly differently, as it
+ does not follow the IEEE specification for floating point numbers
+ exactly. */
+ case 'b':
+ {
+ FLONUM_TYPE generic_float;
+
+ t = atof_ieee_detail (input_line_pointer, 1, 8, words, &generic_float);
+
+ if (t)
+ input_line_pointer = t;
+ else
+ return _("invalid floating point number");
+
+ switch (generic_float.sign)
+ {
+ /* Is +Inf. */
+ case 'P':
+ words[0] = 0x7f80;
+ break;
+
+ /* Is -Inf. */
+ case 'N':
+ words[0] = 0xff80;
+ break;
+
+ /* Is NaN. */
+ /* bfloat16 has two types of NaN - quiet and signalling.
+ Quiet NaNs have bit[6] == 1 && fraction != 0, whereas
+ signalling NaNs have bit[6] == 0 && fraction != 0.
+ This specific encoding was chosen as it is the same form
+ as used by other IEEE 754 encodings in GAS. */
+ case 0:
+ words[0] = 0x7fff;
+ break;
+
+ default:
+ break;
+ }
+
+ *sizeP = 2;
+
+ md_number_to_chars (litP, (valueT) words[0], sizeof (LITTLENUM_TYPE));
+
+ return NULL;
+ }
case 'f':
case 'F':
case 's':
input_line_pointer = t;
*sizeP = prec * sizeof (LITTLENUM_TYPE);
- if (target_big_endian)
- {
- for (i = 0; i < prec; i++)
- {
- md_number_to_chars (litP, (valueT) words[i], sizeof (LITTLENUM_TYPE));
- litP += sizeof (LITTLENUM_TYPE);
- }
- }
+ if (target_big_endian || prec == 1)
+ for (i = 0; i < prec; i++)
+ {
+ md_number_to_chars (litP, (valueT) words[i], sizeof (LITTLENUM_TYPE));
+ litP += sizeof (LITTLENUM_TYPE);
+ }
+ else if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_endian_pure))
+ for (i = prec - 1; i >= 0; i--)
+ {
+ md_number_to_chars (litP, (valueT) words[i], sizeof (LITTLENUM_TYPE));
+ litP += sizeof (LITTLENUM_TYPE);
+ }
else
- {
- if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_endian_pure))
- for (i = prec - 1; i >= 0; i--)
- {
- md_number_to_chars (litP, (valueT) words[i], sizeof (LITTLENUM_TYPE));
- litP += sizeof (LITTLENUM_TYPE);
- }
- else
- /* For a 4 byte float the order of elements in `words' is 1 0.
- For an 8 byte float the order is 1 0 3 2. */
- for (i = 0; i < prec; i += 2)
- {
- md_number_to_chars (litP, (valueT) words[i + 1],
- sizeof (LITTLENUM_TYPE));
- md_number_to_chars (litP + sizeof (LITTLENUM_TYPE),
- (valueT) words[i], sizeof (LITTLENUM_TYPE));
- litP += 2 * sizeof (LITTLENUM_TYPE);
- }
- }
+ /* For a 4 byte float the order of elements in `words' is 1 0.
+ For an 8 byte float the order is 1 0 3 2. */
+ for (i = 0; i < prec; i += 2)
+ {
+ md_number_to_chars (litP, (valueT) words[i + 1],
+ sizeof (LITTLENUM_TYPE));
+ md_number_to_chars (litP + sizeof (LITTLENUM_TYPE),
+ (valueT) words[i], sizeof (LITTLENUM_TYPE));
+ litP += 2 * sizeof (LITTLENUM_TYPE);
+ }
return NULL;
}
thissize = 64;
ptr++;
goto done;
+ case 'b':
+ thistype = NT_bfloat;
+ switch (TOLOWER (*(++ptr)))
+ {
+ case 'f':
+ ptr += 1;
+ thissize = strtoul (ptr, &ptr, 10);
+ if (thissize != 16)
+ {
+ as_bad (_("bad size %d in type specifier"), thissize);
+ return FAIL;
+ }
+ goto done;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case ' ': case '.':
+ as_bad (_("unexpected type character `b' -- did you mean `bf'?"));
+ return FAIL;
+ default:
+ break;
+ }
+ break;
default:
as_bad (_("unexpected character `%c' in type specifier"), *ptr);
return FAIL;
{
if (type != REG_TYPE_VFD
&& !(type == REG_TYPE_VFS
- && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_2)))
+ && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_2))
+ && !(type == REG_TYPE_NQ
+ && ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)))
{
- first_error (_("only D registers may be indexed"));
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ first_error (_("only D and Q registers may be indexed"));
+ else
+ first_error (_("only D registers may be indexed"));
return FAIL;
}
just do easy checks here, and do further checks later. */
static int
-parse_scalar (char **ccp, int elsize, struct neon_type_el *type)
+parse_scalar (char **ccp, int elsize, struct neon_type_el *type,
+ enum arm_reg_type reg_type)
{
int reg;
char *str = *ccp;
struct neon_typed_alias atype;
- enum arm_reg_type reg_type = REG_TYPE_VFD;
-
- if (elsize == 4)
- reg_type = REG_TYPE_VFS;
+ unsigned reg_size;
reg = parse_typed_reg_or_scalar (&str, reg_type, NULL, &atype);
+ switch (reg_type)
+ {
+ case REG_TYPE_VFS:
+ reg_size = 32;
+ break;
+ case REG_TYPE_VFD:
+ reg_size = 64;
+ break;
+ case REG_TYPE_MQ:
+ reg_size = 128;
+ break;
+ default:
+ gas_assert (0);
+ return FAIL;
+ }
+
if (reg == FAIL || (atype.defined & NTA_HASINDEX) == 0)
return FAIL;
- if (atype.index == NEON_ALL_LANES)
+ if (reg_type != REG_TYPE_MQ && atype.index == NEON_ALL_LANES)
{
first_error (_("scalar must have an index"));
return FAIL;
}
- else if (atype.index >= 64 / elsize)
+ else if (atype.index >= reg_size / elsize)
{
first_error (_("scalar index out of range"));
return FAIL;
const char apsr_str[] = "apsr";
int apsr_str_len = strlen (apsr_str);
- reg = arm_reg_parse (&str, REGLIST_RN);
+ reg = arm_reg_parse (&str, REG_TYPE_RN);
if (etype == REGLIST_CLRM)
{
if (reg == REG_SP || reg == REG_PC)
hash_delete (arm_reg_hsh, name, FALSE);
free ((char *) reg->name);
- if (reg->neon)
- free (reg->neon);
+ free (reg->neon);
free (reg);
/* Also locate the all upper case and all lower case versions.
{
hash_delete (arm_reg_hsh, nbuf, FALSE);
free ((char *) reg->name);
- if (reg->neon)
- free (reg->neon);
+ free (reg->neon);
free (reg);
}
{
hash_delete (arm_reg_hsh, nbuf, FALSE);
free ((char *) reg->name);
- if (reg->neon)
- free (reg->neon);
+ free (reg->neon);
free (reg);
}
}
return;
-error:
+ error:
ignore_rest_of_line ();
}
op = 0xc700 | mask;
add_unwind_opcode (op, 2);
return;
-error:
+ error:
ignore_rest_of_line ();
}
}
#endif /* TE_PE */
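+/* Whether the all-ones exponent is a valid (normal) encoding: true only
+ for 16-bit floats in the Arm alternative format, where it gains range
+ at the cost of Inf/NaN. */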
+int
+arm_is_largest_exponent_ok (int precision)
+{
+ /* precision == 1 ensures that this will only return
+ true for 16 bit floats. */
+ return (precision == 1) && (fp16_format == ARM_FP16_FORMAT_ALTERNATIVE);
+}
+
+static void
+set_fp16_format (int dummy ATTRIBUTE_UNUSED)
+{
+ char saved_char;
+ char* name;
+ enum fp_16bit_format new_format;
+
+ new_format = ARM_FP16_FORMAT_DEFAULT;
+
+ name = input_line_pointer;
+ while (*input_line_pointer && !ISSPACE (*input_line_pointer))
+ input_line_pointer++;
+
+ saved_char = *input_line_pointer;
+ *input_line_pointer = 0;
+
+ if (strcasecmp (name, "ieee") == 0)
+ new_format = ARM_FP16_FORMAT_IEEE;
+ else if (strcasecmp (name, "alternative") == 0)
+ new_format = ARM_FP16_FORMAT_ALTERNATIVE;
+ else
+ {
+ as_bad (_("unrecognised float16 format \"%s\""), name);
+ goto cleanup;
+ }
+
+ /* Only set fp16_format if it is still the default (i.e. it has not
+ already been set). */
+ if (fp16_format == ARM_FP16_FORMAT_DEFAULT)
+ fp16_format = new_format;
+ else
+ {
+ if (new_format != fp16_format)
+ as_warn (_("float16 format cannot be set more than once, ignoring."));
+ }
+
+ cleanup:
+ *input_line_pointer = saved_char;
+ ignore_rest_of_line ();
+}
+
/* This table describes all the machine specific pseudo-ops the assembler
has to support. The fields are:
pseudo-op name without dot
{ "extend", float_cons, 'x' },
{ "ldouble", float_cons, 'x' },
{ "packed", float_cons, 'p' },
+ { "bfloat16", float_cons, 'b' },
#ifdef TE_PE
{"secrel32", pe_directive_secrel, 0},
#endif
{"asmfunc", s_ccs_asmfunc, 0},
{"endasmfunc", s_ccs_endasmfunc, 0},
+ {"float16", float_cons, 'h' },
+ {"float16_format", set_fp16_format, 0 },
+
{ 0, 0, 0 }
};
-\f
+
/* Parser functions used exclusively in instruction operands. */
/* Generic immediate-value read function for use in insn parsing.
goto unsupported_psr;
p += 4;
-check_suffix:
+ check_suffix:
if (*p == '_')
{
/* A suffix follows. */
char *ptr = *str;
struct neon_type_el optype;
- if ((val = parse_scalar (&ptr, 8, &optype)) != FAIL)
+ if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_MQ)) != FAIL)
+ {
+ /* Cases 17 or 19. */
+ inst.operands[i].reg = val;
+ inst.operands[i].isvec = 1;
+ inst.operands[i].isscalar = 2;
+ inst.operands[i].vectype = optype;
+ inst.operands[i++].present = 1;
+
+ if (skip_past_comma (&ptr) == FAIL)
+ goto wanted_comma;
+
+ if ((val = arm_reg_parse (&ptr, REG_TYPE_RN)) != FAIL)
+ {
+ /* Case 17: VMOV<c>.<dt> <Qd[idx]>, <Rt> */
+ inst.operands[i].reg = val;
+ inst.operands[i].isreg = 1;
+ inst.operands[i].present = 1;
+ }
+ else if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_MQ)) != FAIL)
+ {
+ /* Case 19: VMOV<c> <Qd[idx]>, <Qd[idx2]>, <Rt>, <Rt2> */
+ inst.operands[i].reg = val;
+ inst.operands[i].isvec = 1;
+ inst.operands[i].isscalar = 2;
+ inst.operands[i].vectype = optype;
+ inst.operands[i++].present = 1;
+
+ if (skip_past_comma (&ptr) == FAIL)
+ goto wanted_comma;
+
+ if ((val = arm_reg_parse (&ptr, REG_TYPE_RN)) == FAIL)
+ goto wanted_arm;
+
+ inst.operands[i].reg = val;
+ inst.operands[i].isreg = 1;
+ inst.operands[i++].present = 1;
+
+ if (skip_past_comma (&ptr) == FAIL)
+ goto wanted_comma;
+
+ if ((val = arm_reg_parse (&ptr, REG_TYPE_RN)) == FAIL)
+ goto wanted_arm;
+
+ inst.operands[i].reg = val;
+ inst.operands[i].isreg = 1;
+ inst.operands[i].present = 1;
+ }
+ else
+ {
+ first_error (_("expected ARM or MVE vector register"));
+ return FAIL;
+ }
+ }
+ else if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_VFD)) != FAIL)
{
/* Case 4: VMOV<c><q>.<size> <Dn[x]>, <Rd>. */
inst.operands[i].reg = val;
inst.operands[i].isreg = 1;
inst.operands[i].present = 1;
}
- else if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_NSDQ, &rtype, &optype))
- != FAIL)
+ else if (((val = arm_typed_reg_parse (&ptr, REG_TYPE_NSDQ, &rtype, &optype))
+ != FAIL)
+ || ((val = arm_typed_reg_parse (&ptr, REG_TYPE_MQ, &rtype, &optype))
+ != FAIL))
{
/* Cases 0, 1, 2, 3, 5 (D only). */
if (skip_past_comma (&ptr) == FAIL)
inst.operands[i].present = 1;
}
}
- else if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_NSDQ, &rtype,
- &optype)) != FAIL)
+ else if (((val = arm_typed_reg_parse (&ptr, REG_TYPE_NSDQ, &rtype,
+ &optype)) != FAIL)
+ || ((val = arm_typed_reg_parse (&ptr, REG_TYPE_MQ, &rtype,
+ &optype)) != FAIL))
{
/* Case 0: VMOV<c><q> <Qd>, <Qm>
Case 1: VMOV<c><q> <Dd>, <Dm>
}
else if ((val = arm_reg_parse (&ptr, REG_TYPE_RN)) != FAIL)
{
- /* Cases 6, 7. */
+ /* Cases 6, 7, 16, 18. */
inst.operands[i].reg = val;
inst.operands[i].isreg = 1;
inst.operands[i++].present = 1;
if (skip_past_comma (&ptr) == FAIL)
goto wanted_comma;
- if ((val = parse_scalar (&ptr, 8, &optype)) != FAIL)
+ if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_MQ)) != FAIL)
+ {
+ /* Case 18: VMOV<c>.<dt> <Rt>, <Qn[idx]> */
+ inst.operands[i].reg = val;
+ inst.operands[i].isscalar = 2;
+ inst.operands[i].present = 1;
+ inst.operands[i].vectype = optype;
+ }
+ else if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_VFD)) != FAIL)
{
/* Case 6: VMOV<c><q>.<dt> <Rd>, <Dn[x]> */
inst.operands[i].reg = val;
}
else if ((val = arm_reg_parse (&ptr, REG_TYPE_RN)) != FAIL)
{
- /* Case 7: VMOV<c><q> <Rd>, <Rn>, <Dm> */
inst.operands[i].reg = val;
inst.operands[i].isreg = 1;
inst.operands[i++].present = 1;
goto wanted_comma;
if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_VFSD, &rtype, &optype))
- == FAIL)
+ != FAIL)
{
- first_error (_(reg_expected_msgs[REG_TYPE_VFSD]));
- return FAIL;
- }
-
- inst.operands[i].reg = val;
- inst.operands[i].isreg = 1;
- inst.operands[i].isvec = 1;
- inst.operands[i].issingle = (rtype == REG_TYPE_VFS);
- inst.operands[i].vectype = optype;
- inst.operands[i].present = 1;
+ /* Case 7: VMOV<c><q> <Rd>, <Rn>, <Dm> */
- if (rtype == REG_TYPE_VFS)
- {
- /* Case 14. */
- i++;
- if (skip_past_comma (&ptr) == FAIL)
- goto wanted_comma;
- if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_VFS, NULL,
- &optype)) == FAIL)
- {
- first_error (_(reg_expected_msgs[REG_TYPE_VFS]));
- return FAIL;
- }
inst.operands[i].reg = val;
inst.operands[i].isreg = 1;
inst.operands[i].isvec = 1;
- inst.operands[i].issingle = 1;
+ inst.operands[i].issingle = (rtype == REG_TYPE_VFS);
inst.operands[i].vectype = optype;
inst.operands[i].present = 1;
+
+ if (rtype == REG_TYPE_VFS)
+ {
+ /* Case 14. */
+ i++;
+ if (skip_past_comma (&ptr) == FAIL)
+ goto wanted_comma;
+ if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_VFS, NULL,
+ &optype)) == FAIL)
+ {
+ first_error (_(reg_expected_msgs[REG_TYPE_VFS]));
+ return FAIL;
+ }
+ inst.operands[i].reg = val;
+ inst.operands[i].isreg = 1;
+ inst.operands[i].isvec = 1;
+ inst.operands[i].issingle = 1;
+ inst.operands[i].vectype = optype;
+ inst.operands[i].present = 1;
+ }
+ }
+ else
+ {
+ if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_MQ))
+ != FAIL)
+ {
+ /* Case 16: VMOV<c> <Rt>, <Rt2>, <Qd[idx]>, <Qd[idx2]> */
+ inst.operands[i].reg = val;
+ inst.operands[i].isvec = 1;
+ inst.operands[i].isscalar = 2;
+ inst.operands[i].vectype = optype;
+ inst.operands[i++].present = 1;
+
+ if (skip_past_comma (&ptr) == FAIL)
+ goto wanted_comma;
+
+ if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_MQ))
+ == FAIL)
+ {
+ first_error (_(reg_expected_msgs[REG_TYPE_MQ]));
+ return FAIL;
+ }
+ inst.operands[i].reg = val;
+ inst.operands[i].isvec = 1;
+ inst.operands[i].isscalar = 2;
+ inst.operands[i].vectype = optype;
+ inst.operands[i].present = 1;
+ }
+ else
+ {
+ first_error (_("VFP single, double or MVE vector register"
+ " expected"));
+ return FAIL;
+ }
}
}
else if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_VFS, NULL, &optype))
OP_RNDMQ, /* Neon double precision (0..31) or MVE vector register. */
OP_RNDMQR, /* Neon double precision (0..31), MVE vector or ARM register.
*/
+ OP_RNSDMQR, /* Neon single or double precision, MVE vector or ARM register.
+ */
OP_RNQ, /* Neon quad precision register */
OP_RNQMQ, /* Neon quad or MVE vector register. */
OP_RVSD, /* VFP single or double precision register */
+ OP_RVSD_COND, /* VFP single, double precision register or condition code. */
+ OP_RVSDMQ, /* VFP single, double precision or MVE vector register. */
OP_RNSD, /* Neon single or double precision register */
OP_RNDQ, /* Neon double or quad precision register */
OP_RNDQMQ, /* Neon double, quad or MVE vector register. */
+ OP_RNDQMQR, /* Neon double, quad, MVE vector or ARM register. */
OP_RNSDQ, /* Neon single, double or quad precision register */
OP_RNSC, /* Neon scalar D[X] */
OP_RVC, /* VFP control register */
OP_RIWG, /* iWMMXt wCG register */
OP_RXA, /* XScale accumulator register */
+ OP_RNSDMQ, /* Neon single, double or MVE vector register */
OP_RNSDQMQ, /* Neon single, double or quad register or MVE vector register
*/
OP_RNSDQMQR, /* Neon single, double or quad register, MVE vector register or
GPR (no SP/PC) */
OP_RMQ, /* MVE vector register. */
+ OP_RMQRZ, /* MVE vector or ARM register including ZR. */
+ OP_RMQRR, /* MVE vector or ARM register. */
/* New operands for Armv8.1-M Mainline. */
OP_LR, /* ARM LR register */
OP_RRe, /* ARM register, only even numbered. */
OP_RRo, /* ARM register, only odd numbered, not r13 or r15. */
OP_RRnpcsp_I32, /* ARM register (no BadReg) or literal 1 .. 32 */
+ OP_RR_ZR, /* ARM register or ZR but no PC */
OP_REGLST, /* ARM register list */
OP_CLRMLST, /* CLRM register list */
OP_RNDQ_I0, /* Neon D or Q reg, or immediate zero. */
OP_RVSD_I0, /* VFP S or D reg, or immediate zero. */
OP_RSVD_FI0, /* VFP S or D reg, or floating point immediate zero. */
+ OP_RSVDMQ_FI0, /* VFP S, D, MVE vector register or floating point immediate
+ zero. */
OP_RR_RNSC, /* ARM reg or Neon scalar. */
OP_RNSD_RNSC, /* Neon S or D reg, or Neon scalar. */
OP_RNSDQ_RNSC, /* Vector S, D or Q reg, or Neon scalar. */
OP_RNSDQ_RNSC_MQ, /* Vector S, D or Q reg, Neon scalar or MVE vector register.
*/
+ OP_RNSDQ_RNSC_MQ_RR, /* Vector S, D or Q reg, or MVE vector reg, or Neon
+ scalar, or ARM register. */
OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar. */
+ OP_RNDQ_RNSC_RR, /* Neon D or Q reg, Neon scalar, or ARM register. */
+ OP_RNDQMQ_RNSC_RR, /* Neon D or Q reg, Neon scalar, MVE vector or ARM
+ register. */
+ OP_RNDQMQ_RNSC, /* Neon D, Q or MVE vector reg, or Neon scalar. */
OP_RND_RNSC, /* Neon D reg, or Neon scalar. */
OP_VMOV, /* Neon VMOV operands. */
OP_RNDQ_Ibig, /* Neon D or Q reg, or big immediate for logic and VMVN. */
+ /* Neon D, Q or MVE vector register, or big immediate for logic and VMVN. */
+ OP_RNDQMQ_Ibig,
OP_RNDQ_I63b, /* Neon D or Q reg, or immediate for shift. */
+ OP_RNDQMQ_I63b_RR, /* Neon D or Q reg, immediate for shift, MVE vector or
+ ARM register. */
OP_RIWR_I32z, /* iWMMXt wR register, or immediate 0 .. 32 for iWMMXt2. */
OP_VLDR, /* VLDR operand. */
OP_I31w, /* 0 .. 31, optional trailing ! */
OP_I32, /* 1 .. 32 */
OP_I32z, /* 0 .. 32 */
+ OP_I48_I64, /* 48 or 64 */
OP_I63, /* 0 .. 63 */
OP_I63s, /* -64 .. 63 */
OP_I64, /* 1 .. 64 */
OP_I64z, /* 0 .. 64 */
+ OP_I127, /* 0 .. 127 */
OP_I255, /* 0 .. 255 */
-
+ OP_I511, /* 0 .. 511 */
+ OP_I4095, /* 0 .. 4095 */
+ OP_I8191, /* 0 .. 8191 */
OP_I4b, /* immediate, prefix optional, 1 .. 4 */
OP_I7b, /* 0 .. 7 */
OP_I15b, /* 0 .. 15 */
OP_oRNSDQ, /* Optional single, double or quad precision vector register */
OP_oRNSDQMQ, /* Optional single, double or quad register or MVE vector
register. */
+ OP_oRNSDMQ, /* Optional single, double register or MVE vector
+ register. */
OP_oSHll, /* LSL immediate */
OP_oSHar, /* ASR immediate */
OP_oSHllar, /* LSL or ASR immediate */
OP_oROR, /* ROR 0/8/16/24 */
OP_oBARRIER_I15, /* Option argument for a barrier instruction. */
+ OP_oRMQRZ, /* optional MVE vector or ARM register including ZR. */
+
/* Some pre-defined mixed (ARM/THUMB) operands. */
OP_RR_npcsp = MIX_ARM_THUMB_OPERANDS (OP_RR, OP_RRnpcsp),
OP_RRnpc_npcsp = MIX_ARM_THUMB_OPERANDS (OP_RRnpc, OP_RRnpcsp),
inst.operands[i].isvec = (rtype == REG_TYPE_VFS \
|| rtype == REG_TYPE_VFD \
|| rtype == REG_TYPE_NQ); \
+ inst.operands[i].iszr = (rtype == REG_TYPE_ZR); \
} \
while (0)
inst.operands[i].isvec = (rtype == REG_TYPE_VFS \
|| rtype == REG_TYPE_VFD \
|| rtype == REG_TYPE_NQ); \
+ inst.operands[i].iszr = (rtype == REG_TYPE_ZR); \
} \
while (0)
} \
while (0)
-#define po_scalar_or_goto(elsz, label) \
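+/* Accept one of two explicit constant immediates, e.g. the 48/64
+ saturation size parsed for OP_I48_I64 below. */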
+#define po_imm1_or_imm2_or_fail(imm1, imm2, popt) \
+ do \
+ { \
+ expressionS exp; \
+ my_get_expression (&exp, &str, popt); \
+ if (exp.X_op != O_constant) \
+ { \
+ inst.error = _("constant expression required"); \
+ goto failure; \
+ } \
+ if (exp.X_add_number != imm1 && exp.X_add_number != imm2) \
+ { \
+ inst.error = _("immediate value 48 or 64 expected"); \
+ goto failure; \
+ } \
+ inst.operands[i].imm = exp.X_add_number; \
+ } \
+ while (0)
+
+#define po_scalar_or_goto(elsz, label, reg_type) \
do \
{ \
- val = parse_scalar (& str, elsz, & inst.operands[i].vectype); \
+ val = parse_scalar (& str, elsz, & inst.operands[i].vectype, \
+ reg_type); \
if (val == FAIL) \
goto label; \
inst.operands[i].reg = val; \
if (op_parse_code >= OP_FIRST_OPTIONAL)
{
/* Remember where we are in case we need to backtrack. */
- gas_assert (!backtrack_pos);
backtrack_pos = str;
backtrack_error = inst.error;
backtrack_index = i;
case OP_RVS: po_reg_or_fail (REG_TYPE_VFS); break;
case OP_RVD: po_reg_or_fail (REG_TYPE_VFD); break;
case OP_oRND:
+ case OP_RNSDMQR:
+ po_reg_or_goto (REG_TYPE_VFS, try_rndmqr);
+ break;
+ try_rndmqr:
case OP_RNDMQR:
po_reg_or_goto (REG_TYPE_RN, try_rndmq);
break;
break;
/* Also accept generic coprocessor regs for unknown registers. */
coproc_reg:
- po_reg_or_fail (REG_TYPE_CN);
+ po_reg_or_goto (REG_TYPE_CN, vpr_po);
+ break;
+ /* Also accept P0 or p0 for VPR.P0. Since P0 is already an
+ existing register with a value of 0, this seems like the
+ best way to parse P0. */
+ vpr_po:
+ if (strncasecmp (str, "P0", 2) == 0)
+ {
+ str += 2;
+ inst.operands[i].isreg = 1;
+ inst.operands[i].reg = 13;
+ }
+ else
+ goto failure;
break;
case OP_RMF: po_reg_or_fail (REG_TYPE_MVF); break;
case OP_RMD: po_reg_or_fail (REG_TYPE_MVD); break;
try_nq:
case OP_RNQ: po_reg_or_fail (REG_TYPE_NQ); break;
case OP_RNSD: po_reg_or_fail (REG_TYPE_NSD); break;
+ case OP_RNDQMQR:
+ po_reg_or_goto (REG_TYPE_RN, try_rndqmq);
+ break;
+ try_rndqmq:
case OP_oRNDQMQ:
case OP_RNDQMQ:
po_reg_or_goto (REG_TYPE_MQ, try_rndq);
try_rndq:
case OP_oRNDQ:
case OP_RNDQ: po_reg_or_fail (REG_TYPE_NDQ); break;
+ case OP_RVSDMQ:
+ po_reg_or_goto (REG_TYPE_MQ, try_rvsd);
+ break;
+ try_rvsd:
case OP_RVSD: po_reg_or_fail (REG_TYPE_VFSD); break;
+ case OP_RVSD_COND:
+ po_reg_or_goto (REG_TYPE_VFSD, try_cond);
+ break;
+ case OP_oRNSDMQ:
+ case OP_RNSDMQ:
+ po_reg_or_goto (REG_TYPE_NSD, try_mq2);
+ break;
+ try_mq2:
+ po_reg_or_fail (REG_TYPE_MQ);
+ break;
case OP_oRNSDQ:
case OP_RNSDQ: po_reg_or_fail (REG_TYPE_NSDQ); break;
case OP_RNSDQMQR:
po_reg_or_fail (REG_TYPE_NSDQ);
inst.error = 0;
break;
+ case OP_RMQRR:
+ po_reg_or_goto (REG_TYPE_RN, try_rmq);
+ break;
+ try_rmq:
case OP_RMQ:
po_reg_or_fail (REG_TYPE_MQ);
break;
/* Neon scalar. Using an element size of 8 means that some invalid
scalars are accepted here, so deal with those in later code. */
- case OP_RNSC: po_scalar_or_goto (8, failure); break;
+ case OP_RNSC: po_scalar_or_goto (8, failure, REG_TYPE_VFD); break;
case OP_RNDQ_I0:
{
po_reg_or_goto (REG_TYPE_VFSD, try_imm0);
break;
+ case OP_RSVDMQ_FI0:
+ po_reg_or_goto (REG_TYPE_MQ, try_rsvd_fi0);
+ break;
+ try_rsvd_fi0:
case OP_RSVD_FI0:
{
po_reg_or_goto (REG_TYPE_VFSD, try_ifimm0);
case OP_RR_RNSC:
{
- po_scalar_or_goto (8, try_rr);
+ po_scalar_or_goto (8, try_rr, REG_TYPE_VFD);
break;
try_rr:
po_reg_or_fail (REG_TYPE_RN);
}
break;
+ case OP_RNSDQ_RNSC_MQ_RR:
+ po_reg_or_goto (REG_TYPE_RN, try_rnsdq_rnsc_mq);
+ break;
+ try_rnsdq_rnsc_mq:
case OP_RNSDQ_RNSC_MQ:
po_reg_or_goto (REG_TYPE_MQ, try_rnsdq_rnsc);
break;
try_rnsdq_rnsc:
case OP_RNSDQ_RNSC:
{
- po_scalar_or_goto (8, try_nsdq);
+ po_scalar_or_goto (8, try_nsdq, REG_TYPE_VFD);
+ inst.error = 0;
break;
try_nsdq:
po_reg_or_fail (REG_TYPE_NSDQ);
+ inst.error = 0;
}
break;
case OP_RNSD_RNSC:
{
- po_scalar_or_goto (8, try_s_scalar);
+ po_scalar_or_goto (8, try_s_scalar, REG_TYPE_VFD);
break;
try_s_scalar:
- po_scalar_or_goto (4, try_nsd);
+ po_scalar_or_goto (4, try_nsd, REG_TYPE_VFS);
break;
try_nsd:
po_reg_or_fail (REG_TYPE_NSD);
}
break;
+ case OP_RNDQMQ_RNSC_RR:
+ po_reg_or_goto (REG_TYPE_MQ, try_rndq_rnsc_rr);
+ break;
+ try_rndq_rnsc_rr:
+ case OP_RNDQ_RNSC_RR:
+ po_reg_or_goto (REG_TYPE_RN, try_rndq_rnsc);
+ break;
+ case OP_RNDQMQ_RNSC:
+ po_reg_or_goto (REG_TYPE_MQ, try_rndq_rnsc);
+ break;
+ try_rndq_rnsc:
case OP_RNDQ_RNSC:
{
- po_scalar_or_goto (8, try_ndq);
+ po_scalar_or_goto (8, try_ndq, REG_TYPE_VFD);
break;
try_ndq:
po_reg_or_fail (REG_TYPE_NDQ);
case OP_RND_RNSC:
{
- po_scalar_or_goto (8, try_vfd);
+ po_scalar_or_goto (8, try_vfd, REG_TYPE_VFD);
break;
try_vfd:
po_reg_or_fail (REG_TYPE_VFD);
po_misc_or_fail (parse_neon_mov (&str, &i) == FAIL);
break;
+ case OP_RNDQMQ_Ibig:
+ po_reg_or_goto (REG_TYPE_MQ, try_rndq_ibig);
+ break;
+ try_rndq_ibig:
case OP_RNDQ_Ibig:
{
po_reg_or_goto (REG_TYPE_NDQ, try_immbig);
}
break;
+ case OP_RNDQMQ_I63b_RR:
+ po_reg_or_goto (REG_TYPE_MQ, try_rndq_i63b_rr);
+ break;
+ try_rndq_i63b_rr:
+ po_reg_or_goto (REG_TYPE_RN, try_rndq_i63b);
+ break;
+ try_rndq_i63b:
case OP_RNDQ_I63b:
{
po_reg_or_goto (REG_TYPE_NDQ, try_shimm);
case OP_I31: po_imm_or_fail ( 0, 31, FALSE); break;
case OP_I32: po_imm_or_fail ( 1, 32, FALSE); break;
case OP_I32z: po_imm_or_fail ( 0, 32, FALSE); break;
+ case OP_I48_I64: po_imm1_or_imm2_or_fail (48, 64, FALSE); break;
case OP_I63s: po_imm_or_fail (-64, 63, FALSE); break;
case OP_I63: po_imm_or_fail ( 0, 63, FALSE); break;
case OP_I64: po_imm_or_fail ( 1, 64, FALSE); break;
case OP_I64z: po_imm_or_fail ( 0, 64, FALSE); break;
+ case OP_I127: po_imm_or_fail ( 0, 127, FALSE); break;
case OP_I255: po_imm_or_fail ( 0, 255, FALSE); break;
-
+ case OP_I511: po_imm_or_fail ( 0, 511, FALSE); break;
+ case OP_I4095: po_imm_or_fail ( 0, 4095, FALSE); break;
+ case OP_I8191: po_imm_or_fail ( 0, 8191, FALSE); break;
case OP_I4b: po_imm_or_fail ( 1, 4, TRUE); break;
case OP_oI7b:
case OP_I7b: po_imm_or_fail ( 0, 7, TRUE); break;
case OP_RRnpc_I0: po_reg_or_goto (REG_TYPE_RN, I0); break;
I0: po_imm_or_fail (0, 0, FALSE); break;
+ case OP_RRnpcsp_I32: po_reg_or_goto (REG_TYPE_RN, I32); break;
+ I32: po_imm_or_fail (1, 32, FALSE); break;
+
case OP_RF_IF: po_reg_or_goto (REG_TYPE_FN, IF); break;
IF:
if (!is_immediate_prefix (*str))
case OP_CPSF: val = parse_cps_flags (&str); break;
case OP_ENDI: val = parse_endian_specifier (&str); break;
case OP_oROR: val = parse_ror (&str); break;
+ try_cond:
case OP_COND: val = parse_cond (&str); break;
case OP_oBARRIER_I15:
po_barrier_or_imm (str); break;
po_misc_or_fail (parse_shift (&str, i, SHIFT_LSL_OR_ASR_IMMEDIATE));
break;
+ case OP_RMQRZ:
+ case OP_oRMQRZ:
+ po_reg_or_goto (REG_TYPE_MQ, try_rr_zr);
+ break;
+
+ case OP_RR_ZR:
+ try_rr_zr:
+ po_reg_or_goto (REG_TYPE_RN, ZR);
+ break;
+ ZR:
+ po_reg_or_fail (REG_TYPE_ZR);
+ break;
+
default:
as_fatal (_("unhandled operand code %d"), op_parse_code);
}
case OP_oRRnpcsp:
case OP_RRnpcsp:
+ case OP_RRnpcsp_I32:
if (inst.operands[i].isreg)
{
if (inst.operands[i].reg == REG_PC)
inst.error = BAD_PC;
break;
+ case OP_RVSD_COND:
case OP_VLDR:
if (inst.operands[i].isreg)
break;
/* fall through. */
+
case OP_CPSF:
case OP_ENDI:
case OP_oROR:
inst.error = _("operand must be LR register");
break;
+ case OP_RMQRZ:
+ case OP_oRMQRZ:
+ case OP_RR_ZR:
+ if (!inst.operands[i].iszr && inst.operands[i].reg == REG_PC)
+ inst.error = BAD_PC;
+ break;
+
case OP_RRe:
if (inst.operands[i].isreg
&& (inst.operands[i].reg & 0x00000001) != 0)
inst.instruction |= (imm & 0x0800) << 15;
inst.instruction |= (imm & 0x0700) << 4;
inst.instruction |= (imm & 0x00ff);
+ /* In case this replacement is being done on Armv8-M
+ Baseline we need to make sure to disable the
+ instruction size check, as otherwise GAS will reject
+ the use of this T32 instruction. */
+ inst.size_req = 0;
return TRUE;
}
}
return;
}
- /* MVFR2 is only valid at ARMv8-A. */
- if (inst.operands[1].reg == 5)
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
- _(BAD_FPU));
+ switch (inst.operands[1].reg)
+ {
+ /* MVFR2 is only valid for Armv8-A. */
+ case 5:
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+ _(BAD_FPU));
+ break;
+
+ /* Check for new Armv8.1-M Mainline changes to <spec_reg>. */
+ case 1: /* fpscr. */
+ constraint (!(ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+ || ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)),
+ _(BAD_FPU));
+ break;
+
+ case 14: /* fpcxt_ns. */
+ case 15: /* fpcxt_s. */
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_1m_main),
+ _("selected processor does not support instruction"));
+ break;
+
+ case 2: /* fpscr_nzcvqc. */
+ case 12: /* vpr. */
+ case 13: /* p0. */
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_1m_main)
+ || (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)),
+ _("selected processor does not support instruction"));
+ if (inst.operands[1].reg != 2
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ as_warn (_("accessing MVE system register without MVE is UNPREDICTABLE"));
+ break;
+
+ default:
+ break;
+ }
/* APSR_ sets isvec. All other refs to PC are illegal. */
if (!inst.operands[0].isvec && Rt == REG_PC)
return;
}
- /* MVFR2 is only valid for ARMv8-A. */
- if (inst.operands[0].reg == 5)
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
- _(BAD_FPU));
-
- /* If we get through parsing the register name, we just insert the number
- generated into the instruction without further validation. */
+ switch (inst.operands[0].reg)
+ {
+ /* MVFR2 is only valid for Armv8-A. */
+ case 5:
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+ _(BAD_FPU));
+ break;
+
+ /* Check for new Armv8.1-M Mainline changes to <spec_reg>. */
+ case 1: /* fpscr. */
+ constraint (!(ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+ || ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)),
+ _(BAD_FPU));
+ break;
+
+ case 14: /* fpcxt_ns. */
+ case 15: /* fpcxt_s. */
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_1m_main),
+ _("selected processor does not support instruction"));
+ break;
+
+ case 2: /* fpscr_nzcvqc. */
+ case 12: /* vpr. */
+ case 13: /* p0. */
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_1m_main)
+ || (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)),
+ _("selected processor does not support instruction"));
+ if (inst.operands[0].reg != 2
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ as_warn (_("accessing MVE system register without MVE is UNPREDICTABLE"));
+ break;
+
+ default:
+ break;
+ }
+
+ /* If we get through parsing the register name, we just insert the number
+ generated into the instruction without further validation. */
inst.instruction |= (inst.operands[0].reg << 16);
inst.instruction |= (Rt << 12);
}
static void
do_smc (void)
{
+ unsigned int value = inst.relocs[0].exp.X_add_number;
+ constraint (value > 0xf, _("immediate too large (bigger than 0xF)"));
+
inst.relocs[0].type = BFD_RELOC_ARM_SMC;
inst.relocs[0].pc_rel = 0;
}
static void
do_vfp_sp_monadic (void)
{
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+ _(BAD_FPU));
+
encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sm);
}
static void
do_vfp_reg_from_sp (void)
{
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+ _(BAD_FPU));
+
inst.instruction |= inst.operands[0].reg << 12;
encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sn);
}
static void
do_vfp_sp_from_reg (void)
{
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+ _(BAD_FPU));
+
encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sn);
inst.instruction |= inst.operands[1].reg << 12;
}
static void
do_vfp_dp_rd_rm (void)
{
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+ _(BAD_FPU));
+
encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dm);
}
static void
do_vfp_dp_rd_rn_rm (void)
{
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+ _(BAD_FPU));
+
encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dn);
encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Dm);
static void
do_vfp_dp_rm_rd_rn (void)
{
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+ _(BAD_FPU));
+
encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dm);
encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dd);
encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Dn);
inst.error = _("instruction does not accept unindexed addressing");
}
-/* Table of Thumb instructions which exist in both 16- and 32-bit
+/* Table of Thumb instructions which exist in 16- and/or 32-bit
encodings (the latter only in post-V6T2 cores). The index is the
value used in the insns table below. When there is more than one
possible 16-bit encoding for the instruction, this table always
X(_bflx, 0000, f070e001), \
X(_bic, 4380, ea200000), \
X(_bics, 4380, ea300000), \
+ X(_cinc, 0000, ea509000), \
+ X(_cinv, 0000, ea50a000), \
X(_cmn, 42c0, eb100f00), \
X(_cmp, 2800, ebb00f00), \
+ X(_cneg, 0000, ea50b000), \
X(_cpsie, b660, f3af8400), \
X(_cpsid, b670, f3af8600), \
X(_cpy, 4600, ea4f0000), \
+ X(_csel, 0000, ea508000), \
+ X(_cset, 0000, ea5f900f), \
+ X(_csetm, 0000, ea5fa00f), \
+ X(_csinc, 0000, ea509000), \
+ X(_csinv, 0000, ea50a000), \
+ X(_csneg, 0000, ea50b000), \
X(_dec_sp,80dd, f1ad0d00), \
X(_dls, 0000, f040e001), \
+ X(_dlstp, 0000, f000e001), \
X(_eor, 4040, ea800000), \
X(_eors, 4040, ea900000), \
X(_inc_sp,00dd, f10d0d00), \
+ X(_lctp, 0000, f00fe001), \
X(_ldmia, c800, e8900000), \
X(_ldr, 6800, f8500000), \
X(_ldrb, 7800, f8100000), \
X(_ldr_pc2,4800, f85f0000), \
X(_ldr_sp,9800, f85d0000), \
X(_le, 0000, f00fc001), \
+ X(_letp, 0000, f01fc001), \
X(_lsl, 0000, fa00f000), \
X(_lsls, 0000, fa10f000), \
X(_lsr, 0800, fa20f000), \
X(_wfe, bf20, f3af8002), \
X(_wfi, bf30, f3af8003), \
X(_wls, 0000, f040c001), \
+ X(_wlstp, 0000, f000c001), \
X(_sev, bf40, f3af8004), \
X(_sevl, bf50, f3af8005), \
X(_udf, de00, f7f0a000)
inst.instruction |= Rm;
}
+/* For the Armv8.1-M conditional instructions. */
+static void
+do_t_cond (void)
+{
+ unsigned Rd, Rn, Rm;
+ signed int cond;
+
+ constraint (inst.cond != COND_ALWAYS, BAD_COND);
+
+ Rd = inst.operands[0].reg;
+ switch (inst.instruction)
+ {
+ case T_MNEM_csinc:
+ case T_MNEM_csinv:
+ case T_MNEM_csneg:
+ case T_MNEM_csel:
+ Rn = inst.operands[1].reg;
+ Rm = inst.operands[2].reg;
+ cond = inst.operands[3].imm;
+ constraint (Rn == REG_SP, BAD_SP);
+ constraint (Rm == REG_SP, BAD_SP);
+ break;
+
+ case T_MNEM_cinc:
+ case T_MNEM_cinv:
+ case T_MNEM_cneg:
+ Rn = inst.operands[1].reg;
+ cond = inst.operands[2].imm;
+ /* Invert the least significant bit to invert the condition. */
+ cond = TOGGLE_BIT (cond, 0);
+ constraint (Rn == REG_SP, BAD_SP);
+ Rm = Rn;
+ break;
+
+ case T_MNEM_csetm:
+ case T_MNEM_cset:
+ cond = inst.operands[1].imm;
+ /* Invert the least significant bit to invert the condition. */
+ cond = TOGGLE_BIT (cond, 0);
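+ /* cset/csetm Rd, <cond> are aliases of csinc/csinv with both source
+ registers the zero register, which is encoded like PC (0b1111) in the
+ Rn and Rm fields. */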
+ Rn = REG_PC;
+ Rm = REG_PC;
+ break;
+
+ default: abort ();
+ }
+
+ set_pred_insn_type (OUTSIDE_PRED_INSN);
+ inst.instruction = THUMB_OP32 (inst.instruction);
+ inst.instruction |= Rd << 8;
+ inst.instruction |= Rn << 16;
+ inst.instruction |= Rm;
+ inst.instruction |= cond << 4;
+}
+
static void
do_t_csdb (void)
{
inst.instruction |= cond << 4;
}
-static void
-do_mve_vpt (void)
-{
- /* We are dealing with a vector predicated block. */
- set_pred_insn_type (VPT_INSN);
- now_pred.cc = 0;
- now_pred.mask = ((inst.instruction & 0x00400000) >> 19)
- | ((inst.instruction & 0xe000) >> 13);
- now_pred.warn_deprecated = FALSE;
- now_pred.type = VECTOR_PRED;
-}
-
/* Helper function used for both push/pop and ldm/stm. */
static void
encode_thumb2_multi (bfd_boolean do_io, int base, unsigned mask,
_("SMC is not permitted on this architecture"));
constraint (inst.relocs[0].exp.X_op != O_constant,
_("expression too complex"));
+ constraint (value > 0xf, _("immediate too large (bigger than 0xF)"));
+
inst.relocs[0].type = BFD_RELOC_UNUSED;
- inst.instruction |= (value & 0xf000) >> 12;
- inst.instruction |= (value & 0x0ff0);
inst.instruction |= (value & 0x000f) << 16;
+
/* PR gas/15623: SMC instructions must be last in an IT block. */
set_pred_insn_type_last ();
}
}
}
-/* To handle the Scalar Low Overhead Loop instructions
- in Armv8.1-M Mainline. */
+/* For shifts with four operands in MVE. */
static void
-do_t_loloop (void)
+do_mve_scalar_shift1 (void)
{
- unsigned long insn = inst.instruction;
+ unsigned int value = inst.operands[2].imm;
- set_pred_insn_type (OUTSIDE_PRED_INSN);
- inst.instruction = THUMB_OP32 (inst.instruction);
+ inst.instruction |= inst.operands[0].reg << 16;
+ inst.instruction |= inst.operands[1].reg << 8;
- switch (insn)
- {
- case T_MNEM_le:
- /* le <label>. */
- if (!inst.operands[0].present)
- inst.instruction |= 1 << 21;
+ /* Bit 7 encodes the saturation size: clear for 64, set for 48. */
+ inst.instruction |= ((value == 64) ? 0 : 1) << 7;
- v8_1_loop_reloc (TRUE);
- break;
+ /* Assuming Rm is already checked not to be 11x1. */
+ constraint (inst.operands[3].reg == inst.operands[0].reg, BAD_OVERLAP);
+ constraint (inst.operands[3].reg == inst.operands[1].reg, BAD_OVERLAP);
+ inst.instruction |= inst.operands[3].reg << 12;
+}
- case T_MNEM_wls:
- v8_1_loop_reloc (FALSE);
- /* Fall through. */
- case T_MNEM_dls:
- constraint (inst.operands[1].isreg != 1, BAD_ARGS);
- inst.instruction |= (inst.operands[1].reg << 16);
- break;
+/* For shifts in MVE. */
+static void
+do_mve_scalar_shift (void)
+{
+ if (!inst.operands[2].present)
+ {
+ inst.operands[2] = inst.operands[1];
+ inst.operands[1].reg = 0xf;
+ }
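+ /* The single-register forms (e.g. sqrshr Rda, Rm) reuse this encoding
+ with the second register field set to 0b1111. */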
- default: abort();
+ inst.instruction |= inst.operands[0].reg << 16;
+ inst.instruction |= inst.operands[1].reg << 8;
+
+ if (inst.operands[2].isreg)
+ {
+ /* Assuming Rm is already checked not to be 11x1. */
+ constraint (inst.operands[2].reg == inst.operands[0].reg, BAD_OVERLAP);
+ constraint (inst.operands[2].reg == inst.operands[1].reg, BAD_OVERLAP);
+ inst.instruction |= inst.operands[2].reg << 12;
+ }
+ else
+ {
+ /* Assuming imm is already checked as [1,32]. */
+ unsigned int value = inst.operands[2].imm;
+ inst.instruction |= (value & 0x1c) << 10;
+ inst.instruction |= (value & 0x03) << 6;
+ /* Change last 4 bits from 0xd to 0xf. */
+ inst.instruction |= 0x2;
}
}
#define M_MNEM_vmlsdavax 0xeef01e21
#define M_MNEM_vmullt 0xee011e00
#define M_MNEM_vmullb 0xee010e00
+#define M_MNEM_vctp 0xf000e801
#define M_MNEM_vst20 0xfc801e00
#define M_MNEM_vst21 0xfc801e20
#define M_MNEM_vst40 0xfc801e01
#define M_MNEM_vldrh 0xec100e10
#define M_MNEM_vldrw 0xec100e40
#define M_MNEM_vldrd 0xec100e50
+#define M_MNEM_vmovlt 0xeea01f40
+#define M_MNEM_vmovlb 0xeea00f40
+#define M_MNEM_vmovnt 0xfe311e81
+#define M_MNEM_vmovnb 0xfe310e81
+#define M_MNEM_vadc 0xee300f00
+#define M_MNEM_vadci 0xee301f00
+#define M_MNEM_vbrsr 0xfe011e60
+#define M_MNEM_vaddlv 0xee890f00
+#define M_MNEM_vaddlva 0xee890f20
+#define M_MNEM_vaddv 0xeef10f00
+#define M_MNEM_vaddva 0xeef10f20
+#define M_MNEM_vddup 0xee011f6e
+#define M_MNEM_vdwdup 0xee011f60
+#define M_MNEM_vidup 0xee010f6e
+#define M_MNEM_viwdup 0xee010f60
+#define M_MNEM_vmaxv 0xeee20f00
+#define M_MNEM_vmaxav 0xeee00f00
+#define M_MNEM_vminv 0xeee20f80
+#define M_MNEM_vminav 0xeee00f80
+#define M_MNEM_vmlaldav 0xee800e00
+#define M_MNEM_vmlaldava 0xee800e20
+#define M_MNEM_vmlaldavx 0xee801e00
+#define M_MNEM_vmlaldavax 0xee801e20
+#define M_MNEM_vmlsldav 0xee800e01
+#define M_MNEM_vmlsldava 0xee800e21
+#define M_MNEM_vmlsldavx 0xee801e01
+#define M_MNEM_vmlsldavax 0xee801e21
+#define M_MNEM_vrmlaldavhx 0xee801f00
+#define M_MNEM_vrmlaldavhax 0xee801f20
+#define M_MNEM_vrmlsldavh 0xfe800e01
+#define M_MNEM_vrmlsldavha 0xfe800e21
+#define M_MNEM_vrmlsldavhx 0xfe801e01
+#define M_MNEM_vrmlsldavhax 0xfe801e21
+#define M_MNEM_vqmovnt 0xee331e01
+#define M_MNEM_vqmovnb 0xee330e01
+#define M_MNEM_vqmovunt 0xee311e81
+#define M_MNEM_vqmovunb 0xee310e81
+#define M_MNEM_vshrnt 0xee801fc1
+#define M_MNEM_vshrnb 0xee800fc1
+#define M_MNEM_vrshrnt 0xfe801fc1
+#define M_MNEM_vqshrnt 0xee801f40
+#define M_MNEM_vqshrnb 0xee800f40
+#define M_MNEM_vqshrunt 0xee801fc0
+#define M_MNEM_vqshrunb 0xee800fc0
+#define M_MNEM_vrshrnb 0xfe800fc1
+#define M_MNEM_vqrshrnt 0xee801f41
+#define M_MNEM_vqrshrnb 0xee800f41
+#define M_MNEM_vqrshrunt 0xfe801fc0
+#define M_MNEM_vqrshrunb 0xfe800fc0
+
+/* Bfloat16 instruction encoder helpers. */
+#define B_MNEM_vfmat 0xfc300850
+#define B_MNEM_vfmab 0xfc300810
/* Neon instruction encoder helpers. */
- a table used to drive neon_select_shape. */
#define NEON_SHAPE_DEF \
+ X(4, (R, R, Q, Q), QUAD), \
+ X(4, (Q, R, R, I), QUAD), \
+ X(4, (R, R, S, S), QUAD), \
+ X(4, (S, S, R, R), QUAD), \
+ X(3, (Q, R, I), QUAD), \
+ X(3, (I, Q, Q), QUAD), \
+ X(3, (I, Q, R), QUAD), \
X(3, (R, Q, Q), QUAD), \
X(3, (D, D, D), DOUBLE), \
X(3, (Q, Q, Q), QUAD), \
X(3, (D, D, S), DOUBLE), \
X(3, (Q, Q, S), QUAD), \
X(3, (Q, Q, R), QUAD), \
+ X(3, (R, R, Q), QUAD), \
+ X(2, (R, Q), QUAD), \
X(2, (D, D), DOUBLE), \
X(2, (Q, Q), QUAD), \
X(2, (D, S), DOUBLE), \
X(2, (Q, R), QUAD), \
X(2, (D, I), DOUBLE), \
X(2, (Q, I), QUAD), \
+ X(3, (P, F, I), SINGLE), \
+ X(3, (P, D, I), DOUBLE), \
+ X(3, (P, Q, I), QUAD), \
+ X(4, (P, F, F, I), SINGLE), \
+ X(4, (P, D, D, I), DOUBLE), \
+ X(4, (P, Q, Q, I), QUAD), \
+ X(5, (P, F, F, F, I), SINGLE), \
+ X(5, (P, D, D, D, I), DOUBLE), \
+ X(5, (P, Q, Q, Q, I), QUAD), \
X(3, (D, L, D), DOUBLE), \
X(2, (D, Q), MIXED), \
X(2, (Q, D), MIXED), \
X(2, (R, S), SINGLE), \
X(2, (F, R), SINGLE), \
X(2, (R, F), SINGLE), \
+/* Used for MVE tail predicated loop instructions. */\
+ X(2, (R, R), QUAD), \
/* Half float shape supported so far. */\
X (2, (H, D), MIXED), \
X (2, (D, H), MIXED), \
#define S2(A,B) NS_##A##B
#define S3(A,B,C) NS_##A##B##C
#define S4(A,B,C,D) NS_##A##B##C##D
+#define S5(A,B,C,D,E) NS_##A##B##C##D##E
#define X(N, L, C) S##N L
#undef S2
#undef S3
#undef S4
+#undef S5
enum neon_shape_class
{
SE_I,
SE_S,
SE_R,
- SE_L
+ SE_L,
+ SE_P
};
/* Register widths of above. */
0,
32,
32,
+ 0,
0
};
#define S2(A,B) { SE_##A, SE_##B }
#define S3(A,B,C) { SE_##A, SE_##B, SE_##C }
#define S4(A,B,C,D) { SE_##A, SE_##B, SE_##C, SE_##D }
+#define S5(A,B,C,D,E) { SE_##A, SE_##B, SE_##C, SE_##D, SE_##E }
#define X(N, L, C) { N, S##N L }
#undef S2
#undef S3
#undef S4
+#undef S5
/* Bit masks used in type checking given instructions.
'N_EQK' means the type must be the same as (or based on in some way) the key
N_F32 = 0x0080000,
N_F64 = 0x0100000,
N_P64 = 0x0200000,
+ N_BF16 = 0x0400000,
N_KEY = 0x1000000, /* Key element (main type specifier). */
N_EQK = 0x2000000, /* Given operand has the same type & size as the key. */
N_VFP = 0x4000000, /* VFP mode: operand size must match register width. */
matches = 0;
break;
+ case SE_P:
case SE_L:
break;
}
}
break;
+ case NT_bfloat:
+ if (size == 16) return N_BF16;
+ break;
+
default: ;
}
if ((mask & (N_S8 | N_U8 | N_I8 | N_8 | N_P8)) != 0)
*size = 8;
- else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_F16 | N_P16)) != 0)
+ else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_F16 | N_P16 | N_BF16))
+ != 0)
*size = 16;
else if ((mask & (N_S32 | N_U32 | N_I32 | N_32 | N_F32)) != 0)
*size = 32;
*type = NT_poly;
else if ((mask & (N_F_ALL)) != 0)
*type = NT_float;
+ else if ((mask & (N_BF16)) != 0)
+ *type = NT_bfloat;
else
return FAIL;
}
-static void
-do_vfp_nsyn_cmp (void)
+/* Turn a size (8, 16, 32, 64) into the respective bit number minus 3
+ (0, 1, 2, 3). */
+
+static unsigned
+neon_logbits (unsigned x)
{
- enum neon_shape rs;
- if (inst.operands[1].isreg)
- {
- rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL);
- neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP);
+ return ffs (x) - 4;
+}
- if (rs == NS_FF || rs == NS_HH)
- {
- NEON_ENCODE (SINGLE, inst);
- do_vfp_sp_monadic ();
- }
- else
- {
- NEON_ENCODE (DOUBLE, inst);
- do_vfp_dp_rd_rm ();
- }
- }
- else
- {
- rs = neon_select_shape (NS_HI, NS_FI, NS_DI, NS_NULL);
- neon_check_type (2, rs, N_F_ALL | N_KEY | N_VFP, N_EQK);
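+/* Helpers to extract the pieces of a register number that Neon and MVE
+ encodings split across non-contiguous fields (e.g. a 5-bit register as a
+ 4-bit field plus a separate D/N/M bit). */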
+#define LOW4(R) ((R) & 0xf)
+#define HI1(R) (((R) >> 4) & 1)
+#define LOW1(R) ((R) & 0x1)
+#define HI4(R) (((R) >> 1) & 0xf)
- switch (inst.instruction & 0x0fffffff)
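+/* Map an Arm condition code, as parsed into inst.operands[0].imm, onto the
+ MVE VCMP/VPT condition encoding, rejecting conditions that are invalid
+ for the element type. */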
+static unsigned
+mve_get_vcmp_vpt_cond (struct neon_type_el et)
+{
+ switch (et.type)
+ {
+ default:
+ first_error (BAD_EL_TYPE);
+ return 0;
+ case NT_float:
+ switch (inst.operands[0].imm)
{
- case N_MNEM_vcmp:
- inst.instruction += N_MNEM_vcmpz - N_MNEM_vcmp;
- break;
- case N_MNEM_vcmpe:
- inst.instruction += N_MNEM_vcmpez - N_MNEM_vcmpe;
- break;
default:
- abort ();
+ first_error (_("invalid condition"));
+ return 0;
+ case 0x0:
+ /* eq. */
+ return 0;
+ case 0x1:
+ /* ne. */
+ return 1;
+ case 0xa:
+ /* ge. */
+ return 4;
+ case 0xb:
+ /* lt. */
+ return 5;
+ case 0xc:
+ /* gt. */
+ return 6;
+ case 0xd:
+ /* le. */
+ return 7;
}
-
- if (rs == NS_FI || rs == NS_HI)
+ case NT_integer:
+ /* Only accept eq and ne. */
+ if (inst.operands[0].imm > 1)
{
- NEON_ENCODE (SINGLE, inst);
- do_vfp_sp_compare_z ();
+ first_error (_("invalid condition"));
+ return 0;
}
+ return inst.operands[0].imm;
+ case NT_unsigned:
+ if (inst.operands[0].imm == 0x2)
+ return 2;
+ else if (inst.operands[0].imm == 0x8)
+ return 3;
else
{
- NEON_ENCODE (DOUBLE, inst);
- do_vfp_dp_rd ();
+ first_error (_("invalid condition"));
+ return 0;
+ }
+ case NT_signed:
+ switch (inst.operands[0].imm)
+ {
+ default:
+ first_error (_("invalid condition"));
+ return 0;
+ case 0xa:
+ /* ge. */
+ return 4;
+ case 0xb:
+ /* lt. */
+ return 5;
+ case 0xc:
+ /* gt. */
+ return 6;
+ case 0xd:
+ /* le. */
+ return 7;
}
}
- do_vfp_cond_or_thumb ();
-
- /* ARMv8.2 fp16 instruction. */
- if (rs == NS_HI || rs == NS_HH)
- do_scalar_fp16_v82_encode ();
+ /* Should be unreachable. */
+ abort ();
}
+/* For VCTP (create vector tail predicate) in MVE. */
static void
-nsyn_insert_sp (void)
+do_mve_vctp (void)
{
- inst.operands[1] = inst.operands[0];
- memset (&inst.operands[0], '\0', sizeof (inst.operands[0]));
- inst.operands[0].reg = REG_SP;
- inst.operands[0].isreg = 1;
- inst.operands[0].writeback = 1;
- inst.operands[0].present = 1;
-}
+ int dt = 0;
+ unsigned size = 0x0;
-static void
-do_vfp_nsyn_push (void)
-{
- nsyn_insert_sp ();
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
- constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
- _("register list must contain at least 1 and at most 16 "
- "registers"));
+ /* This is a typical MVE instruction which has no type but has a size of
+ 8, 16, 32 or 64. For instructions with no type, inst.vectype.el[j].type
+ is set to NT_untyped and the size is updated in inst.vectype.el[j].size. */
+ if ((inst.operands[0].present) && (inst.vectype.el[0].type == NT_untyped))
+ dt = inst.vectype.el[0].size;
- if (inst.operands[1].issingle)
- do_vfp_nsyn_opcode ("fstmdbs");
- else
- do_vfp_nsyn_opcode ("fstmdbd");
+ /* Setting this does not indicate an actual NEON instruction, but only
+ indicates that the mnemonic accepts neon-style type suffixes. */
+ inst.is_neon = 1;
+
+ switch (dt)
+ {
+ case 8:
+ break;
+ case 16:
+ size = 0x1; break;
+ case 32:
+ size = 0x2; break;
+ case 64:
+ size = 0x3; break;
+ default:
+ first_error (_("Type is not allowed for this instruction"));
+ }
+ inst.instruction |= size << 20;
+ inst.instruction |= inst.operands[0].reg << 16;
}
static void
-do_vfp_nsyn_pop (void)
+do_mve_vpt (void)
{
- nsyn_insert_sp ();
+ /* We are dealing with a vector predicated block. */
+ if (inst.operands[0].present)
+ {
+ enum neon_shape rs = neon_select_shape (NS_IQQ, NS_IQR, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_KEY | N_F_MVE | N_I_MVE | N_SU_32,
+ N_EQK);
- constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
- _("register list must contain at least 1 and at most 16 "
- "registers"));
+ unsigned fcond = mve_get_vcmp_vpt_cond (et);
- if (inst.operands[1].issingle)
- do_vfp_nsyn_opcode ("fldmias");
- else
- do_vfp_nsyn_opcode ("fldmiad");
-}
+ constraint (inst.operands[1].reg > 14, MVE_BAD_QREG);
-/* Fix up Neon data-processing instructions, ORing in the correct bits for
- ARM mode or Thumb mode and moving the encoded bit 24 to bit 28. */
+ if (et.type == NT_invtype)
+ return;
+
+ if (et.type == NT_float)
+ {
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext),
+ BAD_FPU);
+ constraint (et.size != 16 && et.size != 32, BAD_EL_TYPE);
+ inst.instruction |= (et.size == 16) << 28;
+ inst.instruction |= 0x3 << 20;
+ }
+ else
+ {
+ constraint (et.size != 8 && et.size != 16 && et.size != 32,
+ BAD_EL_TYPE);
+ inst.instruction |= 1 << 28;
+ inst.instruction |= neon_logbits (et.size) << 20;
+ }
+
+ if (inst.operands[2].isquad)
+ {
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= (fcond & 0x2) >> 1;
+ }
+ else
+ {
+ if (inst.operands[2].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+ inst.instruction |= 1 << 6;
+ inst.instruction |= (fcond & 0x2) << 4;
+ inst.instruction |= inst.operands[2].reg;
+ }
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= (fcond & 0x4) << 10;
+ inst.instruction |= (fcond & 0x1) << 7;
+
+ }
+ set_pred_insn_type (VPT_INSN);
+ now_pred.cc = 0;
+ now_pred.mask = ((inst.instruction & 0x00400000) >> 19)
+ | ((inst.instruction & 0xe000) >> 13);
+ now_pred.warn_deprecated = FALSE;
+ now_pred.type = VECTOR_PRED;
+ inst.is_neon = 1;
+}
static void
-neon_dp_fixup (struct arm_it* insn)
+do_mve_vcmp (void)
{
- unsigned int i = insn->instruction;
- insn->is_neon = 1;
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
+ if (!inst.operands[1].isreg || !inst.operands[1].isquad)
+ first_error (_(reg_expected_msgs[REG_TYPE_MQ]));
+ if (!inst.operands[2].present)
+ first_error (_("MVE vector or ARM register expected"));
+ constraint (inst.operands[1].reg > 14, MVE_BAD_QREG);
- if (thumb_mode)
+ /* An MVE vcmp with an 'else' predication code is lexed as vcmpe; convert
+ it back to vcmp and mark it as inside a VPT block. */
+ if ((inst.instruction & 0xffffffff) == N_MNEM_vcmpe
+ && inst.operands[1].isquad)
{
- /* The U bit is at bit 24 by default. Move to bit 28 in Thumb mode. */
- if (i & (1 << 24))
- i |= 1 << 28;
-
- i &= ~(1 << 24);
-
- i |= 0xef000000;
+ inst.instruction = N_MNEM_vcmp;
+ inst.cond = 0x10;
}
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
else
- i |= 0xf2000000;
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
- insn->instruction = i;
+ enum neon_shape rs = neon_select_shape (NS_IQQ, NS_IQR, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_KEY | N_F_MVE | N_I_MVE | N_SU_32,
+ N_EQK);
+
+ constraint (rs == NS_IQR && inst.operands[2].reg == REG_PC
+ && !inst.operands[2].iszr, BAD_PC);
+
+ unsigned fcond = mve_get_vcmp_vpt_cond (et);
+
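+ /* The 3-bit condition from mve_get_vcmp_vpt_cond is scattered through the
+ encoding: bit 2 lands in bit 12 and bit 0 in bit 7, while bit 1 goes to
+ bit 0 for the Q-register form or bit 5 for the GPR form below. */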
+ inst.instruction = 0xee010f00;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= (fcond & 0x4) << 10;
+ inst.instruction |= (fcond & 0x1) << 7;
+ if (et.type == NT_float)
+ {
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext),
+ BAD_FPU);
+ inst.instruction |= (et.size == 16) << 28;
+ inst.instruction |= 0x3 << 20;
+ }
+ else
+ {
+ inst.instruction |= 1 << 28;
+ inst.instruction |= neon_logbits (et.size) << 20;
+ }
+ if (inst.operands[2].isquad)
+ {
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= (fcond & 0x2) >> 1;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ }
+ else
+ {
+ if (inst.operands[2].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+ inst.instruction |= 1 << 6;
+ inst.instruction |= (fcond & 0x2) << 4;
+ inst.instruction |= inst.operands[2].reg;
+ }
+
+ inst.is_neon = 1;
+ return;
}
-/* Turn a size (8, 16, 32, 64) into the respective bit number minus 3
- (0, 1, 2, 3). */
+static void
+do_mve_vmaxa_vmina (void)
+{
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
-static unsigned
-neon_logbits (unsigned x)
+ enum neon_shape rs = neon_select_shape (NS_QQ, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (2, rs, N_EQK, N_KEY | N_S8 | N_S16 | N_S32);
+
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= neon_logbits (et.size) << 18;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.is_neon = 1;
+}
+
+static void
+do_mve_vfmas (void)
{
- return ffs (x) - 4;
+ enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_F_MVE | N_KEY, N_EQK, N_EQK);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ if (inst.operands[2].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+ else if (inst.operands[2].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+
+ inst.instruction |= (et.size == 16) << 28;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= inst.operands[2].reg;
+ inst.is_neon = 1;
}
-#define LOW4(R) ((R) & 0xf)
-#define HI1(R) (((R) >> 4) & 1)
+static void
+do_mve_viddup (void)
+{
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ unsigned imm = inst.relocs[0].exp.X_add_number;
+ constraint (imm != 1 && imm != 2 && imm != 4 && imm != 8,
+ _("immediate must be either 1, 2, 4 or 8"));
+
+ enum neon_shape rs;
+ struct neon_type_el et;
+ unsigned Rm;
+ if (inst.instruction == M_MNEM_vddup || inst.instruction == M_MNEM_vidup)
+ {
+ rs = neon_select_shape (NS_QRI, NS_NULL);
+ et = neon_check_type (2, rs, N_KEY | N_U8 | N_U16 | N_U32, N_EQK);
+ Rm = 7;
+ }
+ else
+ {
+ constraint ((inst.operands[2].reg % 2) != 1, BAD_EVEN);
+ if (inst.operands[2].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+ else if (inst.operands[2].reg == REG_PC)
+ first_error (BAD_PC);
+
+ rs = neon_select_shape (NS_QRRI, NS_NULL);
+ et = neon_check_type (3, rs, N_KEY | N_U8 | N_U16 | N_U32, N_EQK, N_EQK);
+ Rm = inst.operands[2].reg >> 1;
+ }
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= neon_logbits (et.size) << 20;
+ inst.instruction |= inst.operands[1].reg << 16;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
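+ /* Encode the {1, 2, 4, 8} immediate in two bits: bit 7 distinguishes 4/8
+ from 1/2 and bit 0 distinguishes 2/8 from 1/4. */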
+ inst.instruction |= (imm > 2) << 7;
+ inst.instruction |= Rm << 1;
+ inst.instruction |= (imm == 2 || imm == 8);
+ inst.is_neon = 1;
+}
+
+static void
+do_mve_vmlas (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_MVE | N_KEY);
+
+ if (inst.operands[2].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+ else if (inst.operands[2].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ inst.instruction |= (et.type == NT_unsigned) << 28;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= neon_logbits (et.size) << 20;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= inst.operands[2].reg;
+ inst.is_neon = 1;
+}
+
+static void
+do_mve_vshll (void)
+{
+ struct neon_type_el et
+ = neon_check_type (2, NS_QQI, N_EQK, N_S8 | N_U8 | N_S16 | N_U16 | N_KEY);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ int imm = inst.operands[2].imm;
+ constraint (imm < 1 || (unsigned)imm > et.size,
+ _("immediate value out of range"));
+
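+ /* A shift by the full element width selects a separate encoding; smaller
+ shifts are encoded together with the size, as et.size + imm, starting
+ at bit 16. */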
+ if ((unsigned)imm == et.size)
+ {
+ inst.instruction |= neon_logbits (et.size) << 18;
+ inst.instruction |= 0x110001;
+ }
+ else
+ {
+ inst.instruction |= (et.size + imm) << 16;
+ inst.instruction |= 0x800140;
+ }
+
+ inst.instruction |= (et.type == NT_unsigned) << 28;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.is_neon = 1;
+}
+
+static void
+do_mve_vshlc (void)
+{
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ if (inst.operands[1].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+ else if (inst.operands[1].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+
+ int imm = inst.operands[2].imm;
+ constraint (imm < 1 || imm > 32, _("immediate value out of range"));
+
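+ /* A shift of 32 wraps to zero in the 5-bit immediate field. */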
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= (imm & 0x1f) << 16;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= inst.operands[1].reg;
+ inst.is_neon = 1;
+}
+
+static void
+do_mve_vshrn (void)
+{
+ unsigned types;
+ switch (inst.instruction)
+ {
+ case M_MNEM_vshrnt:
+ case M_MNEM_vshrnb:
+ case M_MNEM_vrshrnt:
+ case M_MNEM_vrshrnb:
+ types = N_I16 | N_I32;
+ break;
+ case M_MNEM_vqshrnt:
+ case M_MNEM_vqshrnb:
+ case M_MNEM_vqrshrnt:
+ case M_MNEM_vqrshrnb:
+ types = N_U16 | N_U32 | N_S16 | N_S32;
+ break;
+ case M_MNEM_vqshrunt:
+ case M_MNEM_vqshrunb:
+ case M_MNEM_vqrshrunt:
+ case M_MNEM_vqrshrunb:
+ types = N_S16 | N_S32;
+ break;
+ default:
+ abort ();
+ }
+
+ struct neon_type_el et = neon_check_type (2, NS_QQI, N_EQK, types | N_KEY);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ unsigned Qd = inst.operands[0].reg;
+ unsigned Qm = inst.operands[1].reg;
+ unsigned imm = inst.operands[2].imm;
+ constraint (imm < 1 || ((unsigned) imm) > (et.size / 2),
+ et.size == 16
+ ? _("immediate operand expected in the range [1,8]")
+ : _("immediate operand expected in the range [1,16]"));
+
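+ /* The shift amount is encoded biased by the element size, as
+ et.size - imm, starting at bit 16. */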
+ inst.instruction |= (et.type == NT_unsigned) << 28;
+ inst.instruction |= HI1 (Qd) << 22;
+ inst.instruction |= (et.size - imm) << 16;
+ inst.instruction |= LOW4 (Qd) << 12;
+ inst.instruction |= HI1 (Qm) << 5;
+ inst.instruction |= LOW4 (Qm);
+ inst.is_neon = 1;
+}
+
+static void
+do_mve_vqmovn (void)
+{
+ struct neon_type_el et;
+ if (inst.instruction == M_MNEM_vqmovnt
+ || inst.instruction == M_MNEM_vqmovnb)
+ et = neon_check_type (2, NS_QQ, N_EQK,
+ N_U16 | N_U32 | N_S16 | N_S32 | N_KEY);
+ else
+ et = neon_check_type (2, NS_QQ, N_EQK, N_S16 | N_S32 | N_KEY);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ inst.instruction |= (et.type == NT_unsigned) << 28;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= (et.size == 32) << 18;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.is_neon = 1;
+}
+
+static void
+do_mve_vpsel (void)
+{
+ neon_select_shape (NS_QQQ, NS_NULL);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.is_neon = 1;
+}
+
+static void
+do_mve_vpnot (void)
+{
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+}
+
+static void
+do_mve_vmaxnma_vminnma (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_QQ, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (2, rs, N_EQK, N_F_MVE | N_KEY);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ inst.instruction |= (et.size == 16) << 28;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.is_neon = 1;
+}
+
+static void
+do_mve_vcmul (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_QQQI, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_EQK, N_F_MVE | N_KEY);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ unsigned rot = inst.relocs[0].exp.X_add_number;
+ constraint (rot != 0 && rot != 90 && rot != 180 && rot != 270,
+ _("immediate out of range"));
+
+ if (et.size == 32 && (inst.operands[0].reg == inst.operands[1].reg
+ || inst.operands[0].reg == inst.operands[2].reg))
+ as_tsktsk (BAD_MVE_SRCDEST);
+
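+ /* The rotation is encoded in two bits: bit 12 is set for 180 and 270,
+ bit 0 for 90 and 270. */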
+ inst.instruction |= (et.size == 32) << 28;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= (rot > 90) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= (rot == 90 || rot == 270);
+ inst.is_neon = 1;
+}
+
+/* Handle the Low Overhead Loop instructions
+ in Armv8.1-M Mainline and MVE. */
+static void
+do_t_loloop (void)
+{
+ unsigned long insn = inst.instruction;
+
+ inst.instruction = THUMB_OP32 (inst.instruction);
+
+ if (insn == T_MNEM_lctp)
+ return;
+
+ set_pred_insn_type (MVE_OUTSIDE_PRED_INSN);
+
+ if (insn == T_MNEM_wlstp || insn == T_MNEM_dlstp)
+ {
+ struct neon_type_el et
+ = neon_check_type (2, NS_RR, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+ inst.instruction |= neon_logbits (et.size) << 20;
+ inst.is_neon = 1;
+ }
+
+ switch (insn)
+ {
+ case T_MNEM_letp:
+ constraint (!inst.operands[0].present,
+ _("expected LR"));
+ /* fall through. */
+ case T_MNEM_le:
+ /* le <label>. */
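+ /* Bit 21 selects the form of LE that takes no loop-count register. */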
+ if (!inst.operands[0].present)
+ inst.instruction |= 1 << 21;
+
+ v8_1_loop_reloc (TRUE);
+ break;
+
+ case T_MNEM_wls:
+ case T_MNEM_wlstp:
+ v8_1_loop_reloc (FALSE);
+ /* fall through. */
+ case T_MNEM_dlstp:
+ case T_MNEM_dls:
+ constraint (inst.operands[1].isreg != 1, BAD_ARGS);
+
+ if (insn == T_MNEM_wlstp || insn == T_MNEM_dlstp)
+ constraint (inst.operands[1].reg == REG_PC, BAD_PC);
+ else if (inst.operands[1].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+ if (inst.operands[1].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+
+ inst.instruction |= (inst.operands[1].reg << 16);
+ break;
+
+ default:
+ abort ();
+ }
+}
+
+
+static void
+do_vfp_nsyn_cmp (void)
+{
+ enum neon_shape rs;
+ if (!inst.operands[0].isreg)
+ {
+ do_mve_vcmp ();
+ return;
+ }
+ else
+ {
+ constraint (inst.operands[2].present, BAD_SYNTAX);
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd),
+ BAD_FPU);
+ }
+
+ if (inst.operands[1].isreg)
+ {
+ rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL);
+ neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP);
+
+ if (rs == NS_FF || rs == NS_HH)
+ {
+ NEON_ENCODE (SINGLE, inst);
+ do_vfp_sp_monadic ();
+ }
+ else
+ {
+ NEON_ENCODE (DOUBLE, inst);
+ do_vfp_dp_rd_rm ();
+ }
+ }
+ else
+ {
+ rs = neon_select_shape (NS_HI, NS_FI, NS_DI, NS_NULL);
+ neon_check_type (2, rs, N_F_ALL | N_KEY | N_VFP, N_EQK);
+
+ switch (inst.instruction & 0x0fffffff)
+ {
+ case N_MNEM_vcmp:
+ inst.instruction += N_MNEM_vcmpz - N_MNEM_vcmp;
+ break;
+ case N_MNEM_vcmpe:
+ inst.instruction += N_MNEM_vcmpez - N_MNEM_vcmpe;
+ break;
+ default:
+ abort ();
+ }
+
+ if (rs == NS_FI || rs == NS_HI)
+ {
+ NEON_ENCODE (SINGLE, inst);
+ do_vfp_sp_compare_z ();
+ }
+ else
+ {
+ NEON_ENCODE (DOUBLE, inst);
+ do_vfp_dp_rd ();
+ }
+ }
+ do_vfp_cond_or_thumb ();
+
+ /* ARMv8.2 fp16 instruction. */
+ if (rs == NS_HI || rs == NS_HH)
+ do_scalar_fp16_v82_encode ();
+}
static void
-mve_encode_qqr (int size, int fp)
+nsyn_insert_sp (void)
+{
+ inst.operands[1] = inst.operands[0];
+ memset (&inst.operands[0], '\0', sizeof (inst.operands[0]));
+ inst.operands[0].reg = REG_SP;
+ inst.operands[0].isreg = 1;
+ inst.operands[0].writeback = 1;
+ inst.operands[0].present = 1;
+}
+
+/* Fix up Neon data-processing instructions, ORing in the correct bits for
+ ARM mode or Thumb mode and moving the encoded bit 24 to bit 28. */
+
+static void
+neon_dp_fixup (struct arm_it* insn)
+{
+ unsigned int i = insn->instruction;
+ insn->is_neon = 1;
+
+ if (thumb_mode)
+ {
+ /* The U bit is at bit 24 by default. Move to bit 28 in Thumb mode. */
+ if (i & (1 << 24))
+ i |= 1 << 28;
+
+ i &= ~(1 << 24);
+
+ i |= 0xef000000;
+ }
+ else
+ i |= 0xf2000000;
+
+ insn->instruction = i;
+}
+
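+/* Encode an MVE instruction of QQR shape (two Q registers and a GPR). The
+ opcode table holds the vector bit pattern for the mnemonic, so key off
+ that value to substitute the scalar encoding before ORing in the SIZE
+ and U fields. */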
+static void
+mve_encode_qqr (int size, int U, int fp)
{
if (inst.operands[2].reg == REG_SP)
as_tsktsk (MVE_BAD_SP);
/* vsub. */
else if (((unsigned)inst.instruction) == 0x200d00)
inst.instruction = 0xee301f40;
+ /* vmul. */
+ else if (((unsigned)inst.instruction) == 0x1000d10)
+ inst.instruction = 0xee310e60;
/* Setting size which is 1 for F16 and 0 for F32. */
inst.instruction |= (size == 16) << 28;
/* vsub. */
else if (((unsigned)inst.instruction) == 0x1000800)
inst.instruction = 0xee011f40;
+ /* vhadd. */
+ else if (((unsigned)inst.instruction) == 0)
+ inst.instruction = 0xee000f40;
+ /* vhsub. */
+ else if (((unsigned)inst.instruction) == 0x200)
+ inst.instruction = 0xee001f40;
+ /* vmla. */
+ else if (((unsigned)inst.instruction) == 0x900)
+ inst.instruction = 0xee010e40;
+ /* vmul. */
+ else if (((unsigned)inst.instruction) == 0x910)
+ inst.instruction = 0xee011e60;
+ /* vqadd. */
+ else if (((unsigned)inst.instruction) == 0x10)
+ inst.instruction = 0xee000f60;
+ /* vqsub. */
+ else if (((unsigned)inst.instruction) == 0x210)
+ inst.instruction = 0xee001f60;
+ /* vqrdmlah. */
+ else if (((unsigned)inst.instruction) == 0x3000b10)
+ inst.instruction = 0xee000e40;
+ /* vqdmulh. */
+ else if (((unsigned)inst.instruction) == 0x0000b00)
+ inst.instruction = 0xee010e60;
+ /* vqrdmulh. */
+ else if (((unsigned)inst.instruction) == 0x1000b00)
+ inst.instruction = 0xfe010e60;
+
+ /* Set U-bit. */
+ inst.instruction |= U << 28;
+
/* Setting bits for size. */
inst.instruction |= neon_logbits (size) << 20;
}
inst.is_neon = 1;
}
-
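+/* Encode an MVE instruction with a GPR destination and a single Q-register
+ source, ORing in BIT28 and the element SIZE supplied by the caller. */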
+static void
+mve_encode_rq (unsigned bit28, unsigned size)
+{
+ inst.instruction |= bit28 << 28;
+ inst.instruction |= neon_logbits (size) << 18;
+ inst.instruction |= inst.operands[0].reg << 12;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.is_neon = 1;
+}
+
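+/* Encode an MVE instruction with two GPR and two Q-register operands. U and
+ SIZE come from the caller; the odd GPR in operand 1 is encoded halved, in
+ line with the instruction set's odd-register restriction. */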
+static void
+mve_encode_rrqq (unsigned U, unsigned size)
+{
+ constraint (inst.operands[3].reg > 14, MVE_BAD_QREG);
+
+ inst.instruction |= U << 28;
+ inst.instruction |= (inst.operands[1].reg >> 1) << 20;
+ inst.instruction |= LOW4 (inst.operands[2].reg) << 16;
+ inst.instruction |= (size == 32) << 16;
+ inst.instruction |= inst.operands[0].reg << 12;
+ inst.instruction |= HI1 (inst.operands[2].reg) << 7;
+ inst.instruction |= inst.operands[3].reg;
+ inst.is_neon = 1;
+}
+
+/* Helper for neon_three_same that encodes the register operands. */
+static void
+neon_three_args (int isquad)
+{
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= (isquad != 0) << 6;
+ inst.is_neon = 1;
+}
+
/* Encode insns with bit pattern:
|28/24|23|22 |21 20|19 16|15 12|11 8|7|6|5|4|3 0|
static void
neon_three_same (int isquad, int ubit, int size)
{
- inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
- inst.instruction |= HI1 (inst.operands[0].reg) << 22;
- inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
- inst.instruction |= HI1 (inst.operands[1].reg) << 7;
- inst.instruction |= LOW4 (inst.operands[2].reg);
- inst.instruction |= HI1 (inst.operands[2].reg) << 5;
- inst.instruction |= (isquad != 0) << 6;
+ neon_three_args (isquad);
inst.instruction |= (ubit != 0) << 24;
if (size != -1)
inst.instruction |= neon_logbits (size) << 20;
neon_dp_fixup (&inst);
}
+enum vfp_or_neon_is_neon_bits
+{
+NEON_CHECK_CC = 1,
+NEON_CHECK_ARCH = 2,
+NEON_CHECK_ARCH8 = 4
+};
+
+/* Call this function for an instruction which may have belonged to the VFP
+ or Neon instruction sets but turned out to be a Neon instruction (due to
+ the operand types involved, etc.). We have to check and/or fix up a
+ couple of things:
+
+ - Make sure the user hasn't attempted to make a Neon instruction
+ conditional.
+ - Alter the value in the condition code field if necessary.
+ - Make sure that the arch supports Neon instructions.
+
+ Which of these operations take place depends on bits from enum
+ vfp_or_neon_is_neon_bits.
+
+ WARNING: This function has side effects! If NEON_CHECK_CC is used and the
+ current instruction's condition is COND_ALWAYS, the condition field is
+ changed to inst.uncond_value. This is necessary because instructions shared
+ between VFP and Neon may be conditional for the VFP variants only, and the
+ unconditional Neon version must have, e.g., 0xF in the condition field. */
+
+static int
+vfp_or_neon_is_neon (unsigned check)
+{
+/* Conditions are always legal in Thumb mode (IT blocks). */
+if (!thumb_mode && (check & NEON_CHECK_CC))
+ {
+ if (inst.cond != COND_ALWAYS)
+ {
+ first_error (_(BAD_COND));
+ return FAIL;
+ }
+ if (inst.uncond_value != -1)
+ inst.instruction |= inst.uncond_value << 28;
+ }
+
+
+ if (((check & NEON_CHECK_ARCH) && !mark_feature_used (&fpu_neon_ext_v1))
+ || ((check & NEON_CHECK_ARCH8)
+ && !mark_feature_used (&fpu_neon_ext_armv8)))
+ {
+ first_error (_(BAD_FPU));
+ return FAIL;
+ }
+
+return SUCCESS;
+}
+
+
+/* Return TRUE if the SIMD instruction is available for the current
+ cpu_variant. FP is set to TRUE if this is a SIMD floating-point
+ instruction. CHECK contains the set of bits to pass to
+ vfp_or_neon_is_neon for the Neon-specific checks. */
+
+static bfd_boolean
+check_simd_pred_availability (int fp, unsigned check)
+{
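+/* Conditions above COND_ALWAYS come from a VPT block and are only valid for
+ MVE; a conventional condition code means an IT block, where the Neon
+ checks apply unless the MVE extension is present. */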
+if (inst.cond > COND_ALWAYS)
+ {
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ inst.error = BAD_FPU;
+ return FALSE;
+ }
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ }
+else if (inst.cond < COND_ALWAYS)
+ {
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+ else if (vfp_or_neon_is_neon (check) == FAIL)
+ return FALSE;
+ }
+else
+ {
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, fp ? mve_fp_ext : mve_ext)
+ && vfp_or_neon_is_neon (check) == FAIL)
+ return FALSE;
+
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+ }
+return TRUE;
+}
+
/* Neon instruction encoders, in approximate order of appearance. */
static void
do_neon_dyadic_i_su (void)
{
- enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
- struct neon_type_el et = neon_check_type (3, rs,
- N_EQK, N_EQK, N_SU_32 | N_KEY);
- neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+
+ enum neon_shape rs;
+ struct neon_type_el et;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ rs = neon_select_shape (NS_QQQ, NS_QQR, NS_NULL);
+ else
+ rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+
+ et = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_32 | N_KEY);
+
+
+ if (rs != NS_QQR)
+ neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+ else
+ mve_encode_qqr (et.size, et.type == NT_unsigned, 0);
}
static void
do_neon_dyadic_i64_su (void)
{
- enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
- struct neon_type_el et = neon_check_type (3, rs,
- N_EQK, N_EQK, N_SU_ALL | N_KEY);
- neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH))
+ return;
+ enum neon_shape rs;
+ struct neon_type_el et;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ rs = neon_select_shape (NS_QQR, NS_QQQ, NS_NULL);
+ et = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_MVE | N_KEY);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ et = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_ALL | N_KEY);
+ }
+ if (rs == NS_QQR)
+ mve_encode_qqr (et.size, et.type == NT_unsigned, 0);
+ else
+ neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
}
static void
}
static void
-do_neon_shl_imm (void)
+do_neon_shl (void)
{
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+
if (!inst.operands[2].isreg)
{
- enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
- struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_ALL);
+ enum neon_shape rs;
+ struct neon_type_el et;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ rs = neon_select_shape (NS_QQI, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_MVE);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_ALL);
+ }
int imm = inst.operands[2].imm;
constraint (imm < 0 || (unsigned)imm >= et.size,
}
else
{
- enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
- struct neon_type_el et = neon_check_type (3, rs,
- N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
- unsigned int tmp;
-
- /* VSHL/VQSHL 3-register variants have syntax such as:
- vshl.xx Dd, Dm, Dn
- whereas other 3-register operations encoded by neon_three_same have
- syntax like:
- vadd.xx Dd, Dn, Dm
- (i.e. with Dn & Dm reversed). Swap operands[1].reg and operands[2].reg
- here. */
- tmp = inst.operands[2].reg;
- inst.operands[2].reg = inst.operands[1].reg;
- inst.operands[1].reg = tmp;
- NEON_ENCODE (INTEGER, inst);
- neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+ enum neon_shape rs;
+ struct neon_type_el et;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ rs = neon_select_shape (NS_QQQ, NS_QQR, NS_NULL);
+ et = neon_check_type (3, rs, N_EQK, N_SU_MVE | N_KEY, N_EQK);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ et = neon_check_type (3, rs, N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
+ }
+
+
+ if (rs == NS_QQR)
+ {
+ constraint (inst.operands[0].reg != inst.operands[1].reg,
+ _("invalid instruction shape"));
+ if (inst.operands[2].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+ else if (inst.operands[2].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+
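+ /* Qda == Qm is enforced above, so only the destination Q register and
+ the shift-amount GPR need encoding. */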
+ inst.instruction = 0xee311e60;
+ inst.instruction |= (et.type == NT_unsigned) << 28;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= neon_logbits (et.size) << 18;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= inst.operands[2].reg;
+ inst.is_neon = 1;
+ }
+ else
+ {
+ unsigned int tmp;
+
+ /* VSHL/VQSHL 3-register variants have syntax such as:
+ vshl.xx Dd, Dm, Dn
+ whereas other 3-register operations encoded by neon_three_same have
+ syntax like:
+ vadd.xx Dd, Dn, Dm
+ (i.e. with Dn & Dm reversed). Swap operands[1].reg and
+ operands[2].reg here. */
+ tmp = inst.operands[2].reg;
+ inst.operands[2].reg = inst.operands[1].reg;
+ inst.operands[1].reg = tmp;
+ NEON_ENCODE (INTEGER, inst);
+ neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+ }
}
}
static void
-do_neon_qshl_imm (void)
+do_neon_qshl (void)
{
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+
if (!inst.operands[2].isreg)
{
- enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
- struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+ enum neon_shape rs;
+ struct neon_type_el et;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ rs = neon_select_shape (NS_QQI, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK, N_KEY | N_SU_MVE);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+ }
int imm = inst.operands[2].imm;
constraint (imm < 0 || (unsigned)imm >= et.size,
}
else
{
- enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
- struct neon_type_el et = neon_check_type (3, rs,
- N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
- unsigned int tmp;
+ enum neon_shape rs;
+ struct neon_type_el et;
- /* See note in do_neon_shl_imm. */
- tmp = inst.operands[2].reg;
- inst.operands[2].reg = inst.operands[1].reg;
- inst.operands[1].reg = tmp;
- NEON_ENCODE (INTEGER, inst);
- neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ rs = neon_select_shape (NS_QQQ, NS_QQR, NS_NULL);
+ et = neon_check_type (3, rs, N_EQK, N_SU_MVE | N_KEY, N_EQK);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ et = neon_check_type (3, rs, N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
+ }
+
+ if (rs == NS_QQR)
+ {
+ constraint (inst.operands[0].reg != inst.operands[1].reg,
+ _("invalid instruction shape"));
+ if (inst.operands[2].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+ else if (inst.operands[2].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+
+ inst.instruction = 0xee311ee0;
+ inst.instruction |= (et.type == NT_unsigned) << 28;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= neon_logbits (et.size) << 18;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= inst.operands[2].reg;
+ inst.is_neon = 1;
+ }
+ else
+ {
+ unsigned int tmp;
+
+ /* See note in do_neon_shl. */
+ tmp = inst.operands[2].reg;
+ inst.operands[2].reg = inst.operands[1].reg;
+ inst.operands[1].reg = tmp;
+ NEON_ENCODE (INTEGER, inst);
+ neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+ }
}
}
static void
do_neon_rshl (void)
{
- enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
- struct neon_type_el et = neon_check_type (3, rs,
- N_EQK, N_EQK, N_SU_ALL | N_KEY);
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+
+ enum neon_shape rs;
+ struct neon_type_el et;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ rs = neon_select_shape (NS_QQR, NS_QQQ, NS_NULL);
+ et = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_MVE | N_KEY);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ et = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_ALL | N_KEY);
+ }
+
unsigned int tmp;
- tmp = inst.operands[2].reg;
- inst.operands[2].reg = inst.operands[1].reg;
- inst.operands[1].reg = tmp;
- neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+ if (rs == NS_QQR)
+ {
+ if (inst.operands[2].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+ else if (inst.operands[2].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+
+ constraint (inst.operands[0].reg != inst.operands[1].reg,
+ _("invalid instruction shape"));
+
+ if (inst.instruction == 0x0000510)
+ /* We are dealing with vqrshl. */
+ inst.instruction = 0xee331ee0;
+ else
+ /* We are dealing with vrshl. */
+ inst.instruction = 0xee331e60;
+
+ inst.instruction |= (et.type == NT_unsigned) << 28;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= neon_logbits (et.size) << 18;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= inst.operands[2].reg;
+ inst.is_neon = 1;
+ }
+ else
+ {
+ tmp = inst.operands[2].reg;
+ inst.operands[2].reg = inst.operands[1].reg;
+ inst.operands[1].reg = tmp;
+ neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+ }
}
static int
if (inst.operands[2].present && inst.operands[2].isreg)
{
enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ if (rs == NS_QQQ
+ && !check_simd_pred_availability (FALSE,
+ NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+ else if (rs != NS_QQQ
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1))
+ first_error (BAD_FPU);
+
neon_check_type (3, rs, N_IGNORE_TYPE);
/* U bit and size field were set as part of the bitmask. */
NEON_ENCODE (INTEGER, inst);
enum neon_shape rs = (three_ops_form
? neon_select_shape (NS_DDI, NS_QQI, NS_NULL)
: neon_select_shape (NS_DI, NS_QI, NS_NULL));
- struct neon_type_el et = neon_check_type (2, rs,
- N_I8 | N_I16 | N_I32 | N_I64 | N_F32 | N_KEY, N_EQK);
+ /* neon_select_shape makes the second operand a copy of the first if the
+ second operand is not present. */
+ if (rs == NS_QQI
+ && !check_simd_pred_availability (FALSE,
+ NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+ else if (rs != NS_QQI
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1))
+ first_error (BAD_FPU);
+
+ struct neon_type_el et;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ et = neon_check_type (2, rs, N_I32 | N_I16 | N_KEY, N_EQK);
+ else
+ et = neon_check_type (2, rs, N_I8 | N_I16 | N_I32 | N_I64 | N_F32
+ | N_KEY, N_EQK);
+
+ if (et.type == NT_invtype)
+ return;
enum neon_opc opcode = (enum neon_opc) inst.instruction & 0x0fffffff;
unsigned immbits;
int cmode;
- if (et.type == NT_invtype)
- return;
if (three_ops_form)
constraint (inst.operands[0].reg != inst.operands[1].reg,
static void
neon_dyadic_misc (enum neon_el_type ubit_meaning, unsigned types,
- unsigned destbits)
-{
- enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_QQR, NS_NULL);
- struct neon_type_el et = neon_check_type (3, rs, N_EQK | destbits, N_EQK,
- types | N_KEY);
- if (et.type == NT_float)
- {
- NEON_ENCODE (FLOAT, inst);
- if (rs == NS_QQR)
- mve_encode_qqr (et.size, 1);
- else
- neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1);
- }
- else
- {
- NEON_ENCODE (INTEGER, inst);
- if (rs == NS_QQR)
- mve_encode_qqr (et.size, 0);
- else
- neon_three_same (neon_quad (rs), et.type == ubit_meaning, et.size);
- }
-}
-
-
-static void
-do_neon_dyadic_if_su_d (void)
-{
- /* This version only allow D registers, but that constraint is enforced during
- operand parsing so we don't need to do anything extra here. */
- neon_dyadic_misc (NT_unsigned, N_SUF_32, 0);
-}
-
-static void
-do_neon_dyadic_if_i_d (void)
-{
- /* The "untyped" case can't happen. Do this to stop the "U" bit being
- affected if we specify unsigned args. */
- neon_dyadic_misc (NT_untyped, N_IF_32, 0);
-}
-
-enum vfp_or_neon_is_neon_bits
-{
- NEON_CHECK_CC = 1,
- NEON_CHECK_ARCH = 2,
- NEON_CHECK_ARCH8 = 4
-};
-
-/* Call this function if an instruction which may have belonged to the VFP or
- Neon instruction sets, but turned out to be a Neon instruction (due to the
- operand types involved, etc.). We have to check and/or fix-up a couple of
- things:
-
- - Make sure the user hasn't attempted to make a Neon instruction
- conditional.
- - Alter the value in the condition code field if necessary.
- - Make sure that the arch supports Neon instructions.
-
- Which of these operations take place depends on bits from enum
- vfp_or_neon_is_neon_bits.
-
- WARNING: This function has side effects! If NEON_CHECK_CC is used and the
- current instruction's condition is COND_ALWAYS, the condition field is
- changed to inst.uncond_value. This is necessary because instructions shared
- between VFP and Neon may be conditional for the VFP variants only, and the
- unconditional Neon version must have, e.g., 0xF in the condition field. */
-
-static int
-vfp_or_neon_is_neon (unsigned check)
-{
- /* Conditions are always legal in Thumb mode (IT blocks). */
- if (!thumb_mode && (check & NEON_CHECK_CC))
- {
- if (inst.cond != COND_ALWAYS)
- {
- first_error (_(BAD_COND));
- return FAIL;
- }
- if (inst.uncond_value != -1)
- inst.instruction |= inst.uncond_value << 28;
- }
-
-
- if (((check & NEON_CHECK_ARCH) && !mark_feature_used (&fpu_neon_ext_v1))
- || ((check & NEON_CHECK_ARCH8)
- && !mark_feature_used (&fpu_neon_ext_armv8)))
- {
- first_error (_(BAD_FPU));
- return FAIL;
- }
-
- return SUCCESS;
-}
-
-static int
-check_simd_pred_availability (int fp, unsigned check)
+ unsigned destbits)
{
- if (inst.cond > COND_ALWAYS)
- {
- if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
- {
- inst.error = BAD_FPU;
- return 1;
- }
- inst.pred_insn_type = INSIDE_VPT_INSN;
- }
- else if (inst.cond < COND_ALWAYS)
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_QQR, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs, N_EQK | destbits, N_EQK,
+ types | N_KEY);
+ if (et.type == NT_float)
{
- if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
- inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
- else if (vfp_or_neon_is_neon (check) == FAIL)
- return 2;
+ NEON_ENCODE (FLOAT, inst);
+ if (rs == NS_QQR)
+ mve_encode_qqr (et.size, 0, 1);
+ else
+ neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1);
}
else
{
- if (!ARM_CPU_HAS_FEATURE (cpu_variant, fp ? mve_fp_ext : mve_ext)
- && vfp_or_neon_is_neon (check) == FAIL)
- return 3;
-
- if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
- inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+ NEON_ENCODE (INTEGER, inst);
+ if (rs == NS_QQR)
+ mve_encode_qqr (et.size, et.type == ubit_meaning, 0);
+ else
+ neon_three_same (neon_quad (rs), et.type == ubit_meaning, et.size);
}
- return 0;
+}
+
+
+static void
+do_neon_dyadic_if_su_d (void)
+{
+ /* This version only allows D registers, but that constraint is enforced during
+ operand parsing so we don't need to do anything extra here. */
+ neon_dyadic_misc (NT_unsigned, N_SUF_32, 0);
+}
+
+static void
+do_neon_dyadic_if_i_d (void)
+{
+ /* The "untyped" case can't happen. Do this to stop the "U" bit being
+ affected if we specify unsigned args. */
+ neon_dyadic_misc (NT_untyped, N_IF_32, 0);
}
static void
do_mve_vstr_vldr_RQ (int size, int elsize, int load)
{
unsigned os = inst.operands[1].imm >> 5;
+ unsigned type = inst.vectype.el[0].type;
constraint (os != 0 && size == 8,
_("can not shift offsets when accessing less than half-word"));
constraint (os && os != neon_logbits (size),
constraint (inst.operands[0].reg == (inst.operands[1].imm & 0x1f),
_("destination register and offset register may not be"
" the same"));
- constraint (size == elsize && inst.vectype.el[0].type != NT_unsigned,
+ constraint (size == elsize && type == NT_signed, BAD_EL_TYPE);
+ constraint (size != elsize && type != NT_unsigned && type != NT_signed,
BAD_EL_TYPE);
- constraint (inst.vectype.el[0].type != NT_unsigned
- && inst.vectype.el[0].type != NT_signed, BAD_EL_TYPE);
- inst.instruction |= (inst.vectype.el[0].type == NT_unsigned) << 28;
+ inst.instruction |= ((size == elsize) || (type == NT_unsigned)) << 28;
}
else
{
- constraint (inst.vectype.el[0].type != NT_untyped, BAD_EL_TYPE);
+ constraint (type != NT_untyped, BAD_EL_TYPE);
}
inst.instruction |= 1 << 23;
inst.is_neon = 1;
}
+static void
+do_mve_vaddlv (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_RRQ, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_EQK, N_S32 | N_U32 | N_KEY);
+
+ if (et.type == NT_invtype)
+ first_error (BAD_EL_TYPE);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ constraint (inst.operands[1].reg > 14, MVE_BAD_QREG);
+
+ inst.instruction |= (et.type == NT_unsigned) << 28;
+ inst.instruction |= inst.operands[1].reg << 19;
+ inst.instruction |= inst.operands[0].reg << 12;
+ inst.instruction |= inst.operands[2].reg;
+ inst.is_neon = 1;
+}
+
static void
do_neon_dyadic_if_su (void)
{
struct neon_type_el et = neon_check_type (3, rs, N_EQK , N_EQK,
N_SUF_32 | N_KEY);
- if (check_simd_pred_availability (et.type == NT_float,
- NEON_CHECK_ARCH | NEON_CHECK_CC))
+ constraint ((inst.instruction == ((unsigned) N_MNEM_vmax)
+ || inst.instruction == ((unsigned) N_MNEM_vmin))
+ && et.type == NT_float
+ && !ARM_CPU_HAS_FEATURE (cpu_variant,fpu_neon_ext_v1), BAD_FPU);
+
+ if (!check_simd_pred_availability (et.type == NT_float,
+ NEON_CHECK_ARCH | NEON_CHECK_CC))
return;
neon_dyadic_misc (NT_unsigned, N_SUF_32, 0);
they are predicated or not. */
if ((rs == NS_QQQ || rs == NS_QQR) && et.size != 64)
{
- if (check_simd_pred_availability (et.type == NT_float,
- NEON_CHECK_ARCH | NEON_CHECK_CC))
+ if (!check_simd_pred_availability (et.type == NT_float,
+ NEON_CHECK_ARCH | NEON_CHECK_CC))
return;
}
else
static unsigned
neon_scalar_for_mul (unsigned scalar, unsigned elsize)
{
- unsigned regno = NEON_SCALAR_REG (scalar);
- unsigned elno = NEON_SCALAR_INDEX (scalar);
+ unsigned regno = NEON_SCALAR_REG (scalar);
+ unsigned elno = NEON_SCALAR_INDEX (scalar);
+
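+ /* Pack the register and index into the scalar field: 16-bit scalars use
+ a 3-bit register number with the index above it, 32-bit scalars a 4-bit
+ register number with a single index bit. */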
+ switch (elsize)
+ {
+ case 16:
+ if (regno > 7 || elno > 3)
+ goto bad_scalar;
+ return regno | (elno << 3);
+
+ case 32:
+ if (regno > 15 || elno > 1)
+ goto bad_scalar;
+ return regno | (elno << 4);
+
+ default:
+ bad_scalar:
+ first_error (_("scalar out of range for multiply instruction"));
+ }
+
+ return 0;
+}
+
+/* Encode multiply / multiply-accumulate scalar instructions. */
+
+static void
+neon_mul_mac (struct neon_type_el et, int ubit)
+{
+ unsigned scalar;
+
+ /* Give a more helpful error message if we have an invalid type. */
+ if (et.type == NT_invtype)
+ return;
+
+ scalar = neon_scalar_for_mul (inst.operands[2].reg, et.size);
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= LOW4 (scalar);
+ inst.instruction |= HI1 (scalar) << 5;
+ inst.instruction |= (et.type == NT_float) << 8;
+ inst.instruction |= neon_logbits (et.size) << 20;
+ inst.instruction |= (ubit != 0) << 24;
+
+ neon_dp_fixup (&inst);
+}
+
+static void
+do_neon_mac_maybe_scalar (void)
+{
+ if (try_vfp_nsyn (3, do_vfp_nsyn_mla_mls) == SUCCESS)
+ return;
+
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH))
+ return;
+
+ if (inst.operands[2].isscalar)
+ {
+ constraint (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
+ enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_I16 | N_I32 | N_F_16_32 | N_KEY);
+ NEON_ENCODE (SCALAR, inst);
+ neon_mul_mac (et, neon_quad (rs));
+ }
+ else if (!inst.operands[2].isvec)
+ {
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
+
+ enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_SU_MVE | N_KEY);
+
+ neon_dyadic_misc (NT_unsigned, N_SU_MVE, 0);
+ }
+ else
+ {
+ constraint (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
+ /* The "untyped" case can't happen. Do this to stop the "U" bit being
+ affected if we specify unsigned args. */
+ neon_dyadic_misc (NT_untyped, N_IF_32, 0);
+ }
+}
+
+static void
+do_bfloat_vfma (void)
+{
+ constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
+ constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
+ enum neon_shape rs;
+ int t_bit = 0;
+
+ if (inst.instruction != B_MNEM_vfmab)
+ {
+ t_bit = 1;
+ inst.instruction = B_MNEM_vfmat;
+ }
+
+ if (inst.operands[2].isscalar)
+ {
+ rs = neon_select_shape (NS_QQS, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
+
+ inst.instruction |= (1 << 25);
+ int index = inst.operands[2].reg & 0xf;
+ constraint (!(index < 4), _("index must be in the range 0 to 3"));
+ inst.operands[2].reg >>= 4;
+ constraint (!(inst.operands[2].reg < 8),
+ _("indexed register must be less than 8"));
+ neon_three_args (t_bit);
+ inst.instruction |= ((index & 1) << 3);
+ inst.instruction |= ((index & 2) << 4);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_QQQ, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
+ neon_three_args (t_bit);
+ }
+
+}
+
+static void
+do_neon_fmac (void)
+{
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_fma)
+ && try_vfp_nsyn (3, do_vfp_nsyn_fma_fms) == SUCCESS)
+ return;
+
+ if (!check_simd_pred_availability (TRUE, NEON_CHECK_CC | NEON_CHECK_ARCH))
+ return;
+
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext))
+ {
+ enum neon_shape rs = neon_select_shape (NS_QQQ, NS_QQR, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs, N_F_MVE | N_KEY, N_EQK,
+ N_EQK);
+
+ if (rs == NS_QQR)
+ {
+
+ if (inst.operands[2].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+ else if (inst.operands[2].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+
+ inst.instruction = 0xee310e40;
+ inst.instruction |= (et.size == 16) << 28;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 6;
+ inst.instruction |= inst.operands[2].reg;
+ inst.is_neon = 1;
+ return;
+ }
+ }
+ else
+ {
+ constraint (!inst.operands[2].isvec, BAD_FPU);
+ }
+
+ neon_dyadic_misc (NT_untyped, N_IF_32, 0);
+}
+
+static void
+do_mve_vfma (void)
+{
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_bf16) &&
+ inst.cond == COND_ALWAYS)
+ {
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
+ inst.instruction = N_MNEM_vfma;
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ inst.cond = 0xf;
+ return do_neon_fmac ();
+ }
+ else
+ {
+ do_bfloat_vfma ();
+ }
+}
+
+static void
+do_neon_tst (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_8 | N_16 | N_32 | N_KEY);
+ neon_three_same (neon_quad (rs), 0, et.size);
+}
+
+/* VMUL with 3 registers allows the P8 type. The scalar version supports the
+ same types as the MAC equivalents. The polynomial type for this instruction
+ is encoded the same as the integer type. */
+
+static void
+do_neon_mul (void)
+{
+ if (try_vfp_nsyn (3, do_vfp_nsyn_mul) == SUCCESS)
+ return;
+
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH))
+ return;
+
+ if (inst.operands[2].isscalar)
+ {
+ constraint (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
+ do_neon_mac_maybe_scalar ();
+ }
+ else
+ {
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ enum neon_shape rs = neon_select_shape (NS_QQR, NS_QQQ, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_EQK, N_I_MVE | N_F_MVE | N_KEY);
+ if (et.type == NT_float)
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext),
+ BAD_FPU);
+
+ neon_dyadic_misc (NT_float, N_I_MVE | N_F_MVE, 0);
+ }
+ else
+ {
+ constraint (!inst.operands[2].isvec, BAD_FPU);
+ neon_dyadic_misc (NT_poly,
+ N_I8 | N_I16 | N_I32 | N_F16 | N_F32 | N_P8, 0);
+ }
+ }
+}
+
+static void
+do_neon_qdmulh (void)
+{
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+
+ if (inst.operands[2].isscalar)
+ {
+ constraint (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
+ enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+ NEON_ENCODE (SCALAR, inst);
+ neon_mul_mac (et, neon_quad (rs));
+ }
+ else
+ {
+ enum neon_shape rs;
+ struct neon_type_el et;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ rs = neon_select_shape (NS_QQR, NS_QQQ, NS_NULL);
+ et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+ }
+
+ NEON_ENCODE (INTEGER, inst);
+ if (rs == NS_QQR)
+ mve_encode_qqr (et.size, 0, 0);
+ else
+ /* The U bit (rounding) comes from the bit mask. */
+ neon_three_same (neon_quad (rs), 0, et.size);
+ }
+}
+
+static void
+do_mve_vaddv (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_RQ, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (2, rs, N_EQK, N_SU_32 | N_KEY);
- switch (elsize)
- {
- case 16:
- if (regno > 7 || elno > 3)
- goto bad_scalar;
- return regno | (elno << 3);
+ if (et.type == NT_invtype)
+ first_error (BAD_EL_TYPE);
- case 32:
- if (regno > 15 || elno > 1)
- goto bad_scalar;
- return regno | (elno << 4);
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
- default:
- bad_scalar:
- first_error (_("scalar out of range for multiply instruction"));
- }
+ constraint (inst.operands[1].reg > 14, MVE_BAD_QREG);
- return 0;
+ mve_encode_rq (et.type == NT_unsigned, et.size);
}
-/* Encode multiply / multiply-accumulate scalar instructions. */
-
static void
-neon_mul_mac (struct neon_type_el et, int ubit)
+do_mve_vhcadd (void)
{
- unsigned scalar;
+ enum neon_shape rs = neon_select_shape (NS_QQQI, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
- /* Give a more helpful error message if we have an invalid type. */
- if (et.type == NT_invtype)
- return;
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
- scalar = neon_scalar_for_mul (inst.operands[2].reg, et.size);
- inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
- inst.instruction |= HI1 (inst.operands[0].reg) << 22;
- inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
- inst.instruction |= HI1 (inst.operands[1].reg) << 7;
- inst.instruction |= LOW4 (scalar);
- inst.instruction |= HI1 (scalar) << 5;
- inst.instruction |= (et.type == NT_float) << 8;
- inst.instruction |= neon_logbits (et.size) << 20;
- inst.instruction |= (ubit != 0) << 24;
+ unsigned rot = inst.relocs[0].exp.X_add_number;
+ constraint (rot != 90 && rot != 270, _("immediate out of range"));
- neon_dp_fixup (&inst);
+ if (et.size == 32 && inst.operands[0].reg == inst.operands[2].reg)
+ as_tsktsk (_("Warning: 32-bit element size and same first and third "
+ "operand makes instruction UNPREDICTABLE"));
+
+ mve_encode_qqq (0, et.size);
+ inst.instruction |= (rot == 270) << 12;
+ inst.is_neon = 1;
}
static void
-do_neon_mac_maybe_scalar (void)
+do_mve_vqdmull (void)
{
- if (try_vfp_nsyn (3, do_vfp_nsyn_mla_mls) == SUCCESS)
- return;
+ enum neon_shape rs = neon_select_shape (NS_QQQ, NS_QQR, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
- if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
- return;
+ if (et.size == 32
+ && (inst.operands[0].reg == inst.operands[1].reg
+ || (rs == NS_QQQ && inst.operands[0].reg == inst.operands[2].reg)))
+ as_tsktsk (BAD_MVE_SRCDEST);
- if (inst.operands[2].isscalar)
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
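+ /* VQDMULL carries the element size in the bit 28 position, so pass
+ et.size == 32 through the U-bit argument and set the size field as for
+ a 64-bit operation. */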
+ if (rs == NS_QQQ)
{
- enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
- struct neon_type_el et = neon_check_type (3, rs,
- N_EQK, N_EQK, N_I16 | N_I32 | N_F_16_32 | N_KEY);
- NEON_ENCODE (SCALAR, inst);
- neon_mul_mac (et, neon_quad (rs));
+ mve_encode_qqq (et.size == 32, 64);
+ inst.instruction |= 1;
}
else
{
- /* The "untyped" case can't happen. Do this to stop the "U" bit being
- affected if we specify unsigned args. */
- neon_dyadic_misc (NT_untyped, N_IF_32, 0);
+ mve_encode_qqr (64, et.size == 32, 0);
+ inst.instruction |= 0x3 << 5;
}
}
static void
-do_neon_fmac (void)
+do_mve_vadc (void)
{
- if (try_vfp_nsyn (3, do_vfp_nsyn_fma_fms) == SUCCESS)
- return;
+ enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_KEY | N_I32, N_EQK, N_EQK);
- if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
- return;
+ if (et.type == NT_invtype)
+ first_error (BAD_EL_TYPE);
- neon_dyadic_misc (NT_untyped, N_IF_32, 0);
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ mve_encode_qqq (0, 64);
}
static void
-do_neon_tst (void)
+do_mve_vbrsr (void)
{
- enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
- struct neon_type_el et = neon_check_type (3, rs,
- N_EQK, N_EQK, N_8 | N_16 | N_32 | N_KEY);
- neon_three_same (neon_quad (rs), 0, et.size);
+ enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_EQK, N_8 | N_16 | N_32 | N_KEY);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ mve_encode_qqr (et.size, 0, 0);
}
-/* VMUL with 3 registers allows the P8 type. The scalar version supports the
- same types as the MAC equivalents. The polynomial type for this instruction
- is encoded the same as the integer type. */
+static void
+do_mve_vsbc (void)
+{
+ neon_check_type (3, NS_QQQ, N_EQK, N_EQK, N_I32 | N_KEY);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ mve_encode_qqq (1, 64);
+}
static void
-do_neon_mul (void)
+do_mve_vmulh (void)
{
- if (try_vfp_nsyn (3, do_vfp_nsyn_mul) == SUCCESS)
- return;
+ enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_MVE | N_KEY);
- if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
- return;
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
- if (inst.operands[2].isscalar)
- do_neon_mac_maybe_scalar ();
+ mve_encode_qqq (et.type == NT_unsigned, et.size);
+}
+
+static void
+do_mve_vqdmlah (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_EQK, N_S_32 | N_KEY);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
else
- neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F16 | N_F32 | N_P8, 0);
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ mve_encode_qqr (et.size, et.type == NT_unsigned, 0);
}
static void
-do_neon_qdmulh (void)
+do_mve_vqdmladh (void)
{
- if (inst.operands[2].isscalar)
- {
- enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
- struct neon_type_el et = neon_check_type (3, rs,
- N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
- NEON_ENCODE (SCALAR, inst);
- neon_mul_mac (et, neon_quad (rs));
- }
+ enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
else
- {
- enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
- struct neon_type_el et = neon_check_type (3, rs,
- N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
- NEON_ENCODE (INTEGER, inst);
- /* The U bit (rounding) comes from bit mask. */
- neon_three_same (neon_quad (rs), 0, et.size);
- }
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ mve_encode_qqq (0, et.size);
}
+
static void
do_mve_vmull (void)
{
enum neon_shape rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_DDS,
NS_QQS, NS_QQQ, NS_QQR, NS_NULL);
- if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
- && inst.cond == COND_ALWAYS
+ if (inst.cond == COND_ALWAYS
&& ((unsigned)inst.instruction) == M_MNEM_vmullt)
{
+
if (rs == NS_QQQ)
{
-
- struct neon_type_el et = neon_check_type (3, rs, N_EQK , N_EQK,
- N_SUF_32 | N_F64 | N_P8
- | N_P16 | N_I_MVE | N_KEY);
- if (((et.type == NT_poly) && et.size == 8
- && ARM_CPU_IS_ANY (cpu_variant))
- || (et.type == NT_integer) || (et.type == NT_float))
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
goto neon_vmul;
}
else
return;
-neon_vmul:
+ neon_vmul:
inst.instruction = N_MNEM_vmul;
inst.cond = 0xb;
if (thumb_mode)
}
static void
-do_neon_qrdmlah (void)
+do_mve_vmlaldav (void)
{
- /* Check we're on the correct architecture. */
- if (!mark_feature_used (&fpu_neon_ext_armv8))
- inst.error =
- _("instruction form not available on this architecture.");
- else if (!mark_feature_used (&fpu_neon_ext_v8_1))
+ enum neon_shape rs = neon_select_shape (NS_RRQQ, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (4, rs, N_EQK, N_EQK, N_EQK,
+ N_S16 | N_S32 | N_U16 | N_U32 | N_KEY);
+
+ if (et.type == NT_unsigned
+ && (inst.instruction == M_MNEM_vmlsldav
+ || inst.instruction == M_MNEM_vmlsldava
+ || inst.instruction == M_MNEM_vmlsldavx
+ || inst.instruction == M_MNEM_vmlsldavax))
+ first_error (BAD_SIMD_TYPE);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ mve_encode_rrqq (et.type == NT_unsigned, et.size);
+}
+
+static void
+do_mve_vrmlaldavh (void)
+{
+ struct neon_type_el et;
+ if (inst.instruction == M_MNEM_vrmlsldavh
+ || inst.instruction == M_MNEM_vrmlsldavha
+ || inst.instruction == M_MNEM_vrmlsldavhx
+ || inst.instruction == M_MNEM_vrmlsldavhax)
{
- as_warn (_("this instruction implies use of ARMv8.1 AdvSIMD."));
- record_feature_use (&fpu_neon_ext_v8_1);
+ et = neon_check_type (4, NS_RRQQ, N_EQK, N_EQK, N_EQK, N_S32 | N_KEY);
+ if (inst.operands[1].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
}
+ else
+ {
+ if (inst.instruction == M_MNEM_vrmlaldavhx
+ || inst.instruction == M_MNEM_vrmlaldavhax)
+ et = neon_check_type (4, NS_RRQQ, N_EQK, N_EQK, N_EQK, N_S32 | N_KEY);
+ else
+ et = neon_check_type (4, NS_RRQQ, N_EQK, N_EQK, N_EQK,
+ N_U32 | N_S32 | N_KEY);
+ /* vrmlaldavh's encoding with SP as the second, odd, GPR operand may alias
+ with the vmax/min instructions, making the use of SP in assembly
+ nonsensical, so instead of issuing the usual warning for SP in the odd
+ register operand we error out. */
+ constraint (inst.operands[1].reg == REG_SP, BAD_SP);
+ }
+
+ /* Make sure we still check that the second operand is odd and that PC is
+ disallowed. This is because we parse for any GPR operand, so as to be
+ able to distinguish between giving a warning or an error for SP as
+ described above. */
+ constraint ((inst.operands[1].reg % 2) != 1, BAD_EVEN);
+ constraint (inst.operands[1].reg == REG_PC, BAD_PC);
- if (inst.operands[2].isscalar)
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ mve_encode_rrqq (et.type == NT_unsigned, 0);
+}
+
+
+static void
+do_mve_vmaxnmv (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_RQ, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (2, rs, N_EQK, N_F_MVE | N_KEY);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ if (inst.operands[0].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+ else if (inst.operands[0].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+
+ mve_encode_rq (et.size == 16, 64);
+}
+
+static void
+do_mve_vmaxv (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_RQ, NS_NULL);
+ struct neon_type_el et;
+
+ if (inst.instruction == M_MNEM_vmaxv || inst.instruction == M_MNEM_vminv)
+ et = neon_check_type (2, rs, N_EQK, N_SU_MVE | N_KEY);
+ else
+ et = neon_check_type (2, rs, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ if (inst.operands[0].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+ else if (inst.operands[0].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+
+ mve_encode_rq (et.type == NT_unsigned, et.size);
+}
+
+static void
+do_neon_qrdmlah (void)
+{
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
{
- enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
- struct neon_type_el et = neon_check_type (3, rs,
- N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
- NEON_ENCODE (SCALAR, inst);
- neon_mul_mac (et, neon_quad (rs));
+ /* Check we're on the correct architecture. */
+ if (!mark_feature_used (&fpu_neon_ext_armv8))
+ inst.error
+ = _("instruction form not available on this architecture.");
+ else if (!mark_feature_used (&fpu_neon_ext_v8_1))
+ {
+ as_warn (_("this instruction implies use of ARMv8.1 AdvSIMD."));
+ record_feature_use (&fpu_neon_ext_v8_1);
+ }
+ if (inst.operands[2].isscalar)
+ {
+ enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+ NEON_ENCODE (SCALAR, inst);
+ neon_mul_mac (et, neon_quad (rs));
+ }
+ else
+ {
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+ NEON_ENCODE (INTEGER, inst);
+ /* The U bit (rounding) comes from bit mask. */
+ neon_three_same (neon_quad (rs), 0, et.size);
+ }
}
else
{
- enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
- struct neon_type_el et = neon_check_type (3, rs,
- N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+ enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL);
+ struct neon_type_el et
+ = neon_check_type (3, rs, N_EQK, N_EQK, N_S_32 | N_KEY);
+
NEON_ENCODE (INTEGER, inst);
- /* The U bit (rounding) comes from bit mask. */
- neon_three_same (neon_quad (rs), 0, et.size);
+ mve_encode_qqr (et.size, et.type == NT_unsigned, 0);
}
}
rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
et = neon_check_type (2, rs, N_EQK, N_S_32 | N_F_16_32 | N_KEY);
- if (check_simd_pred_availability (et.type == NT_float,
- NEON_CHECK_ARCH | NEON_CHECK_CC))
+ if (!check_simd_pred_availability (et.type == NT_float,
+ NEON_CHECK_ARCH | NEON_CHECK_CC))
return;
inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
static void
do_neon_sli (void)
{
- enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
- struct neon_type_el et = neon_check_type (2, rs,
- N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+
+ enum neon_shape rs;
+ struct neon_type_el et;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ rs = neon_select_shape (NS_QQI, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_KEY);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+ }
+
int imm = inst.operands[2].imm;
constraint (imm < 0 || (unsigned)imm >= et.size,
_("immediate out of range for insert"));
static void
do_neon_sri (void)
{
- enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
- struct neon_type_el et = neon_check_type (2, rs,
- N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+
+ enum neon_shape rs;
+ struct neon_type_el et;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ rs = neon_select_shape (NS_QQI, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_KEY);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+ }
+
int imm = inst.operands[2].imm;
constraint (imm < 1 || (unsigned)imm > et.size,
_("immediate out of range for insert"));
neon_imm_shift (FALSE, 0, neon_quad (rs), et, et.size - imm);
}
-static void
-do_neon_qshlu_imm (void)
-{
- enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
- struct neon_type_el et = neon_check_type (2, rs,
- N_EQK | N_UNS, N_S8 | N_S16 | N_S32 | N_S64 | N_KEY);
+static void
+do_neon_qshlu_imm (void)
+{
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+
+ enum neon_shape rs;
+ struct neon_type_el et;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ rs = neon_select_shape (NS_QQI, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK | N_UNS,
+ N_S8 | N_S16 | N_S32 | N_S64 | N_KEY);
+ }
+
int imm = inst.operands[2].imm;
constraint (imm < 0 || (unsigned)imm >= et.size,
_("immediate out of range for shift"));
CVT_VAR (f16_u32, N_F16 | N_KEY, N_U32, N_VFP, "fultos", "fuitos", NULL) \
CVT_VAR (u32_f16, N_U32, N_F16 | N_KEY, N_VFP, "ftouls", "ftouis", "ftouizs")\
CVT_VAR (s32_f16, N_S32, N_F16 | N_KEY, N_VFP, "ftosls", "ftosis", "ftosizs")\
+ CVT_VAR (bf16_f32, N_BF16, N_F32, whole_reg, NULL, NULL, NULL) \
/* VFP instructions. */ \
CVT_VAR (f32_f64, N_F32, N_F64, N_VFP, NULL, "fcvtsd", NULL) \
CVT_VAR (f64_f32, N_F64, N_F32, N_VFP, NULL, "fcvtds", NULL) \
switch (rs)
{
- case NS_DDI:
case NS_QQI:
+ if (mode == neon_cvt_mode_z
+ && (flavour == neon_cvt_flavour_f16_s16
+ || flavour == neon_cvt_flavour_f16_u16
+ || flavour == neon_cvt_flavour_s16_f16
+ || flavour == neon_cvt_flavour_u16_f16
+ || flavour == neon_cvt_flavour_f32_u32
+ || flavour == neon_cvt_flavour_f32_s32
+ || flavour == neon_cvt_flavour_s32_f32
+ || flavour == neon_cvt_flavour_u32_f32))
+ {
+ if (!check_simd_pred_availability (TRUE,
+ NEON_CHECK_CC | NEON_CHECK_ARCH))
+ return;
+ }
+ else if (mode == neon_cvt_mode_n)
+ {
+ /* We are dealing with vcvt with the 'ne' condition. */
+ inst.cond = 0x1;
+ inst.instruction = N_MNEM_vcvt;
+ do_neon_cvt_1 (neon_cvt_mode_z);
+ return;
+ }
+ /* fall through. */
+ case NS_DDI:
{
unsigned immbits;
unsigned enctab[] = {0x0000100, 0x1000100, 0x0, 0x1000000,
0x0000100, 0x1000100, 0x0, 0x1000000};
- if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
- return;
+ if ((rs != NS_QQI || !ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext))
+ && vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+ return;
+
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext))
+ {
+ constraint (inst.operands[2].present && inst.operands[2].imm == 0,
+ _("immediate value out of range"));
+ switch (flavour)
+ {
+ case neon_cvt_flavour_f16_s16:
+ case neon_cvt_flavour_f16_u16:
+ case neon_cvt_flavour_s16_f16:
+ case neon_cvt_flavour_u16_f16:
+ constraint (inst.operands[2].imm > 16,
+ _("immediate value out of range"));
+ break;
+ case neon_cvt_flavour_f32_u32:
+ case neon_cvt_flavour_f32_s32:
+ case neon_cvt_flavour_s32_f32:
+ case neon_cvt_flavour_u32_f32:
+ constraint (inst.operands[2].imm > 32,
+ _("immediate value out of range"));
+ break;
+ default:
+ inst.error = BAD_FPU;
+ return;
+ }
+ }
/* Fixed-point conversion with #0 immediate is encoded as an
integer conversion. */
}
break;
- case NS_DD:
case NS_QQ:
+ if ((mode == neon_cvt_mode_a || mode == neon_cvt_mode_n
+ || mode == neon_cvt_mode_m || mode == neon_cvt_mode_p)
+ && (flavour == neon_cvt_flavour_s16_f16
+ || flavour == neon_cvt_flavour_u16_f16
+ || flavour == neon_cvt_flavour_s32_f32
+ || flavour == neon_cvt_flavour_u32_f32))
+ {
+ if (!check_simd_pred_availability (TRUE,
+ NEON_CHECK_CC | NEON_CHECK_ARCH8))
+ return;
+ }
+ else if (mode == neon_cvt_mode_z
+ && (flavour == neon_cvt_flavour_f16_s16
+ || flavour == neon_cvt_flavour_f16_u16
+ || flavour == neon_cvt_flavour_s16_f16
+ || flavour == neon_cvt_flavour_u16_f16
+ || flavour == neon_cvt_flavour_f32_u32
+ || flavour == neon_cvt_flavour_f32_s32
+ || flavour == neon_cvt_flavour_s32_f32
+ || flavour == neon_cvt_flavour_u32_f32))
+ {
+ if (!check_simd_pred_availability (TRUE,
+ NEON_CHECK_CC | NEON_CHECK_ARCH))
+ return;
+ }
+ /* fall through. */
+ case NS_DD:
if (mode != neon_cvt_mode_x && mode != neon_cvt_mode_z)
{
- NEON_ENCODE (FLOAT, inst);
- set_pred_insn_type (OUTSIDE_PRED_INSN);
- if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL)
+ NEON_ENCODE (FLOAT, inst);
+ if (!check_simd_pred_availability (TRUE,
+ NEON_CHECK_CC | NEON_CHECK_ARCH8))
return;
inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
NEON_ENCODE (INTEGER, inst);
- if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
- return;
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext))
+ {
+ if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+ return;
+ }
if (flavour != neon_cvt_flavour_invalid)
inst.instruction |= enctab[flavour];
}
if (rs == NS_DQ)
- inst.instruction = 0x3b60600;
+ {
+ if (flavour == neon_cvt_flavour_bf16_f32)
+ {
+ if (vfp_or_neon_is_neon (NEON_CHECK_ARCH8) == FAIL)
+ return;
+ constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
+ /* VCVT.bf16.f32. */
+ inst.instruction = 0x11b60640;
+ }
+ else
+ /* VCVT.f16.f32. */
+ inst.instruction = 0x3b60600;
+ }
else
+ /* VCVT.f32.f16. */
inst.instruction = 0x3b60700;
inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
do_neon_cvttb_1 (bfd_boolean t)
{
enum neon_shape rs = neon_select_shape (NS_HF, NS_HD, NS_FH, NS_FF, NS_FD,
- NS_DF, NS_DH, NS_NULL);
+ NS_DF, NS_DH, NS_QQ, NS_QQI, NS_NULL);
if (rs == NS_NULL)
return;
+ else if (rs == NS_QQ || rs == NS_QQI)
+ {
+ int single_to_half = 0;
+ if (!check_simd_pred_availability (TRUE, NEON_CHECK_ARCH))
+ return;
+
+ enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
+
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+ && (flavour == neon_cvt_flavour_u16_f16
+ || flavour == neon_cvt_flavour_s16_f16
+ || flavour == neon_cvt_flavour_f16_s16
+ || flavour == neon_cvt_flavour_f16_u16
+ || flavour == neon_cvt_flavour_u32_f32
+ || flavour == neon_cvt_flavour_s32_f32
+ || flavour == neon_cvt_flavour_f32_s32
+ || flavour == neon_cvt_flavour_f32_u32))
+ {
+ inst.cond = 0xf;
+ inst.instruction = N_MNEM_vcvt;
+ set_pred_insn_type (INSIDE_VPT_INSN);
+ do_neon_cvt_1 (neon_cvt_mode_z);
+ return;
+ }
+ else if (rs == NS_QQ && flavour == neon_cvt_flavour_f32_f16)
+ single_to_half = 1;
+ else if (rs == NS_QQ && flavour != neon_cvt_flavour_f16_f32)
+ {
+ first_error (BAD_FPU);
+ return;
+ }
+
+ inst.instruction = 0xee3f0e01;
+ inst.instruction |= single_to_half << 28;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 13;
+ inst.instruction |= t << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 1;
+ inst.is_neon = 1;
+ }
else if (neon_check_type (2, rs, N_F16, N_F32 | N_VFP).type != NT_invtype)
{
inst.error = NULL;
inst.error = NULL;
do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE);
}
+ else if (neon_check_type (2, rs, N_BF16 | N_VFP, N_F32).type != NT_invtype)
+ {
+ constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
+ inst.error = NULL;
+ inst.instruction |= (1 << 8);
+ inst.instruction &= ~(1 << 9);
+ do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/FALSE);
+ }
else
return;
}
static void
do_neon_mvn (void)
{
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH))
+ return;
+
if (inst.operands[1].isreg)
{
- enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ enum neon_shape rs;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ rs = neon_select_shape (NS_QQ, NS_NULL);
+ else
+ rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
NEON_ENCODE (INTEGER, inst);
inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
}
neon_dp_fixup (&inst);
+
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ constraint (!inst.operands[1].isreg && !inst.operands[0].isquad, BAD_FPU);
+ }
}
/* Encode instructions of form:
static void
do_neon_dyadic_long (void)
{
- enum neon_shape rs = neon_select_shape (NS_QDD, NS_QQQ, NS_QQR, NS_NULL);
+ enum neon_shape rs = neon_select_shape (NS_QDD, NS_HHH, NS_FFF, NS_DDD, NS_NULL);
if (rs == NS_QDD)
{
if (vfp_or_neon_is_neon (NEON_CHECK_ARCH | NEON_CHECK_CC) == FAIL)
| ((elno & 0x1) << 3));
}
-bad_scalar:
+ bad_scalar:
first_error (_("scalar out of range for multiply instruction"));
return 0;
}
0x2. */
int size = -1;
- if (inst.cond != COND_ALWAYS)
- as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the "
- "behaviour is UNPREDICTABLE"));
-
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml),
- _(BAD_FP16));
-
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
- _(BAD_FPU));
-
/* vfmal/vfmsl are in three-same D/Q register format or the third operand can
be a scalar index register. */
if (inst.operands[2].isscalar)
rs = neon_select_shape (NS_DHH, NS_QDD, NS_NULL);
}
- neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_F16);
+
+ if (inst.cond != COND_ALWAYS)
+ as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the "
+ "behaviour is UNPREDICTABLE"));
+
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml),
+ _(BAD_FP16));
+
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
+ _(BAD_FPU));
/* "opcode" from template has included "ubit", so simply pass 0 here. Also,
the "S" bit in size field has been reused to differentiate vfmal and vfmsl,
inst.instruction &= 0x00ffffff;
inst.instruction |= high8;
-#define LOW1(R) ((R) & 0x1)
-#define HI4(R) (((R) >> 1) & 0xf)
/* Unlike usually NEON three-same, encoding for Vn and Vm will depend on
whether the instruction is in Q form and whether Vm is a scalar indexed
operand. */
static void
do_neon_rev (void)
{
- enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+
+ enum neon_shape rs;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ rs = neon_select_shape (NS_QQ, NS_NULL);
+ else
+ rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+
struct neon_type_el et = neon_check_type (2, rs,
N_EQK, N_8 | N_16 | N_32 | N_KEY);
+
unsigned op = (inst.instruction >> 7) & 3;
/* N (width of reversed regions) is encoded as part of the bitmask. We
extract it here to check the elements to be reversed are smaller.
Otherwise we'd get a reserved instruction. */
unsigned elsize = (op == 2) ? 16 : (op == 1) ? 32 : (op == 0) ? 64 : 0;
+
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) && elsize == 64
+ && inst.operands[0].reg == inst.operands[1].reg)
+ as_tsktsk (_("Warning: 64-bit element size and same destination and source"
+ " operands makes instruction UNPREDICTABLE"));
+
gas_assert (elsize != 0);
constraint (et.size >= elsize,
_("elements must be smaller than reversal region"));
{
if (inst.operands[1].isscalar)
{
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1),
+ BAD_FPU);
enum neon_shape rs = neon_select_shape (NS_DS, NS_QS, NS_NULL);
struct neon_type_el et = neon_check_type (2, rs,
N_EQK, N_8 | N_16 | N_32 | N_KEY);
enum neon_shape rs = neon_select_shape (NS_DR, NS_QR, NS_NULL);
struct neon_type_el et = neon_check_type (2, rs,
N_8 | N_16 | N_32 | N_KEY, N_EQK);
+ if (rs == NS_QR)
+ {
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH))
+ return;
+ }
+ else
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1),
+ BAD_FPU);
+
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ if (inst.operands[1].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+ else if (inst.operands[1].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+ }
+
/* Duplicate ARM register to lanes of vector. */
NEON_ENCODE (ARMREG, inst);
switch (et.size)
}
}
+static void
+do_mve_mov (int toQ)
+{
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ return;
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = MVE_UNPREDICABLE_INSN;
+
+ unsigned Rt = 0, Rt2 = 1, Q0 = 2, Q1 = 3;
+ if (toQ)
+ {
+ Q0 = 0;
+ Q1 = 1;
+ Rt = 2;
+ Rt2 = 3;
+ }
+
+ constraint (inst.operands[Q0].reg != inst.operands[Q1].reg + 2,
+ _("Index one must be [2,3] and index two must be two less than"
+ " index one."));
+ constraint (inst.operands[Rt].reg == inst.operands[Rt2].reg,
+ _("General purpose registers may not be the same"));
+ constraint (inst.operands[Rt].reg == REG_SP
+ || inst.operands[Rt2].reg == REG_SP,
+ BAD_SP);
+ constraint (inst.operands[Rt].reg == REG_PC
+ || inst.operands[Rt2].reg == REG_PC,
+ BAD_PC);
+
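+ /* This is the encoding for VMOV variants 16 and 17 listed in the comment
+ before do_neon_mov below (two GPRs moved to or from two lanes of a
+ Q register). */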
+ inst.instruction = 0xec000f00;
+ inst.instruction |= HI1 (inst.operands[Q1].reg / 32) << 23;
+ inst.instruction |= !!toQ << 20;
+ inst.instruction |= inst.operands[Rt2].reg << 16;
+ inst.instruction |= LOW4 (inst.operands[Q1].reg / 32) << 13;
+ inst.instruction |= (inst.operands[Q1].reg % 4) << 4;
+ inst.instruction |= inst.operands[Rt].reg;
+}
+
+static void
+do_mve_movn (void)
+{
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ return;
+
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+ struct neon_type_el et = neon_check_type (2, NS_QQ, N_EQK, N_I16 | N_I32
+ | N_KEY);
+
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= (neon_logbits (et.size) - 1) << 18;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.is_neon = 1;
+}
+
/* VMOV has particularly many variations. It can be one of:
0. VMOV<c><q> <Qd>, <Qm>
1. VMOV<c><q> <Dd>, <Dm>
(Two ARM regs to two VFP singles.)
15. VMOV <Sd>, <Se>, <Rn>, <Rm>
(Two VFP singles to two ARM regs.)
+ 16. VMOV<c> <Rt>, <Rt2>, <Qd[idx]>, <Qd[idx2]>
+ 17. VMOV<c> <Qd[idx]>, <Qd[idx2]>, <Rt>, <Rt2>
+ 18. VMOV<c>.<dt> <Rt>, <Qn[idx]>
+ 19. VMOV<c>.<dt> <Qd[idx]>, <Rt>
These cases can be disambiguated using neon_select_shape, except cases 1/9
and 3/11 which depend on the operand type too.
static void
do_neon_mov (void)
{
- enum neon_shape rs = neon_select_shape (NS_RRFF, NS_FFRR, NS_DRR, NS_RRD,
- NS_QQ, NS_DD, NS_QI, NS_DI, NS_SR,
- NS_RS, NS_FF, NS_FI, NS_RF, NS_FR,
- NS_HR, NS_RH, NS_HI, NS_NULL);
+ enum neon_shape rs = neon_select_shape (NS_RRSS, NS_SSRR, NS_RRFF, NS_FFRR,
+ NS_DRR, NS_RRD, NS_QQ, NS_DD, NS_QI,
+ NS_DI, NS_SR, NS_RS, NS_FF, NS_FI,
+ NS_RF, NS_FR, NS_HR, NS_RH, NS_HI,
+ NS_NULL);
struct neon_type_el et;
const char *ldconst = 0;
et = neon_check_type (2, rs, N_EQK, N_F64 | N_KEY);
/* It is not an error here if no type is given. */
inst.error = NULL;
- if (et.type == NT_float && et.size == 64)
+
+ /* In MVE we interpret the following two instructions as the same, so we
+ ignore the type (float) and size (64) checks that follow:
+ a: VMOV<c><q> <Dd>, <Dm>
+ b: VMOV<c><q>.F64 <Dd>, <Dm>. */
+ if ((et.type == NT_float && et.size == 64)
+ || (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)))
{
do_vfp_nsyn_opcode ("fcpyd");
break;
case NS_QQ: /* case 0/1. */
{
- if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+ if (!check_simd_pred_availability (FALSE,
+ NEON_CHECK_CC | NEON_CHECK_ARCH))
return;
/* The architecture manual I have doesn't explicitly state which
value the U bit should have for register->register moves, but
/* fall through. */
case NS_QI: /* case 2/3. */
- if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+ if (!check_simd_pred_availability (FALSE,
+ NEON_CHECK_CC | NEON_CHECK_ARCH))
return;
inst.instruction = 0x0800010;
neon_move_immediate ();
et = neon_check_type (2, NS_NULL, N_8 | N_16 | N_32 | N_KEY, N_EQK);
logsize = neon_logbits (et.size);
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1),
- _(BAD_FPU));
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1)
- && et.size != 32, _(BAD_FPU));
+ if (et.size != 32)
+ {
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+ && vfp_or_neon_is_neon (NEON_CHECK_ARCH) == FAIL)
+ return;
+ }
+ else
+ {
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+ _(BAD_FPU));
+ }
+
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ if (inst.operands[1].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+ else if (inst.operands[1].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+ }
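+ /* The container holding the scalar is 64 bits wide when isscalar == 1
+ (a D-register lane) and 128 bits wide otherwise (an MVE Q-register
+ lane); this bounds the index check below. */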
+ unsigned size = inst.operands[0].isscalar == 1 ? 64 : 128;
+
constraint (et.type == NT_invtype, _("bad type for scalar"));
- constraint (x >= 64 / et.size, _("scalar index out of range"));
+ constraint (x >= size / et.size, _("scalar index out of range"));
+
switch (et.size)
{
default: ;
}
- bcdebits |= x << logsize;
+ bcdebits |= (x & ((1 << (3-logsize)) - 1)) << logsize;
inst.instruction = 0xe000b10;
do_vfp_cond_or_thumb ();
inst.instruction |= HI1 (dn) << 7;
inst.instruction |= inst.operands[1].reg << 12;
inst.instruction |= (bcdebits & 3) << 5;
- inst.instruction |= (bcdebits >> 2) << 21;
+ inst.instruction |= ((bcdebits >> 2) & 3) << 21;
+ inst.instruction |= (x >> (3-logsize)) << 16;
}
break;
case NS_DRR: /* case 5 (fmdrr). */
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2),
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
_(BAD_FPU));
inst.instruction = 0xc400b10;
N_EQK, N_S8 | N_S16 | N_U8 | N_U16 | N_32 | N_KEY);
logsize = neon_logbits (et.size);
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1),
- _(BAD_FPU));
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1)
- && et.size != 32, _(BAD_FPU));
+ if (et.size != 32)
+ {
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+ && vfp_or_neon_is_neon (NEON_CHECK_CC
+ | NEON_CHECK_ARCH) == FAIL)
+ return;
+ }
+ else
+ {
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+ _(BAD_FPU));
+ }
+
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ if (inst.operands[0].reg == REG_SP)
+ as_tsktsk (MVE_BAD_SP);
+ else if (inst.operands[0].reg == REG_PC)
+ as_tsktsk (MVE_BAD_PC);
+ }
+
+ unsigned size = inst.operands[1].isscalar == 1 ? 64 : 128;
+
constraint (et.type == NT_invtype, _("bad type for scalar"));
- constraint (x >= 64 / et.size, _("scalar index out of range"));
+ constraint (x >= size / et.size, _("scalar index out of range"));
switch (et.size)
{
default: ;
}
- abcdebits |= x << logsize;
+ abcdebits |= (x & ((1 << (3-logsize)) - 1)) << logsize;
inst.instruction = 0xe100b10;
do_vfp_cond_or_thumb ();
inst.instruction |= LOW4 (dn) << 16;
inst.instruction |= inst.operands[0].reg << 12;
inst.instruction |= (abcdebits & 3) << 5;
inst.instruction |= (abcdebits >> 2) << 21;
+ inst.instruction |= (x >> (3-logsize)) << 16;
}
break;
case NS_RRD: /* case 7 (fmrrd). */
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2),
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
_(BAD_FPU));
inst.instruction = 0xc500b10;
do_scalar_fp16_v82_encode ();
break;
+ case NS_RRSS:
+ do_mve_mov (0);
+ break;
+ case NS_SSRR:
+ do_mve_mov (1);
+ break;
+
/* The encoders for the fmrrs and fmsrr instructions expect three operands
(one of which is a list), but we have parsed four. Do some fiddling to
make the operands what do_vfp_reg2_from_sp2 and do_vfp_sp2_from_reg2
expect. */
case NS_RRFF: /* case 14 (fmrrs). */
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+ _(BAD_FPU));
constraint (inst.operands[3].reg != inst.operands[2].reg + 1,
_("VFP registers must be adjacent"));
inst.operands[2].imm = 2;
break;
case NS_FFRR: /* case 15 (fmsrr). */
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+ _(BAD_FPU));
constraint (inst.operands[1].reg != inst.operands[0].reg + 1,
_("VFP registers must be adjacent"));
inst.operands[1] = inst.operands[2];
}
}
+static void
+do_mve_movl (void)
+{
+ if (!(inst.operands[0].present && inst.operands[0].isquad
+ && inst.operands[1].present && inst.operands[1].isquad
+ && !inst.operands[2].present))
+ {
+ inst.instruction = 0;
+ inst.cond = 0xb;
+ if (thumb_mode)
+ set_pred_insn_type (INSIDE_IT_INSN);
+ do_neon_mov ();
+ return;
+ }
+
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ return;
+
+ if (inst.cond != COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+
+ struct neon_type_el et = neon_check_type (2, NS_QQ, N_EQK, N_S8 | N_U8
+ | N_S16 | N_U16 | N_KEY);
+
+ inst.instruction |= (et.type == NT_unsigned) << 28;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= (neon_logbits (et.size) + 1) << 19;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.is_neon = 1;
+}
+
static void
do_neon_rshift_round_imm (void)
{
- enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
- struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+
+ enum neon_shape rs;
+ struct neon_type_el et;
+
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ rs = neon_select_shape (NS_QQI, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK, N_SU_MVE | N_KEY);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+ }
int imm = inst.operands[2].imm;
/* imm == 0 case is encoded as VMOV for V{R}SHR. */
static void
do_neon_sat_abs_neg (void)
{
- enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH))
+ return;
+
+ enum neon_shape rs;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ rs = neon_select_shape (NS_QQ, NS_NULL);
+ else
+ rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
struct neon_type_el et = neon_check_type (2, rs,
N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
neon_two_same (neon_quad (rs), 1, et.size);
static void
do_neon_cls (void)
{
- enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+
+ enum neon_shape rs;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ rs = neon_select_shape (NS_QQ, NS_NULL);
+ else
+ rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+
struct neon_type_el et = neon_check_type (2, rs,
N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
neon_two_same (neon_quad (rs), 1, et.size);
static void
do_neon_clz (void)
{
- enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+ return;
+
+ enum neon_shape rs;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ rs = neon_select_shape (NS_QQ, NS_NULL);
+ else
+ rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+
struct neon_type_el et = neon_check_type (2, rs,
N_EQK, N_I8 | N_I16 | N_I32 | N_KEY);
neon_two_same (neon_quad (rs), 1, et.size);
static void
do_neon_ldm_stm (void)
{
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+ _(BAD_FPU));
/* P, U and L bits are part of bitmask. */
int is_dbmode = (inst.instruction & (1 << 24)) != 0;
unsigned offsetbits = inst.operands[1].imm * 2;
do_vfp_cond_or_thumb ();
}
+static void
+do_vfp_nsyn_pop (void)
+{
+ nsyn_insert_sp ();
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ return do_vfp_nsyn_opcode ("vldm");
+
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd),
+ _(BAD_FPU));
+
+ constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
+ _("register list must contain at least 1 and at most 16 "
+ "registers"));
+
+ if (inst.operands[1].issingle)
+ do_vfp_nsyn_opcode ("fldmias");
+ else
+ do_vfp_nsyn_opcode ("fldmiad");
+}
+
+static void
+do_vfp_nsyn_push (void)
+{
+ nsyn_insert_sp ();
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ return do_vfp_nsyn_opcode ("vstmdb");
+
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd),
+ _(BAD_FPU));
+
+ constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
+ _("register list must contain at least 1 and at most 16 "
+ "registers"));
+
+ if (inst.operands[1].issingle)
+ do_vfp_nsyn_opcode ("fstmdbs");
+ else
+ do_vfp_nsyn_opcode ("fstmdbd");
+}
+
static void
do_neon_ldr_str (void)
{
/* VLDR/VSTR. */
else
{
- if (!mark_feature_used (&fpu_vfp_ext_v1xd))
+ if (!mark_feature_used (&fpu_vfp_ext_v1xd)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
as_bad (_("Instruction not permitted on this architecture"));
do_neon_ldr_str ();
}
static void
do_vmaxnm (void)
{
- set_pred_insn_type (OUTSIDE_PRED_INSN);
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ set_pred_insn_type (OUTSIDE_PRED_INSN);
if (try_vfp_nsyn (3, do_vfp_nsyn_fpv8) == SUCCESS)
return;
- if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL)
+ if (!check_simd_pred_availability (TRUE, NEON_CHECK_CC | NEON_CHECK_ARCH8))
return;
neon_dyadic_misc (NT_untyped, N_F_16_32, 0);
if (et.type == NT_invtype)
return;
- set_pred_insn_type (OUTSIDE_PRED_INSN);
- NEON_ENCODE (FLOAT, inst);
-
- if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL)
+ if (!check_simd_pred_availability (TRUE,
+ NEON_CHECK_CC | NEON_CHECK_ARCH8))
return;
+ NEON_ENCODE (FLOAT, inst);
+
inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
inst.instruction |= HI1 (inst.operands[0].reg) << 22;
inst.instruction |= LOW4 (inst.operands[1].reg);
static void
do_vcmla (void)
{
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
- _(BAD_FPU));
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext)
+ && (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8)
+ || !mark_feature_used (&arm_ext_v8_3)), (BAD_FPU));
constraint (inst.relocs[0].exp.X_op != O_constant,
_("expression too complex"));
unsigned rot = inst.relocs[0].exp.X_add_number;
constraint (rot != 0 && rot != 90 && rot != 180 && rot != 270,
_("immediate out of range"));
rot /= 90;
+
+ if (!check_simd_pred_availability (TRUE,
+ NEON_CHECK_ARCH8 | NEON_CHECK_CC))
+ return;
+
if (inst.operands[2].isscalar)
{
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext))
+ first_error (_("invalid instruction shape"));
enum neon_shape rs = neon_select_shape (NS_DDSI, NS_QQSI, NS_NULL);
unsigned size = neon_check_type (3, rs, N_EQK, N_EQK,
N_KEY | N_F16 | N_F32).size;
}
else
{
- enum neon_shape rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL);
+ enum neon_shape rs;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext))
+ rs = neon_select_shape (NS_QQQI, NS_NULL);
+ else
+ rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL);
+
unsigned size = neon_check_type (3, rs, N_EQK, N_EQK,
N_KEY | N_F16 | N_F32).size;
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext) && size == 32
+ && (inst.operands[0].reg == inst.operands[1].reg
+ || inst.operands[0].reg == inst.operands[2].reg))
+ as_tsktsk (BAD_MVE_SRCDEST);
+
neon_three_same (neon_quad (rs), 0, -1);
inst.instruction &= 0x00ffffff; /* Undo neon_dp_fixup. */
inst.instruction |= 0xfc200800;
}
static void
-do_vcadd (void)
+do_vcadd (void)
+{
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+ && (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8)
+ || !mark_feature_used (&arm_ext_v8_3)), (BAD_FPU));
+ constraint (inst.relocs[0].exp.X_op != O_constant,
+ _("expression too complex"));
+
+ unsigned rot = inst.relocs[0].exp.X_add_number;
+ constraint (rot != 90 && rot != 270, _("immediate out of range"));
+ enum neon_shape rs;
+ struct neon_type_el et;
+ if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+ {
+ rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL);
+ et = neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_F16 | N_F32);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_QQQI, NS_NULL);
+ et = neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_F16 | N_F32 | N_I8
+ | N_I16 | N_I32);
+ if (et.size == 32 && inst.operands[0].reg == inst.operands[2].reg)
+ as_tsktsk (_("Warning: 32-bit element size and same first and third "
+ "operand makes instruction UNPREDICTABLE"));
+ }
+
+ if (et.type == NT_invtype)
+ return;
+
+ if (!check_simd_pred_availability (et.type == NT_float,
+ NEON_CHECK_ARCH8 | NEON_CHECK_CC))
+ return;
+
+ if (et.type == NT_float)
+ {
+ neon_three_same (neon_quad (rs), 0, -1);
+ inst.instruction &= 0x00ffffff; /* Undo neon_dp_fixup. */
+ inst.instruction |= 0xfc800800;
+ inst.instruction |= (rot == 270) << 24;
+ inst.instruction |= (et.size == 32) << 20;
+ }
+ else
+ {
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
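+ /* MVE integer VCADD: rot == 270 sets bit 12, and neon_logbits of the
+ element size (0, 1 or 2 for .i8/.i16/.i32) lands in bits 21:20. */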
+ inst.instruction = 0xfe000f00;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= neon_logbits (et.size) << 20;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= (rot == 270) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.is_neon = 1;
+ }
+}
+
+/* Dot Product instructions encoding support. */
+
+static void
+do_neon_dotproduct (int unsigned_p)
+{
+ enum neon_shape rs;
+ unsigned scalar_oprd2 = 0;
+ int high8;
+
+ if (inst.cond != COND_ALWAYS)
+ as_warn (_("Dot Product instructions cannot be conditional, the behaviour "
+ "is UNPREDICTABLE"));
+
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
+ _(BAD_FPU));
+
+ /* Dot Product instructions are in three-same D/Q register format or the third
+ operand can be a scalar index register. */
+ if (inst.operands[2].isscalar)
+ {
+ scalar_oprd2 = neon_scalar_for_mul (inst.operands[2].reg, 32);
+ high8 = 0xfe000000;
+ rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+ }
+ else
+ {
+ high8 = 0xfc000000;
+ rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ }
+
+ if (unsigned_p)
+ neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_U8);
+ else
+ neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_S8);
+
+ /* The "U" bit in traditional Three Same encoding is fixed to 0 for Dot
+ Product instruction, so we pass 0 as the "ubit" parameter. And the
+ "Size" field are fixed to 0x2, so we pass 32 as the "size" parameter. */
+ neon_three_same (neon_quad (rs), 0, 32);
+
+ /* Undo neon_dp_fixup. Dot Product instructions use a slightly
+ different NEON three-same encoding. */
+ inst.instruction &= 0x00ffffff;
+ inst.instruction |= high8;
+ /* Encode 'U' bit which indicates signedness. */
+ inst.instruction |= (unsigned_p ? 1 : 0) << 4;
+ /* Re-encode operand2 if it's an indexed scalar operand. What has been
+ encoded from inst.operands[2].reg in neon_three_same is GAS's internal
+ encoding, not the instruction encoding. */
+ if (inst.operands[2].isscalar)
+ {
+ inst.instruction &= 0xffffffd0;
+ inst.instruction |= LOW4 (scalar_oprd2);
+ inst.instruction |= HI1 (scalar_oprd2) << 5;
+ }
+}
+
+/* Dot Product instructions for signed integer. */
+
+static void
+do_neon_dotproduct_s (void)
+{
+ return do_neon_dotproduct (0);
+}
+
+/* Dot Product instructions for unsigned integer. */
+
+static void
+do_neon_dotproduct_u (void)
+{
+ return do_neon_dotproduct (1);
+}
+
+static void
+do_vusdot (void)
+{
+ enum neon_shape rs;
+ set_pred_insn_type (OUTSIDE_PRED_INSN);
+ if (inst.operands[2].isscalar)
+ {
+ rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY);
+
+ inst.instruction |= (1 << 25);
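+ /* The parsed scalar operand packs the lane index in the low four bits
+ of the register field; unpack it here. */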
+ int index = inst.operands[2].reg & 0xf;
+ constraint ((index != 1 && index != 0), _("index must be 0 or 1"));
+ inst.operands[2].reg >>= 4;
+ constraint (!(inst.operands[2].reg < 16),
+ _("indexed register must be less than 16"));
+ neon_three_args (rs == NS_QQS);
+ inst.instruction |= (index << 5);
+ }
+ else
+ {
+ inst.instruction |= (1 << 21);
+ rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY);
+ neon_three_args (rs == NS_QQQ);
+ }
+}
+
+static void
+do_vsudot (void)
+{
+ enum neon_shape rs;
+ set_pred_insn_type (OUTSIDE_PRED_INSN);
+ if (inst.operands[2].isscalar)
+ {
+ rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_U8 | N_KEY);
+
+ inst.instruction |= (1 << 25);
+ int index = inst.operands[2].reg & 0xf;
+ constraint ((index != 1 && index != 0), _("index must be 0 or 1"));
+ inst.operands[2].reg >>= 4;
+ constraint (!(inst.operands[2].reg < 16),
+ _("indexed register must be less than 16"));
+ neon_three_args (rs == NS_QQS);
+ inst.instruction |= (index << 5);
+ }
+}
+
+static void
+do_vsmmla (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY);
+
+ set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+ neon_three_args (1);
+}
+
+static void
+do_vummla (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_U8 | N_KEY);
+
+ set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+ neon_three_args (1);
+}
+
+static void
+check_cde_operand (size_t index, int is_dual)
+{
+ unsigned Rx = inst.operands[index].reg;
+ bfd_boolean isvec = inst.operands[index].isvec;
+ if (is_dual == 0 && thumb_mode)
+ constraint (
+ !((Rx <= 14 && Rx != 13) || (Rx == REG_PC && isvec)),
+ _("Register must be r0-r14 except r13, or APSR_nzcv."));
+ else
+ constraint ( !((Rx <= 10 && Rx % 2 == 0 )),
+ _("Register must be an even register between r0-r10."));
+}
+
+static bfd_boolean
+cde_coproc_enabled (unsigned coproc)
+{
+ switch (coproc)
+ {
+ case 0: return mark_feature_used (&arm_ext_cde0);
+ case 1: return mark_feature_used (&arm_ext_cde1);
+ case 2: return mark_feature_used (&arm_ext_cde2);
+ case 3: return mark_feature_used (&arm_ext_cde3);
+ case 4: return mark_feature_used (&arm_ext_cde4);
+ case 5: return mark_feature_used (&arm_ext_cde5);
+ case 6: return mark_feature_used (&arm_ext_cde6);
+ case 7: return mark_feature_used (&arm_ext_cde7);
+ default: return FALSE;
+ }
+}
+
+#define cde_coproc_pos 8
+static void
+cde_handle_coproc (void)
+{
+ unsigned coproc = inst.operands[0].reg;
+ constraint (coproc > 7, _("CDE Coprocessor must be in range 0-7"));
+ constraint (!(cde_coproc_enabled (coproc)), BAD_CDE_COPROC);
+ inst.instruction |= coproc << cde_coproc_pos;
+}
+#undef cde_coproc_pos
+
+static void
+cxn_handle_predication (bfd_boolean is_accum)
+{
+ if (is_accum && conditional_insn ())
+ set_pred_insn_type (INSIDE_IT_INSN);
+ else if (conditional_insn ())
+ /* conditional_insn essentially checks for a suffix, not whether the
+ instruction is inside an IT block or not.
+ The non-accumulator versions should not have suffixes. */
+ inst.error = BAD_SYNTAX;
+ else
+ set_pred_insn_type (OUTSIDE_PRED_INSN);
+}
+
+static void
+do_custom_instruction_1 (int is_dual, bfd_boolean is_accum)
+{
+
+ constraint (!mark_feature_used (&arm_ext_cde), _(BAD_CDE));
+
+ unsigned imm, Rd;
+
+ Rd = inst.operands[1].reg;
+ check_cde_operand (1, is_dual);
+
+ if (is_dual == 1)
+ {
+ constraint (inst.operands[2].reg != Rd + 1,
+ _("cx1d requires consecutive destination registers."));
+ imm = inst.operands[3].imm;
+ }
+ else if (is_dual == 0)
+ imm = inst.operands[2].imm;
+ else
+ abort ();
+
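+ /* Scatter the immediate across the encoding: imm[12:7] goes to
+ bits 21:16, imm[6] to bit 7 and imm[5:0] to bits 5:0. */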
+ inst.instruction |= Rd << 12;
+ inst.instruction |= (imm & 0x1F80) << 9;
+ inst.instruction |= (imm & 0x0040) << 1;
+ inst.instruction |= (imm & 0x003f);
+
+ cde_handle_coproc ();
+ cxn_handle_predication (is_accum);
+}
+
+static void
+do_custom_instruction_2 (int is_dual, bfd_boolean is_accum)
+{
+
+ constraint (!mark_feature_used (&arm_ext_cde), _(BAD_CDE));
+
+ unsigned imm, Rd, Rn;
+
+ Rd = inst.operands[1].reg;
+
+ if (is_dual == 1)
+ {
+ constraint (inst.operands[2].reg != Rd + 1,
+ _("cx2d requires consecutive destination registers."));
+ imm = inst.operands[4].imm;
+ Rn = inst.operands[3].reg;
+ }
+ else if (is_dual == 0)
+ {
+ imm = inst.operands[3].imm;
+ Rn = inst.operands[2].reg;
+ }
+ else
+ abort ();
+
+ check_cde_operand (2 + is_dual, /* is_dual = */0);
+ check_cde_operand (1, is_dual);
+
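+ /* Scatter the immediate: imm[9:7] goes to bits 22:20, imm[6] to
+ bit 7 and imm[5:0] to bits 5:0. */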
+ inst.instruction |= Rd << 12;
+ inst.instruction |= Rn << 16;
+
+ inst.instruction |= (imm & 0x0380) << 13;
+ inst.instruction |= (imm & 0x0040) << 1;
+ inst.instruction |= (imm & 0x003f);
+
+ cde_handle_coproc ();
+ cxn_handle_predication (is_accum);
+}
+
+static void
+do_custom_instruction_3 (int is_dual, bfd_boolean is_accum)
+{
+
+ constraint (!mark_feature_used (&arm_ext_cde), _(BAD_CDE));
+
+ unsigned imm, Rd, Rn, Rm;
+
+ Rd = inst.operands[1].reg;
+
+ if (is_dual == 1)
+ {
+ constraint (inst.operands[2].reg != Rd + 1,
+ _("cx3d requires consecutive destination registers."));
+ imm = inst.operands[5].imm;
+ Rn = inst.operands[3].reg;
+ Rm = inst.operands[4].reg;
+ }
+ else if (is_dual == 0)
+ {
+ imm = inst.operands[4].imm;
+ Rn = inst.operands[2].reg;
+ Rm = inst.operands[3].reg;
+ }
+ else
+ abort ();
+
+ check_cde_operand (1, is_dual);
+ check_cde_operand (2 + is_dual, /* is_dual = */0);
+ check_cde_operand (3 + is_dual, /* is_dual = */0);
+
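+ /* Scatter the immediate: imm[5:3] goes to bits 22:20, imm[2] to
+ bit 7 and imm[1:0] to bits 5:4. */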
+ inst.instruction |= Rd;
+ inst.instruction |= Rn << 16;
+ inst.instruction |= Rm << 12;
+
+ inst.instruction |= (imm & 0x0038) << 17;
+ inst.instruction |= (imm & 0x0004) << 5;
+ inst.instruction |= (imm & 0x0003) << 4;
+
+ cde_handle_coproc ();
+ cxn_handle_predication (is_accum);
+}
+
+static void
+do_cx1 (void)
+{
+ return do_custom_instruction_1 (0, 0);
+}
+
+static void
+do_cx1a (void)
+{
+ return do_custom_instruction_1 (0, 1);
+}
+
+static void
+do_cx1d (void)
+{
+ return do_custom_instruction_1 (1, 0);
+}
+
+static void
+do_cx1da (void)
+{
+ return do_custom_instruction_1 (1, 1);
+}
+
+static void
+do_cx2 (void)
{
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
- _(BAD_FPU));
- constraint (inst.relocs[0].exp.X_op != O_constant,
- _("expression too complex"));
- unsigned rot = inst.relocs[0].exp.X_add_number;
- constraint (rot != 90 && rot != 270, _("immediate out of range"));
- enum neon_shape rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL);
- unsigned size = neon_check_type (3, rs, N_EQK, N_EQK,
- N_KEY | N_F16 | N_F32).size;
- neon_three_same (neon_quad (rs), 0, -1);
- inst.instruction &= 0x00ffffff; /* Undo neon_dp_fixup. */
- inst.instruction |= 0xfc800800;
- inst.instruction |= (rot == 270) << 24;
- inst.instruction |= (size == 32) << 20;
+ return do_custom_instruction_2 (0, 0);
}
-/* Dot Product instructions encoding support. */
+static void
+do_cx2a (void)
+{
+ return do_custom_instruction_2 (0, 1);
+}
static void
-do_neon_dotproduct (int unsigned_p)
+do_cx2d (void)
{
- enum neon_shape rs;
- unsigned scalar_oprd2 = 0;
- int high8;
+ return do_custom_instruction_2 (1, 0);
+}
- if (inst.cond != COND_ALWAYS)
- as_warn (_("Dot Product instructions cannot be conditional, the behaviour "
- "is UNPREDICTABLE"));
+static void
+do_cx2da (void)
+{
+ return do_custom_instruction_2 (1, 1);
+}
- constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
- _(BAD_FPU));
+static void
+do_cx3 (void)
+{
+ return do_custom_instruction_3 (0, 0);
+}
- /* Dot Product instructions are in three-same D/Q register format or the third
- operand can be a scalar index register. */
- if (inst.operands[2].isscalar)
- {
- scalar_oprd2 = neon_scalar_for_mul (inst.operands[2].reg, 32);
- high8 = 0xfe000000;
- rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
- }
+static void
+do_cx3a (void)
+{
+ return do_custom_instruction_3 (0, 1);
+}
+
+static void
+do_cx3d (void)
+{
+ return do_custom_instruction_3 (1, 0);
+}
+
+static void
+do_cx3da (void)
+{
+ return do_custom_instruction_3 (1, 1);
+}
+
+static void
+vcx_assign_vec_d (unsigned regnum)
+{
+ inst.instruction |= HI4 (regnum) << 12;
+ inst.instruction |= LOW1 (regnum) << 22;
+}
+
+static void
+vcx_assign_vec_m (unsigned regnum)
+{
+ inst.instruction |= HI4 (regnum);
+ inst.instruction |= LOW1 (regnum) << 5;
+}
+
+static void
+vcx_assign_vec_n (unsigned regnum)
+{
+ inst.instruction |= HI4 (regnum) << 16;
+ inst.instruction |= LOW1 (regnum) << 7;
+}
+
+enum vcx_reg_type {
+ q_reg,
+ d_reg,
+ s_reg
+};
+
+static enum vcx_reg_type
+vcx_get_reg_type (enum neon_shape ns)
+{
+ gas_assert (ns == NS_PQI
+ || ns == NS_PDI
+ || ns == NS_PFI
+ || ns == NS_PQQI
+ || ns == NS_PDDI
+ || ns == NS_PFFI
+ || ns == NS_PQQQI
+ || ns == NS_PDDDI
+ || ns == NS_PFFFI);
+ if (ns == NS_PQI || ns == NS_PQQI || ns == NS_PQQQI)
+ return q_reg;
+ if (ns == NS_PDI || ns == NS_PDDI || ns == NS_PDDDI)
+ return d_reg;
+ return s_reg;
+}
+
+#define vcx_size_pos 24
+#define vcx_vec_pos 6
+static unsigned
+vcx_handle_shape (enum vcx_reg_type reg_type)
+{
+ unsigned mult = 2;
+ if (reg_type == q_reg)
+ inst.instruction |= 1 << vcx_vec_pos;
+ else if (reg_type == d_reg)
+ inst.instruction |= 1 << vcx_size_pos;
else
+ mult = 1;
+ /* NOTE:
+ The documentation says that the Q registers are encoded as 2*N in the D:Vd
+ bits (or equivalent for N and M registers).
+ Similarly, the D registers are encoded as N in the D:Vd bits, while the
+ S registers are encoded as N in the Vd:D bits.
+
+ Taking into account the maximum values of these registers we can see a
+ nicer pattern for calculation:
+ Q -> 7, D -> 15, S -> 31
+
+ If we say that everything is encoded in the Vd:D bits, then we can say
+ that Q is encoded as 4*N, and D is encoded as 2*N.
+ This way the bits will end up the same, and calculation is simpler.
+ (calculation is now:
+ 1. Multiply by a number determined by the register letter.
+ 2. Encode resulting number in Vd:D bits.)
+
+ This is made a little more complicated by automatic handling of 'Q'
+ registers elsewhere, which means the register number is already 2*N where
+ N is the number the user wrote after the register letter.
+ */
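+ /* For example: for Q3 the user wrote 3, GAS already stores 6, and
+ mult == 2 gives 12 == 4*3 in the Vd:D bits; for D5, 5 * 2 == 10; and
+ for S17, 17 * 1 == 17. */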
+ return mult;
+}
+#undef vcx_vec_pos
+#undef vcx_size_pos
+
+static void
+vcx_ensure_register_in_range (unsigned R, enum vcx_reg_type reg_type)
+{
+ if (reg_type == q_reg)
{
- high8 = 0xfc000000;
- rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ gas_assert (R % 2 == 0);
+ constraint (R >= 16, _("'q' register must be in range 0-7"));
}
-
- if (unsigned_p)
- neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_U8);
+ else if (reg_type == d_reg)
+ constraint (R >= 16, _("'d' register must be in range 0-15"));
else
- neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_S8);
+ constraint (R >= 32, _("'s' register must be in range 0-31"));
+}
- /* The "U" bit in traditional Three Same encoding is fixed to 0 for Dot
- Product instruction, so we pass 0 as the "ubit" parameter. And the
- "Size" field are fixed to 0x2, so we pass 32 as the "size" parameter. */
- neon_three_same (neon_quad (rs), 0, 32);
+static void (*vcx_assign_vec[3]) (unsigned) = {
+ vcx_assign_vec_d,
+ vcx_assign_vec_m,
+ vcx_assign_vec_n
+};
- /* Undo neon_dp_fixup. Dot Product instructions are using a slightly
- different NEON three-same encoding. */
- inst.instruction &= 0x00ffffff;
- inst.instruction |= high8;
- /* Encode 'U' bit which indicates signedness. */
- inst.instruction |= (unsigned_p ? 1 : 0) << 4;
- /* Re-encode operand2 if it's indexed scalar operand. What has been encoded
- from inst.operand[2].reg in neon_three_same is GAS's internal encoding, not
- the instruction encoding. */
- if (inst.operands[2].isscalar)
+static void
+vcx_handle_register_arguments (unsigned num_registers,
+ enum vcx_reg_type reg_type)
+{
+ unsigned R, i;
+ unsigned reg_mult = vcx_handle_shape (reg_type);
+ for (i = 0; i < num_registers; i++)
{
- inst.instruction &= 0xffffffd0;
- inst.instruction |= LOW4 (scalar_oprd2);
- inst.instruction |= HI1 (scalar_oprd2) << 5;
+ R = inst.operands[i+1].reg;
+ vcx_ensure_register_in_range (R, reg_type);
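+ /* For the three-register forms the operands come in as d, n, m, but
+ vcx_assign_vec is ordered { d, m, n }, so swap the last two. */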
+ if (num_registers == 3 && i > 0)
+ {
+ if (i == 2)
+ vcx_assign_vec[1] (R * reg_mult);
+ else
+ vcx_assign_vec[2] (R * reg_mult);
+ continue;
+ }
+ vcx_assign_vec[i](R * reg_mult);
}
}
-/* Dot Product instructions for signed integer. */
+static void
+vcx_handle_insn_block (enum vcx_reg_type reg_type)
+{
+ if (reg_type == q_reg)
+ {
+ if (inst.cond > COND_ALWAYS)
+ inst.pred_insn_type = INSIDE_VPT_INSN;
+ else
+ inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+ }
+ else if (inst.cond == COND_ALWAYS)
+ inst.pred_insn_type = OUTSIDE_PRED_INSN;
+ else
+ inst.error = BAD_NOT_IT;
+}
static void
-do_neon_dotproduct_s (void)
+vcx_handle_common_checks (unsigned num_args, enum neon_shape rs)
{
- return do_neon_dotproduct (0);
+ constraint (!mark_feature_used (&arm_ext_cde), _(BAD_CDE));
+ cde_handle_coproc ();
+ enum vcx_reg_type reg_type = vcx_get_reg_type (rs);
+ vcx_handle_register_arguments (num_args, reg_type);
+ vcx_handle_insn_block (reg_type);
+ if (reg_type == q_reg)
+ constraint (!mark_feature_used (&mve_ext),
+ _("vcx instructions with Q registers require MVE"));
+ else
+ constraint (!(ARM_FSET_CPU_SUBSET (armv8m_fp, cpu_variant)
+ && mark_feature_used (&armv8m_fp))
+ && !mark_feature_used (&mve_ext),
+ _("vcx instructions with S or D registers require either MVE"
+ " or Armv8-M floating point etension."));
}
-/* Dot Product instructions for unsigned integer. */
+static void
+do_vcx1 (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_PQI, NS_PDI, NS_PFI, NS_NULL);
+ vcx_handle_common_checks (1, rs);
+
+ unsigned imm = inst.operands[2].imm;
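+ /* Scatter the immediate: imm[5:0] to bits 5:0, imm[6] to bit 7,
+ imm[10:7] to bits 19:16 and imm[11] (valid only in the Q form) to
+ bit 24. */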
+ inst.instruction |= (imm & 0x03f);
+ inst.instruction |= (imm & 0x040) << 1;
+ inst.instruction |= (imm & 0x780) << 9;
+ if (rs != NS_PQI)
+ constraint (imm >= 2048,
+ _("vcx1 with S or D registers takes immediate within 0-2047"));
+ inst.instruction |= (imm & 0x800) << 13;
+}
static void
-do_neon_dotproduct_u (void)
+do_vcx2 (void)
{
- return do_neon_dotproduct (1);
+ enum neon_shape rs = neon_select_shape (NS_PQQI, NS_PDDI, NS_PFFI, NS_NULL);
+ vcx_handle_common_checks (2, rs);
+
+ unsigned imm = inst.operands[3].imm;
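+ /* Scatter the immediate: imm[0] to bit 4, imm[1] to bit 7,
+ imm[5:2] to bits 19:16 and imm[6] (valid only in the Q form) to
+ bit 24. */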
+ inst.instruction |= (imm & 0x01) << 4;
+ inst.instruction |= (imm & 0x02) << 6;
+ inst.instruction |= (imm & 0x3c) << 14;
+ if (rs != NS_PQQI)
+ constraint (imm >= 64,
+ _("vcx2 with S or D registers takes immediate within 0-63"));
+ inst.instruction |= (imm & 0x40) << 18;
+}
+
+static void
+do_vcx3 (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_PQQQI, NS_PDDDI, NS_PFFFI, NS_NULL);
+ vcx_handle_common_checks (3, rs);
+
+ unsigned imm = inst.operands[4].imm;
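+ /* Scatter the immediate: imm[0] to bit 4, imm[2:1] to bits 21:20
+ and imm[3] (valid only in the Q form) to bit 24. */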
+ inst.instruction |= (imm & 0x1) << 4;
+ inst.instruction |= (imm & 0x6) << 19;
+ if (rs != NS_PQQQI)
+ constraint (imm >= 8,
+ _("vcx2 with S or D registers takes immediate within 0-7"));
+ inst.instruction |= (imm & 0x8) << 21;
}
/* Crypto v1 instructions. */
do_vfp_cond_or_thumb ();
}
+static void
+do_vdot (void)
+{
+ enum neon_shape rs;
+ constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
+ set_pred_insn_type (OUTSIDE_PRED_INSN);
+ if (inst.operands[2].isscalar)
+ {
+ rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
+
+ inst.instruction |= (1 << 25);
+ int index = inst.operands[2].reg & 0xf;
+ constraint ((index != 1 && index != 0), _("index must be 0 or 1"));
+ inst.operands[2].reg >>= 4;
+ constraint (!(inst.operands[2].reg < 16),
+ _("indexed register must be less than 16"));
+ neon_three_args (rs == NS_QQS);
+ inst.instruction |= (index << 5);
+ }
+ else
+ {
+ rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
+ neon_three_args (rs == NS_QQQ);
+ }
+}
+
+static void
+do_vmmla (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
+
+ constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
+ set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+ neon_three_args (1);
+}
+
\f
/* Overall per-instruction processing. */
close_automatic_it_block ();
break;
+ /* Fallthrough. */
case NEUTRAL_IT_INSN:
now_pred.block_length++;
now_pred.insn_cond = TRUE;
handle_pred_state ();
if (now_pred.insn_cond
+ && warn_on_restrict_it
&& !now_pred.warn_deprecated
&& warn_on_deprecated
- && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8)
+ && (ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8)
+ || ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8r))
&& !ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_m))
{
if (inst.instruction >= 0x10000)
out of the jump table, and chaos would ensue. */
if (label_is_thumb_function_name
&& (S_GET_NAME (sym)[0] != '.' || S_GET_NAME (sym)[1] != 'L')
- && (bfd_get_section_flags (stdoutput, now_seg) & SEC_CODE) != 0)
+ && (bfd_section_flags (now_seg) & SEC_CODE) != 0)
{
/* When the address of a Thumb function is taken the bottom
bit of that address should be set. This will allow
REGDEF(WR, 7,RN), REGDEF(SB, 9,RN), REGDEF(SL,10,RN), REGDEF(FP,11,RN),
REGDEF(IP,12,RN), REGDEF(SP,13,RN), REGDEF(LR,14,RN), REGDEF(PC,15,RN),
+ /* Defining the new Zero register from ARMv8.1-M. */
+ REGDEF(zr,15,ZR),
+ REGDEF(ZR,15,ZR),
+
/* Coprocessor numbers. */
REGSET(p, CP), REGSET(P, CP),
REGDEF(mvfr0,7,VFC), REGDEF(mvfr1,6,VFC),
REGDEF(MVFR0,7,VFC), REGDEF(MVFR1,6,VFC),
REGDEF(mvfr2,5,VFC), REGDEF(MVFR2,5,VFC),
+ REGDEF(fpscr_nzcvqc,2,VFC), REGDEF(FPSCR_nzcvqc,2,VFC),
+ REGDEF(vpr,12,VFC), REGDEF(VPR,12,VFC),
+ REGDEF(fpcxt_ns,14,VFC), REGDEF(FPCXT_NS,14,VFC),
+ REGDEF(fpcxt_s,15,VFC), REGDEF(FPCXT_S,15,VFC),
/* Maverick DSP coprocessor registers. */
REGSET(mvf,MVF), REGSET(mvd,MVD), REGSET(mvfx,MVFX), REGSET(mvdx,MVDX),
#define cCE(mnem, op, nops, ops, ae) \
{ mnem, OPS##nops ops, OT_csuffix, 0x##op, 0xe##op, ARM_VARIANT, ARM_VARIANT, do_##ae, do_##ae, 0 }
+/* mov instructions that are shared between coprocessor and MVE. */
+#define mcCE(mnem, op, nops, ops, ae) \
+ { #mnem, OPS##nops ops, OT_csuffix, 0x##op, 0xe##op, ARM_VARIANT, THUMB_VARIANT, do_##ae, do_##ae, 0 }
+
/* Legacy coprocessor instructions where conditional infix and conditional
suffix are ambiguous. For consistency this includes all FPA instructions,
not just the potentially ambiguous ones. */
nUF(vselvs, _vselvs, 3, (RVSD, RVSD, RVSD), vsel),
nUF(vselge, _vselge, 3, (RVSD, RVSD, RVSD), vsel),
nUF(vselgt, _vselgt, 3, (RVSD, RVSD, RVSD), vsel),
- nUF(vmaxnm, _vmaxnm, 3, (RNSDQ, oRNSDQ, RNSDQ), vmaxnm),
- nUF(vminnm, _vminnm, 3, (RNSDQ, oRNSDQ, RNSDQ), vmaxnm),
- nUF(vcvta, _vcvta, 2, (RNSDQ, oRNSDQ), neon_cvta),
- nUF(vcvtn, _vcvta, 2, (RNSDQ, oRNSDQ), neon_cvtn),
- nUF(vcvtp, _vcvta, 2, (RNSDQ, oRNSDQ), neon_cvtp),
- nUF(vcvtm, _vcvta, 2, (RNSDQ, oRNSDQ), neon_cvtm),
nCE(vrintr, _vrintr, 2, (RNSDQ, oRNSDQ), vrintr),
- nCE(vrintz, _vrintr, 2, (RNSDQ, oRNSDQ), vrintz),
- nCE(vrintx, _vrintr, 2, (RNSDQ, oRNSDQ), vrintx),
- nUF(vrinta, _vrinta, 2, (RNSDQ, oRNSDQ), vrinta),
- nUF(vrintn, _vrinta, 2, (RNSDQ, oRNSDQ), vrintn),
- nUF(vrintp, _vrinta, 2, (RNSDQ, oRNSDQ), vrintp),
- nUF(vrintm, _vrinta, 2, (RNSDQ, oRNSDQ), vrintm),
+ mnCE(vrintz, _vrintr, 2, (RNSDQMQ, oRNSDQMQ), vrintz),
+ mnCE(vrintx, _vrintr, 2, (RNSDQMQ, oRNSDQMQ), vrintx),
+ mnUF(vrinta, _vrinta, 2, (RNSDQMQ, oRNSDQMQ), vrinta),
+ mnUF(vrintn, _vrinta, 2, (RNSDQMQ, oRNSDQMQ), vrintn),
+ mnUF(vrintp, _vrinta, 2, (RNSDQMQ, oRNSDQMQ), vrintp),
+ mnUF(vrintm, _vrinta, 2, (RNSDQMQ, oRNSDQMQ), vrintm),
/* Crypto v1 extensions. */
#undef ARM_VARIANT
nUF(sha256su0, _sha2op, 2, (RNQ, RNQ), sha256su0),
#undef ARM_VARIANT
-#define ARM_VARIANT & crc_ext_armv8
+#define ARM_VARIANT & arm_ext_crc
#undef THUMB_VARIANT
-#define THUMB_VARIANT & crc_ext_armv8
+#define THUMB_VARIANT & arm_ext_crc
TUEc("crc32b", 1000040, fac0f080, 3, (RR, oRR, RR), crc32b),
TUEc("crc32h", 1200040, fac0f090, 3, (RR, oRR, RR), crc32h),
TUEc("crc32w", 1400040, fac0f0a0, 3, (RR, oRR, RR), crc32w),
#undef THUMB_VARIANT
#define THUMB_VARIANT & arm_ext_v8_3
NCE (vjcvt, eb90bc0, 2, (RVS, RVD), vjcvt),
- NUF (vcmla, 0, 4, (RNDQ, RNDQ, RNDQ_RNSC, EXPi), vcmla),
- NUF (vcadd, 0, 4, (RNDQ, RNDQ, RNDQ, EXPi), vcadd),
#undef ARM_VARIANT
#define ARM_VARIANT & fpu_neon_ext_dotprod
#undef ARM_VARIANT
#define ARM_VARIANT & fpu_vfp_ext_v1xd /* VFP V1xD (single precision). */
+#undef THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_v6t2
+ mcCE(vmrs, ef00a10, 2, (APSR_RR, RVC), vmrs),
+ mcCE(vmsr, ee00a10, 2, (RVC, RR), vmsr),
+ mcCE(fldd, d100b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst),
+ mcCE(fstd, d000b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst),
+ mcCE(flds, d100a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst),
+ mcCE(fsts, d000a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst),
+
+ /* Memory operations. */
+ mcCE(fldmias, c900a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia),
+ mcCE(fldmdbs, d300a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb),
+ mcCE(fstmias, c800a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia),
+ mcCE(fstmdbs, d200a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb),
+#undef THUMB_VARIANT
/* Moves and type conversions. */
- cCE("fcpys", eb00a40, 2, (RVS, RVS), vfp_sp_monadic),
- cCE("fmrs", e100a10, 2, (RR, RVS), vfp_reg_from_sp),
- cCE("fmsr", e000a10, 2, (RVS, RR), vfp_sp_from_reg),
cCE("fmstat", ef1fa10, 0, (), noargs),
- cCE("vmrs", ef00a10, 2, (APSR_RR, RVC), vmrs),
- cCE("vmsr", ee00a10, 2, (RVC, RR), vmsr),
cCE("fsitos", eb80ac0, 2, (RVS, RVS), vfp_sp_monadic),
cCE("fuitos", eb80a40, 2, (RVS, RVS), vfp_sp_monadic),
cCE("ftosis", ebd0a40, 2, (RVS, RVS), vfp_sp_monadic),
cCE("fmxr", ee00a10, 2, (RVC, RR), rn_rd),
/* Memory operations. */
- cCE("flds", d100a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst),
- cCE("fsts", d000a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst),
- cCE("fldmias", c900a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia),
cCE("fldmfds", c900a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia),
- cCE("fldmdbs", d300a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb),
cCE("fldmeas", d300a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb),
cCE("fldmiax", c900b00, 2, (RRnpctw, VRDLST), vfp_xp_ldstmia),
cCE("fldmfdx", c900b00, 2, (RRnpctw, VRDLST), vfp_xp_ldstmia),
cCE("fldmdbx", d300b00, 2, (RRnpctw, VRDLST), vfp_xp_ldstmdb),
cCE("fldmeax", d300b00, 2, (RRnpctw, VRDLST), vfp_xp_ldstmdb),
- cCE("fstmias", c800a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia),
cCE("fstmeas", c800a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia),
- cCE("fstmdbs", d200a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb),
cCE("fstmfds", d200a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb),
cCE("fstmiax", c800b00, 2, (RRnpctw, VRDLST), vfp_xp_ldstmia),
cCE("fstmeax", c800b00, 2, (RRnpctw, VRDLST), vfp_xp_ldstmia),
/* Double precision load/store are still present on single precision
implementations. */
- cCE("fldd", d100b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst),
- cCE("fstd", d000b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst),
cCE("fldmiad", c900b00, 2, (RRnpctw, VRDLST), vfp_dp_ldstmia),
cCE("fldmfdd", c900b00, 2, (RRnpctw, VRDLST), vfp_dp_ldstmia),
cCE("fldmdbd", d300b00, 2, (RRnpctw, VRDLST), vfp_dp_ldstmdb),
#define ARM_VARIANT & fpu_vfp_ext_v1 /* VFP V1 (Double precision). */
/* Moves and type conversions. */
- cCE("fcpyd", eb00b40, 2, (RVD, RVD), vfp_dp_rd_rm),
cCE("fcvtds", eb70ac0, 2, (RVD, RVS), vfp_dp_sp_cvt),
cCE("fcvtsd", eb70bc0, 2, (RVS, RVD), vfp_sp_dp_cvt),
cCE("fmdhr", e200b10, 2, (RVD, RR), vfp_dp_rn_rd),
cCE("fcmped", eb40bc0, 2, (RVD, RVD), vfp_dp_rd_rm),
cCE("fcmpezd", eb50bc0, 1, (RVD), vfp_dp_rd),
-#undef ARM_VARIANT
-#define ARM_VARIANT & fpu_vfp_ext_v2
-
- cCE("fmsrr", c400a10, 3, (VRSLST, RR, RR), vfp_sp2_from_reg2),
- cCE("fmrrs", c500a10, 3, (RR, RR, VRSLST), vfp_reg2_from_sp2),
- cCE("fmdrr", c400b10, 3, (RVD, RR, RR), vfp_dp_rm_rd_rn),
- cCE("fmrrd", c500b10, 3, (RR, RR, RVD), vfp_dp_rd_rn_rm),
-
/* Instructions which may belong to either the Neon or VFP instruction sets.
Individual encoder functions perform additional architecture checks. */
#undef ARM_VARIANT
#define ARM_VARIANT & fpu_vfp_ext_v1xd
+#undef THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_v6t2
+
+ NCE(vldm, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vldmia, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vldmdb, d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstm, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstmia, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstmdb, d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+
+ NCE(vpop, 0, 1, (VRSDLST), vfp_nsyn_pop),
+ NCE(vpush, 0, 1, (VRSDLST), vfp_nsyn_push),
+
#undef THUMB_VARIANT
#define THUMB_VARIANT & fpu_vfp_ext_v1xd
nCE(vnmul, _vnmul, 3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
nCE(vnmla, _vnmla, 3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
nCE(vnmls, _vnmls, 3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
- nCE(vcmp, _vcmp, 2, (RVSD, RSVD_FI0), vfp_nsyn_cmp),
- nCE(vcmpe, _vcmpe, 2, (RVSD, RSVD_FI0), vfp_nsyn_cmp),
- NCE(vpush, 0, 1, (VRSDLST), vfp_nsyn_push),
- NCE(vpop, 0, 1, (VRSDLST), vfp_nsyn_pop),
NCE(vcvtz, 0, 2, (RVSD, RVSD), vfp_nsyn_cvtz),
/* Mnemonics shared by Neon and VFP. */
- nCEF(vmul, _vmul, 3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mul),
- nCEF(vmla, _vmla, 3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar),
nCEF(vmls, _vmls, 3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar),
- NCE(vldm, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vldmia, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vldmdb, d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vstm, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vstmia, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vstmdb, d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
-
- nCEF(vcvt, _vcvt, 3, (RNSDQ, RNSDQ, oI32z), neon_cvt),
+ mnCEF(vcvt, _vcvt, 3, (RNSDQMQ, RNSDQMQ, oI32z), neon_cvt),
nCEF(vcvtr, _vcvt, 2, (RNSDQ, RNSDQ), neon_cvtr),
- NCEF(vcvtb, eb20a40, 2, (RVSD, RVSD), neon_cvtb),
- NCEF(vcvtt, eb20a40, 2, (RVSD, RVSD), neon_cvtt),
+ MNCEF(vcvtb, eb20a40, 3, (RVSDMQ, RVSDMQ, oI32b), neon_cvtb),
+ MNCEF(vcvtt, eb20a40, 3, (RVSDMQ, RVSDMQ, oI32b), neon_cvtt),
/* NOTE: All VMOV encoding is special-cased! */
- NCE(vmov, 0, 1, (VMOV), neon_mov),
NCE(vmovq, 0, 1, (VMOV), neon_mov),
#undef THUMB_VARIANT
NCE (vins, eb00ac0, 2, (RVS, RVS), neon_movhf),
/* New backported fma/fms instructions optional in v8.2. */
- NCE (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal),
- NCE (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl),
+ NUF (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl),
+ NUF (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal),
#undef THUMB_VARIANT
#define THUMB_VARIANT & fpu_neon_ext_v1
/* integer ops, valid types S8 S16 S32 U8 U16 U32. */
NUF(vaba, 0000710, 3, (RNDQ, RNDQ, RNDQ), neon_dyadic_i_su),
NUF(vabaq, 0000710, 3, (RNQ, RNQ, RNQ), neon_dyadic_i_su),
- NUF(vhadd, 0000000, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su),
NUF(vhaddq, 0000000, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i_su),
- NUF(vrhadd, 0000100, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su),
NUF(vrhaddq, 0000100, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i_su),
- NUF(vhsub, 0000200, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su),
NUF(vhsubq, 0000200, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i_su),
/* integer ops, valid types S8 S16 S32 S64 U8 U16 U32 U64. */
- NUF(vqadd, 0000010, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su),
NUF(vqaddq, 0000010, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i64_su),
- NUF(vqsub, 0000210, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su),
NUF(vqsubq, 0000210, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i64_su),
- NUF(vrshl, 0000500, 3, (RNDQ, oRNDQ, RNDQ), neon_rshl),
NUF(vrshlq, 0000500, 3, (RNQ, oRNQ, RNQ), neon_rshl),
- NUF(vqrshl, 0000510, 3, (RNDQ, oRNDQ, RNDQ), neon_rshl),
NUF(vqrshlq, 0000510, 3, (RNQ, oRNQ, RNQ), neon_rshl),
/* If not immediate, fall back to neon_dyadic_i64_su.
- shl_imm should accept I8 I16 I32 I64,
- qshl_imm should accept S8 S16 S32 S64 U8 U16 U32 U64. */
- nUF(vshl, _vshl, 3, (RNDQ, oRNDQ, RNDQ_I63b), neon_shl_imm),
- nUF(vshlq, _vshl, 3, (RNQ, oRNQ, RNDQ_I63b), neon_shl_imm),
- nUF(vqshl, _vqshl, 3, (RNDQ, oRNDQ, RNDQ_I63b), neon_qshl_imm),
- nUF(vqshlq, _vqshl, 3, (RNQ, oRNQ, RNDQ_I63b), neon_qshl_imm),
+ shl should accept I8 I16 I32 I64,
+ qshl should accept S8 S16 S32 S64 U8 U16 U32 U64. */
+ nUF(vshlq, _vshl, 3, (RNQ, oRNQ, RNDQ_I63b), neon_shl),
+ nUF(vqshlq, _vqshl, 3, (RNQ, oRNQ, RNDQ_I63b), neon_qshl),
/* Logic ops, types optional & ignored. */
- nUF(vand, _vand, 3, (RNDQ, oRNDQ, RNDQ_Ibig), neon_logic),
nUF(vandq, _vand, 3, (RNQ, oRNQ, RNDQ_Ibig), neon_logic),
- nUF(vbic, _vbic, 3, (RNDQ, oRNDQ, RNDQ_Ibig), neon_logic),
nUF(vbicq, _vbic, 3, (RNQ, oRNQ, RNDQ_Ibig), neon_logic),
- nUF(vorr, _vorr, 3, (RNDQ, oRNDQ, RNDQ_Ibig), neon_logic),
nUF(vorrq, _vorr, 3, (RNQ, oRNQ, RNDQ_Ibig), neon_logic),
- nUF(vorn, _vorn, 3, (RNDQ, oRNDQ, RNDQ_Ibig), neon_logic),
nUF(vornq, _vorn, 3, (RNQ, oRNQ, RNDQ_Ibig), neon_logic),
- nUF(veor, _veor, 3, (RNDQ, oRNDQ, RNDQ), neon_logic),
nUF(veorq, _veor, 3, (RNQ, oRNQ, RNQ), neon_logic),
/* Bitfield ops, untyped. */
NUF(vbsl, 1100110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield),
NUF(vbifq, 1300110, 3, (RNQ, RNQ, RNQ), neon_bitfield),
/* Int and float variants, types S8 S16 S32 U8 U16 U32 F16 F32. */
nUF(vabdq, _vabd, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su),
- nUF(vmax, _vmax, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
nUF(vmaxq, _vmax, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su),
- nUF(vmin, _vmin, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
nUF(vminq, _vmin, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su),
/* Comparisons. Types S8 S16 S32 U8 U16 U32 F32. Non-immediate versions fall
back to neon_dyadic_if_su. */
/* VMUL takes I8 I16 I32 F32 P8. */
nUF(vmulq, _vmul, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_mul),
/* VQD{R}MULH takes S16 S32. */
- nUF(vqdmulh, _vqdmulh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh),
nUF(vqdmulhq, _vqdmulh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qdmulh),
- nUF(vqrdmulh, _vqrdmulh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh),
nUF(vqrdmulhq, _vqrdmulh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qdmulh),
NUF(vacge, 0000e10, 3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute),
NUF(vacgeq, 0000e10, 3, (RNQ, oRNQ, RNQ), neon_fcmp_absolute),
NUF(vrsqrts, 0200f10, 3, (RNDQ, oRNDQ, RNDQ), neon_step),
NUF(vrsqrtsq, 0200f10, 3, (RNQ, oRNQ, RNQ), neon_step),
/* ARM v8.1 extension. */
- nUF (vqrdmlah, _vqrdmlah, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah),
nUF (vqrdmlahq, _vqrdmlah, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qrdmlah),
nUF (vqrdmlsh, _vqrdmlsh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah),
nUF (vqrdmlshq, _vqrdmlsh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qrdmlah),
/* Data processing with two registers and a shift amount. */
/* Right shifts, and variants with rounding.
Types accepted S8 S16 S32 S64 U8 U16 U32 U64. */
- NUF(vshr, 0800010, 3, (RNDQ, oRNDQ, I64z), neon_rshift_round_imm),
NUF(vshrq, 0800010, 3, (RNQ, oRNQ, I64z), neon_rshift_round_imm),
- NUF(vrshr, 0800210, 3, (RNDQ, oRNDQ, I64z), neon_rshift_round_imm),
NUF(vrshrq, 0800210, 3, (RNQ, oRNQ, I64z), neon_rshift_round_imm),
NUF(vsra, 0800110, 3, (RNDQ, oRNDQ, I64), neon_rshift_round_imm),
NUF(vsraq, 0800110, 3, (RNQ, oRNQ, I64), neon_rshift_round_imm),
NUF(vrsra, 0800310, 3, (RNDQ, oRNDQ, I64), neon_rshift_round_imm),
NUF(vrsraq, 0800310, 3, (RNQ, oRNQ, I64), neon_rshift_round_imm),
/* Shift and insert. Sizes accepted 8 16 32 64. */
- NUF(vsli, 1800510, 3, (RNDQ, oRNDQ, I63), neon_sli),
NUF(vsliq, 1800510, 3, (RNQ, oRNQ, I63), neon_sli),
- NUF(vsri, 1800410, 3, (RNDQ, oRNDQ, I64), neon_sri),
NUF(vsriq, 1800410, 3, (RNQ, oRNQ, I64), neon_sri),
/* QSHL{U} immediate accepts S8 S16 S32 S64 U8 U16 U32 U64. */
- NUF(vqshlu, 1800610, 3, (RNDQ, oRNDQ, I63), neon_qshlu_imm),
NUF(vqshluq, 1800610, 3, (RNQ, oRNQ, I63), neon_qshlu_imm),
/* Right shift immediate, saturating & narrowing, with rounding variants.
Types accepted S16 S32 S64 U16 U32 U64. */
/* CVT with optional immediate for fixed-point variant. */
nUF(vcvtq, _vcvt, 3, (RNQ, RNQ, oI32b), neon_cvt),
- nUF(vmvn, _vmvn, 2, (RNDQ, RNDQ_Ibig), neon_mvn),
nUF(vmvnq, _vmvn, 2, (RNQ, RNDQ_Ibig), neon_mvn),
/* Data processing, three registers of different lengths. */
/* Two registers, miscellaneous. */
/* Reverse. Sizes 8 16 32 (must be < size in opcode). */
- NUF(vrev64, 1b00000, 2, (RNDQ, RNDQ), neon_rev),
NUF(vrev64q, 1b00000, 2, (RNQ, RNQ), neon_rev),
- NUF(vrev32, 1b00080, 2, (RNDQ, RNDQ), neon_rev),
NUF(vrev32q, 1b00080, 2, (RNQ, RNQ), neon_rev),
- NUF(vrev16, 1b00100, 2, (RNDQ, RNDQ), neon_rev),
NUF(vrev16q, 1b00100, 2, (RNQ, RNQ), neon_rev),
/* Vector replicate. Sizes 8 16 32. */
- nCE(vdup, _vdup, 2, (RNDQ, RR_RNSC), neon_dup),
nCE(vdupq, _vdup, 2, (RNQ, RR_RNSC), neon_dup),
/* VMOVL. Types S8 S16 S32 U8 U16 U32. */
NUF(vmovl, 0800a10, 2, (RNQ, RND), neon_movl),
NUF(vuzp, 1b20100, 2, (RNDQ, RNDQ), neon_zip_uzp),
NUF(vuzpq, 1b20100, 2, (RNQ, RNQ), neon_zip_uzp),
/* VQABS / VQNEG. Types S8 S16 S32. */
- NUF(vqabs, 1b00700, 2, (RNDQ, RNDQ), neon_sat_abs_neg),
NUF(vqabsq, 1b00700, 2, (RNQ, RNQ), neon_sat_abs_neg),
- NUF(vqneg, 1b00780, 2, (RNDQ, RNDQ), neon_sat_abs_neg),
NUF(vqnegq, 1b00780, 2, (RNQ, RNQ), neon_sat_abs_neg),
/* Pairwise, lengthening. Types S8 S16 S32 U8 U16 U32. */
NUF(vpadal, 1b00600, 2, (RNDQ, RNDQ), neon_pair_long),
NUF(vrsqrte, 1b30480, 2, (RNDQ, RNDQ), neon_recip_est),
NUF(vrsqrteq, 1b30480, 2, (RNQ, RNQ), neon_recip_est),
/* VCLS. Types S8 S16 S32. */
- NUF(vcls, 1b00400, 2, (RNDQ, RNDQ), neon_cls),
NUF(vclsq, 1b00400, 2, (RNQ, RNQ), neon_cls),
/* VCLZ. Types I8 I16 I32. */
- NUF(vclz, 1b00480, 2, (RNDQ, RNDQ), neon_clz),
NUF(vclzq, 1b00480, 2, (RNQ, RNQ), neon_clz),
/* VCNT. Size 8. */
NUF(vcnt, 1b00500, 2, (RNDQ, RNDQ), neon_cnt),
#define ARM_VARIANT & fpu_vfp_ext_fma
#undef THUMB_VARIANT
#define THUMB_VARIANT & fpu_vfp_ext_fma
- /* Mnemonics shared by Neon and VFP. These are included in the
+ /* Mnemonics shared by Neon, VFP, MVE and BF16. These are included in the
VFP FMA variant; NEON and VFP FMA always includes the NEON
FMA instructions. */
- nCEF(vfma, _vfma, 3, (RNSDQ, oRNSDQ, RNSDQ), neon_fmac),
- nCEF(vfms, _vfms, 3, (RNSDQ, oRNSDQ, RNSDQ), neon_fmac),
+ mnCEF(vfma, _vfma, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQR), neon_fmac),
+ TUF ("vfmat", c300850, fc300850, 3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ_RR), mve_vfma, mve_vfma),
+ mnCEF(vfms, _vfms, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQ), neon_fmac),
+
/* ffmas/ffmad/ffmss/ffmsd are dummy mnemonics to satisfy gas;
the v form should always be used. */
cCE("ffmas", ea00a00, 3, (RVS, RVS, RVS), vfp_sp_dyadic),
/* Armv8.1-M Mainline instructions. */
#undef THUMB_VARIANT
#define THUMB_VARIANT & arm_ext_v8_1m_main
+ toU("cinc", _cinc, 3, (RRnpcsp, RR_ZR, COND), t_cond),
+ toU("cinv", _cinv, 3, (RRnpcsp, RR_ZR, COND), t_cond),
+ toU("cneg", _cneg, 3, (RRnpcsp, RR_ZR, COND), t_cond),
+ toU("csel", _csel, 4, (RRnpcsp, RR_ZR, RR_ZR, COND), t_cond),
+ toU("csetm", _csetm, 2, (RRnpcsp, COND), t_cond),
+ toU("cset", _cset, 2, (RRnpcsp, COND), t_cond),
+ toU("csinc", _csinc, 4, (RRnpcsp, RR_ZR, RR_ZR, COND), t_cond),
+ toU("csinv", _csinv, 4, (RRnpcsp, RR_ZR, RR_ZR, COND), t_cond),
+ toU("csneg", _csneg, 4, (RRnpcsp, RR_ZR, RR_ZR, COND), t_cond),
+
toC("bf", _bf, 2, (EXPs, EXPs), t_branch_future),
toU("bfcsel", _bfcsel, 4, (EXPs, EXPs, EXPs, COND), t_branch_future),
toC("bfx", _bfx, 2, (EXPs, RRnpcsp), t_branch_future),
#undef THUMB_VARIANT
#define THUMB_VARIANT & mve_ext
+ ToC("lsll", ea50010d, 3, (RRe, RRo, RRnpcsp_I32), mve_scalar_shift),
+ ToC("lsrl", ea50011f, 3, (RRe, RRo, I32), mve_scalar_shift),
+ ToC("asrl", ea50012d, 3, (RRe, RRo, RRnpcsp_I32), mve_scalar_shift),
+ ToC("uqrshll", ea51010d, 4, (RRe, RRo, I48_I64, RRnpcsp), mve_scalar_shift1),
+ ToC("sqrshrl", ea51012d, 4, (RRe, RRo, I48_I64, RRnpcsp), mve_scalar_shift1),
+ ToC("uqshll", ea51010f, 3, (RRe, RRo, I32), mve_scalar_shift),
+ ToC("urshrl", ea51011f, 3, (RRe, RRo, I32), mve_scalar_shift),
+ ToC("srshrl", ea51012f, 3, (RRe, RRo, I32), mve_scalar_shift),
+ ToC("sqshll", ea51013f, 3, (RRe, RRo, I32), mve_scalar_shift),
+ ToC("uqrshl", ea500f0d, 2, (RRnpcsp, RRnpcsp), mve_scalar_shift),
+ ToC("sqrshr", ea500f2d, 2, (RRnpcsp, RRnpcsp), mve_scalar_shift),
+ ToC("uqshl", ea500f0f, 2, (RRnpcsp, I32), mve_scalar_shift),
+ ToC("urshr", ea500f1f, 2, (RRnpcsp, I32), mve_scalar_shift),
+ ToC("srshr", ea500f2f, 2, (RRnpcsp, I32), mve_scalar_shift),
+ ToC("sqshl", ea500f3f, 2, (RRnpcsp, I32), mve_scalar_shift),
+
+ ToC("vpt", ee410f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vptt", ee018f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vpte", ee418f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vpttt", ee014f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vptte", ee01cf00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vptet", ee41cf00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vptee", ee414f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vptttt", ee012f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vpttte", ee016f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vpttet", ee01ef00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vpttee", ee01af00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vptett", ee41af00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vptete", ee41ef00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vpteet", ee416f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+ ToC("vpteee", ee412f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
+
ToC("vpst", fe710f4d, 0, (), mve_vpt),
ToC("vpstt", fe318f4d, 0, (), mve_vpt),
ToC("vpste", fe718f4d, 0, (), mve_vpt),
ToC("vpsteee", fe712f4d, 0, (), mve_vpt),
/* MVE and MVE FP only. */
+ mToC("vhcadd", ee000f00, 4, (RMQ, RMQ, RMQ, EXPi), mve_vhcadd),
+ mCEF(vctp, _vctp, 1, (RRnpc), mve_vctp),
+ mCEF(vadc, _vadc, 3, (RMQ, RMQ, RMQ), mve_vadc),
+ mCEF(vadci, _vadci, 3, (RMQ, RMQ, RMQ), mve_vadc),
+ mToC("vsbc", fe300f00, 3, (RMQ, RMQ, RMQ), mve_vsbc),
+ mToC("vsbci", fe301f00, 3, (RMQ, RMQ, RMQ), mve_vsbc),
mCEF(vmullb, _vmullb, 3, (RMQ, RMQ, RMQ), mve_vmull),
mCEF(vabav, _vabav, 3, (RRnpcsp, RMQ, RMQ), mve_vabav),
mCEF(vmladav, _vmladav, 3, (RRe, RMQ, RMQ), mve_vmladav),
mCEF(vldrw, _vldrw, 2, (RMQ, ADDRMVE), mve_vstr_vldr),
mCEF(vldrd, _vldrd, 2, (RMQ, ADDRMVE), mve_vstr_vldr),
+ mCEF(vmovnt, _vmovnt, 2, (RMQ, RMQ), mve_movn),
+ mCEF(vmovnb, _vmovnb, 2, (RMQ, RMQ), mve_movn),
+ mCEF(vbrsr, _vbrsr, 3, (RMQ, RMQ, RR), mve_vbrsr),
+ mCEF(vaddlv, _vaddlv, 3, (RRe, RRo, RMQ), mve_vaddlv),
+ mCEF(vaddlva, _vaddlva, 3, (RRe, RRo, RMQ), mve_vaddlv),
+ mCEF(vaddv, _vaddv, 2, (RRe, RMQ), mve_vaddv),
+ mCEF(vaddva, _vaddva, 2, (RRe, RMQ), mve_vaddv),
+ mCEF(vddup, _vddup, 3, (RMQ, RRe, EXPi), mve_viddup),
+ mCEF(vdwdup, _vdwdup, 4, (RMQ, RRe, RR, EXPi), mve_viddup),
+ mCEF(vidup, _vidup, 3, (RMQ, RRe, EXPi), mve_viddup),
+ mCEF(viwdup, _viwdup, 4, (RMQ, RRe, RR, EXPi), mve_viddup),
+ mToC("vmaxa", ee330e81, 2, (RMQ, RMQ), mve_vmaxa_vmina),
+ mToC("vmina", ee331e81, 2, (RMQ, RMQ), mve_vmaxa_vmina),
+ mCEF(vmaxv, _vmaxv, 2, (RR, RMQ), mve_vmaxv),
+ mCEF(vmaxav, _vmaxav, 2, (RR, RMQ), mve_vmaxv),
+ mCEF(vminv, _vminv, 2, (RR, RMQ), mve_vmaxv),
+ mCEF(vminav, _vminav, 2, (RR, RMQ), mve_vmaxv),
+
+ mCEF(vmlaldav, _vmlaldav, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav),
+ mCEF(vmlaldava, _vmlaldava, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav),
+ mCEF(vmlaldavx, _vmlaldavx, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav),
+ mCEF(vmlaldavax, _vmlaldavax, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav),
+ mCEF(vmlalv, _vmlaldav, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav),
+ mCEF(vmlalva, _vmlaldava, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav),
+ mCEF(vmlsldav, _vmlsldav, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav),
+ mCEF(vmlsldava, _vmlsldava, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav),
+ mCEF(vmlsldavx, _vmlsldavx, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav),
+ mCEF(vmlsldavax, _vmlsldavax, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav),
+ mToC("vrmlaldavh", ee800f00, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh),
+ mToC("vrmlaldavha",ee800f20, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh),
+ mCEF(vrmlaldavhx, _vrmlaldavhx, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh),
+ mCEF(vrmlaldavhax, _vrmlaldavhax, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh),
+ mToC("vrmlalvh", ee800f00, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh),
+ mToC("vrmlalvha", ee800f20, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh),
+ mCEF(vrmlsldavh, _vrmlsldavh, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh),
+ mCEF(vrmlsldavha, _vrmlsldavha, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh),
+ mCEF(vrmlsldavhx, _vrmlsldavhx, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh),
+ mCEF(vrmlsldavhax, _vrmlsldavhax, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh),
+
+ mToC("vmlas", ee011e40, 3, (RMQ, RMQ, RR), mve_vmlas),
+ mToC("vmulh", ee010e01, 3, (RMQ, RMQ, RMQ), mve_vmulh),
+ mToC("vrmulh", ee011e01, 3, (RMQ, RMQ, RMQ), mve_vmulh),
+ mToC("vpnot", fe310f4d, 0, (), mve_vpnot),
+ mToC("vpsel", fe310f01, 3, (RMQ, RMQ, RMQ), mve_vpsel),
+
+ mToC("vqdmladh", ee000e00, 3, (RMQ, RMQ, RMQ), mve_vqdmladh),
+ mToC("vqdmladhx", ee001e00, 3, (RMQ, RMQ, RMQ), mve_vqdmladh),
+ mToC("vqrdmladh", ee000e01, 3, (RMQ, RMQ, RMQ), mve_vqdmladh),
+ mToC("vqrdmladhx",ee001e01, 3, (RMQ, RMQ, RMQ), mve_vqdmladh),
+ mToC("vqdmlsdh", fe000e00, 3, (RMQ, RMQ, RMQ), mve_vqdmladh),
+ mToC("vqdmlsdhx", fe001e00, 3, (RMQ, RMQ, RMQ), mve_vqdmladh),
+ mToC("vqrdmlsdh", fe000e01, 3, (RMQ, RMQ, RMQ), mve_vqdmladh),
+ mToC("vqrdmlsdhx",fe001e01, 3, (RMQ, RMQ, RMQ), mve_vqdmladh),
+ mToC("vqdmlah", ee000e60, 3, (RMQ, RMQ, RR), mve_vqdmlah),
+ mToC("vqdmlash", ee001e60, 3, (RMQ, RMQ, RR), mve_vqdmlah),
+ mToC("vqrdmlash", ee001e40, 3, (RMQ, RMQ, RR), mve_vqdmlah),
+ mToC("vqdmullt", ee301f00, 3, (RMQ, RMQ, RMQRR), mve_vqdmull),
+ mToC("vqdmullb", ee300f00, 3, (RMQ, RMQ, RMQRR), mve_vqdmull),
+ mCEF(vqmovnt, _vqmovnt, 2, (RMQ, RMQ), mve_vqmovn),
+ mCEF(vqmovnb, _vqmovnb, 2, (RMQ, RMQ), mve_vqmovn),
+ mCEF(vqmovunt, _vqmovunt, 2, (RMQ, RMQ), mve_vqmovn),
+ mCEF(vqmovunb, _vqmovunb, 2, (RMQ, RMQ), mve_vqmovn),
+
+ mCEF(vshrnt, _vshrnt, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vshrnb, _vshrnb, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vrshrnt, _vrshrnt, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vrshrnb, _vrshrnb, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqshrnt, _vqrshrnt, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqshrnb, _vqrshrnb, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqshrunt, _vqrshrunt, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqshrunb, _vqrshrunb, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqrshrnt, _vqrshrnt, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqrshrnb, _vqrshrnb, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqrshrunt, _vqrshrunt, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqrshrunb, _vqrshrunb, 3, (RMQ, RMQ, I32z), mve_vshrn),
+
+ mToC("vshlc", eea00fc0, 3, (RMQ, RR, I32z), mve_vshlc),
+ mToC("vshllt", ee201e00, 3, (RMQ, RMQ, I32), mve_vshll),
+ mToC("vshllb", ee200e00, 3, (RMQ, RMQ, I32), mve_vshll),
+
+ toU("dlstp", _dlstp, 2, (LR, RR), t_loloop),
+ toU("wlstp", _wlstp, 3, (LR, RR, EXP), t_loloop),
+ toU("letp", _letp, 2, (LR, EXP), t_loloop),
+ toU("lctp", _lctp, 0, (), t_loloop),
+
+#undef THUMB_VARIANT
+#define THUMB_VARIANT & mve_fp_ext
+ mToC("vcmul", ee300e00, 4, (RMQ, RMQ, RMQ, EXPi), mve_vcmul),
+ mToC("vfmas", ee311e40, 3, (RMQ, RMQ, RR), mve_vfmas),
+ mToC("vmaxnma", ee3f0e81, 2, (RMQ, RMQ), mve_vmaxnma_vminnma),
+ mToC("vminnma", ee3f1e81, 2, (RMQ, RMQ), mve_vmaxnma_vminnma),
+ mToC("vmaxnmv", eeee0f00, 2, (RR, RMQ), mve_vmaxnmv),
+ mToC("vmaxnmav",eeec0f00, 2, (RR, RMQ), mve_vmaxnmv),
+ mToC("vminnmv", eeee0f80, 2, (RR, RMQ), mve_vmaxnmv),
+ mToC("vminnmav",eeec0f80, 2, (RR, RMQ), mve_vmaxnmv),
+
#undef ARM_VARIANT
-#define ARM_VARIANT & fpu_vfp_ext_v1xd
+#define ARM_VARIANT & fpu_vfp_ext_v1
#undef THUMB_VARIANT
#define THUMB_VARIANT & arm_ext_v6t2
+ mnCEF(vmla, _vmla, 3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ_RR), neon_mac_maybe_scalar),
+ mnCEF(vmul, _vmul, 3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ_RR), neon_mul),
+
+ mcCE(fcpyd, eb00b40, 2, (RVD, RVD), vfp_dp_rd_rm),
+
+#undef ARM_VARIANT
+#define ARM_VARIANT & fpu_vfp_ext_v1xd
+
+ MNCE(vmov, 0, 1, (VMOV), neon_mov),
+ mcCE(fmrs, e100a10, 2, (RR, RVS), vfp_reg_from_sp),
+ mcCE(fmsr, e000a10, 2, (RVS, RR), vfp_sp_from_reg),
+ mcCE(fcpys, eb00a40, 2, (RVS, RVS), vfp_sp_monadic),
mCEF(vmullt, _vmullt, 3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ), mve_vmull),
mnCEF(vadd, _vadd, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQR), neon_addsub_if_i),
MNCEF(vabs, 1b10300, 2, (RNSDQMQ, RNSDQMQ), neon_abs_neg),
MNCEF(vneg, 1b10380, 2, (RNSDQMQ, RNSDQMQ), neon_abs_neg),
-#undef ARM_VARIANT
+ mCEF(vmovlt, _vmovlt, 1, (VMOV), mve_movl),
+ mCEF(vmovlb, _vmovlb, 1, (VMOV), mve_movl),
+
+ mnCE(vcmp, _vcmp, 3, (RVSD_COND, RSVDMQ_FI0, oRMQRZ), vfp_nsyn_cmp),
+ mnCE(vcmpe, _vcmpe, 3, (RVSD_COND, RSVDMQ_FI0, oRMQRZ), vfp_nsyn_cmp),
+
+#undef ARM_VARIANT
+#define ARM_VARIANT & fpu_vfp_ext_v2
+
+ mcCE(fmsrr, c400a10, 3, (VRSLST, RR, RR), vfp_sp2_from_reg2),
+ mcCE(fmrrs, c500a10, 3, (RR, RR, VRSLST), vfp_reg2_from_sp2),
+ mcCE(fmdrr, c400b10, 3, (RVD, RR, RR), vfp_dp_rm_rd_rn),
+ mcCE(fmrrd, c500b10, 3, (RR, RR, RVD), vfp_dp_rd_rn_rm),
+
+#undef ARM_VARIANT
+#define ARM_VARIANT & fpu_vfp_ext_armv8xd
+ mnUF(vcvta, _vcvta, 2, (RNSDQMQ, oRNSDQMQ), neon_cvta),
+ mnUF(vcvtp, _vcvta, 2, (RNSDQMQ, oRNSDQMQ), neon_cvtp),
+ mnUF(vcvtn, _vcvta, 3, (RNSDQMQ, oRNSDQMQ, oI32z), neon_cvtn),
+ mnUF(vcvtm, _vcvta, 2, (RNSDQMQ, oRNSDQMQ), neon_cvtm),
+ mnUF(vmaxnm, _vmaxnm, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQ), vmaxnm),
+ mnUF(vminnm, _vminnm, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQ), vmaxnm),
+
+#undef ARM_VARIANT
#define ARM_VARIANT & fpu_neon_ext_v1
- mnUF(vabd, _vabd, 3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_if_su),
+ mnUF(vabd, _vabd, 3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_if_su),
mnUF(vabdl, _vabdl, 3, (RNQMQ, RNDMQ, RNDMQ), neon_dyadic_long),
- mnUF(vaddl, _vaddl, 3, (RNQMQ, RNDMQ, RNDMQR), neon_dyadic_long),
- mnUF(vsubl, _vsubl, 3, (RNQMQ, RNDMQ, RNDMQR), neon_dyadic_long),
+ mnUF(vaddl, _vaddl, 3, (RNSDQMQ, oRNSDMQ, RNSDMQR), neon_dyadic_long),
+ mnUF(vsubl, _vsubl, 3, (RNSDQMQ, oRNSDMQ, RNSDMQR), neon_dyadic_long),
+ mnUF(vand, _vand, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_Ibig), neon_logic),
+ mnUF(vbic, _vbic, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_Ibig), neon_logic),
+ mnUF(vorr, _vorr, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_Ibig), neon_logic),
+ mnUF(vorn, _vorn, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_Ibig), neon_logic),
+ mnUF(veor, _veor, 3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_logic),
+ MNUF(vcls, 1b00400, 2, (RNDQMQ, RNDQMQ), neon_cls),
+ MNUF(vclz, 1b00480, 2, (RNDQMQ, RNDQMQ), neon_clz),
+ mnCE(vdup, _vdup, 2, (RNDQMQ, RR_RNSC), neon_dup),
+ MNUF(vhadd, 00000000, 3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_dyadic_i_su),
+ MNUF(vrhadd, 00000100, 3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_i_su),
+ MNUF(vhsub, 00000200, 3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_dyadic_i_su),
+ mnUF(vmin, _vmin, 3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_if_su),
+ mnUF(vmax, _vmax, 3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_if_su),
+ MNUF(vqadd, 0000010, 3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_dyadic_i64_su),
+ MNUF(vqsub, 0000210, 3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_dyadic_i64_su),
+ mnUF(vmvn, _vmvn, 2, (RNDQMQ, RNDQMQ_Ibig), neon_mvn),
+ MNUF(vqabs, 1b00700, 2, (RNDQMQ, RNDQMQ), neon_sat_abs_neg),
+ MNUF(vqneg, 1b00780, 2, (RNDQMQ, RNDQMQ), neon_sat_abs_neg),
+ mnUF(vqrdmlah, _vqrdmlah, 3, (RNDQMQ, oRNDQMQ, RNDQ_RNSC_RR), neon_qrdmlah),
+ mnUF(vqdmulh, _vqdmulh, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_RNSC_RR), neon_qdmulh),
+ mnUF(vqrdmulh, _vqrdmulh, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_RNSC_RR), neon_qdmulh),
+ MNUF(vqrshl, 0000510, 3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_rshl),
+ MNUF(vrshl, 0000500, 3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_rshl),
+ MNUF(vshr, 0800010, 3, (RNDQMQ, oRNDQMQ, I64z), neon_rshift_round_imm),
+ MNUF(vrshr, 0800210, 3, (RNDQMQ, oRNDQMQ, I64z), neon_rshift_round_imm),
+ MNUF(vsli, 1800510, 3, (RNDQMQ, oRNDQMQ, I63), neon_sli),
+ MNUF(vsri, 1800410, 3, (RNDQMQ, oRNDQMQ, I64z), neon_sri),
+ MNUF(vrev64, 1b00000, 2, (RNDQMQ, RNDQMQ), neon_rev),
+ MNUF(vrev32, 1b00080, 2, (RNDQMQ, RNDQMQ), neon_rev),
+ MNUF(vrev16, 1b00100, 2, (RNDQMQ, RNDQMQ), neon_rev),
+ mnUF(vshl, _vshl, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_I63b_RR), neon_shl),
+ mnUF(vqshl, _vqshl, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_I63b_RR), neon_qshl),
+ MNUF(vqshlu, 1800610, 3, (RNDQMQ, oRNDQMQ, I63), neon_qshlu_imm),
+
+#undef ARM_VARIANT
+#define ARM_VARIANT & arm_ext_v8_3
+#undef THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_v6t2_v8m
+ MNUF (vcadd, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ, EXPi), vcadd),
+ MNUF (vcmla, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ_RNSC, EXPi), vcmla),
+
+#undef ARM_VARIANT
+#define ARM_VARIANT & arm_ext_bf16
+#undef THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_bf16
+ TUF ("vdot", c000d00, fc000d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vdot, vdot),
+ TUF ("vmmla", c000c40, fc000c40, 3, (RNQ, RNQ, RNQ), vmmla, vmmla),
+ TUF ("vfmab", c300810, fc300810, 3, (RNDQ, RNDQ, RNDQ_RNSC), bfloat_vfma, bfloat_vfma),
+
+#undef ARM_VARIANT
+#define ARM_VARIANT & arm_ext_i8mm
+#undef THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_i8mm
+ TUF ("vsmmla", c200c40, fc200c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
+ TUF ("vummla", c200c50, fc200c50, 3, (RNQ, RNQ, RNQ), vummla, vummla),
+ TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
+ TUF ("vusdot", c800d00, fc800d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vusdot, vusdot),
+ TUF ("vsudot", c800d10, fc800d10, 3, (RNDQ, RNDQ, RNSC), vsudot, vsudot),
+
+#undef ARM_VARIANT
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &arm_ext_cde
+ ToC ("cx1", ee000000, 3, (RCP, APSR_RR, I8191), cx1),
+ ToC ("cx1a", fe000000, 3, (RCP, APSR_RR, I8191), cx1a),
+ ToC ("cx1d", ee000040, 4, (RCP, RR, APSR_RR, I8191), cx1d),
+ ToC ("cx1da", fe000040, 4, (RCP, RR, APSR_RR, I8191), cx1da),
+
+ ToC ("cx2", ee400000, 4, (RCP, APSR_RR, APSR_RR, I511), cx2),
+ ToC ("cx2a", fe400000, 4, (RCP, APSR_RR, APSR_RR, I511), cx2a),
+ ToC ("cx2d", ee400040, 5, (RCP, RR, APSR_RR, APSR_RR, I511), cx2d),
+ ToC ("cx2da", fe400040, 5, (RCP, RR, APSR_RR, APSR_RR, I511), cx2da),
+
+ ToC ("cx3", ee800000, 5, (RCP, APSR_RR, APSR_RR, APSR_RR, I63), cx3),
+ ToC ("cx3a", fe800000, 5, (RCP, APSR_RR, APSR_RR, APSR_RR, I63), cx3a),
+ ToC ("cx3d", ee800040, 6, (RCP, RR, APSR_RR, APSR_RR, APSR_RR, I63), cx3d),
+ ToC ("cx3da", fe800040, 6, (RCP, RR, APSR_RR, APSR_RR, APSR_RR, I63), cx3da),
+
+ mToC ("vcx1", ec200000, 3, (RCP, RNSDMQ, I4095), vcx1),
+ mToC ("vcx1a", fc200000, 3, (RCP, RNSDMQ, I4095), vcx1),
+
+ mToC ("vcx2", ec300000, 4, (RCP, RNSDMQ, RNSDMQ, I127), vcx2),
+ mToC ("vcx2a", fc300000, 4, (RCP, RNSDMQ, RNSDMQ, I127), vcx2),
+
+ mToC ("vcx3", ec800000, 5, (RCP, RNSDMQ, RNSDMQ, RNSDMQ, I15), vcx3),
+ mToC ("vcx3a", fc800000, 5, (RCP, RNSDMQ, RNSDMQ, RNSDMQ, I15), vcx3),
};
+
#undef ARM_VARIANT
#undef THUMB_VARIANT
#undef TCE
/* PR 21809: Do not set a mapping state for debug sections
- it just confuses other tools. */
- if (bfd_get_section_flags (NULL, now_seg) & SEC_DEBUGGING)
+ if (bfd_section_flags (now_seg) & SEC_DEBUGGING)
return;
frag_thumb_mode = fragP->tc_frag_data.thumb_mode ^ MODE_RECORDED;
const char * text_name;
const char * prefix;
const char * prefix_once;
- const char * group_name;
+ struct elf_section_match match;
char * sec_name;
int type;
int flags;
flags = SHF_ALLOC;
linkonce = 0;
- group_name = 0;
+ memset (&match, 0, sizeof (match));
/* Handle COMDAT group. */
if (prefix != prefix_once && (text_seg->flags & SEC_LINK_ONCE) != 0)
{
- group_name = elf_group_name (text_seg);
- if (group_name == NULL)
+ match.group_name = elf_group_name (text_seg);
+ if (match.group_name == NULL)
{
as_bad (_("Group section `%s' has no group signature"),
segment_name (text_seg));
linkonce = 1;
}
- obj_elf_change_section (sec_name, type, 0, flags, 0, group_name,
+ obj_elf_change_section (sec_name, type, flags, 0, &match,
linkonce, 0);
/* Set the section link for index tables. */
break;
case BFD_RELOC_ARM_SMC:
- if (((unsigned long) value) > 0xffff)
+ if (((unsigned long) value) > 0xf)
as_bad_where (fixP->fx_file, fixP->fx_line,
_("invalid smc expression"));
+
newval = md_chars_to_number (buf, INSN_SIZE);
- newval |= (value & 0xf) | ((value & 0xfff0) << 4);
+ newval |= (value & 0xf);
md_number_to_chars (buf, newval, INSN_SIZE);
break;
break;
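+ /* For the branch fixups below, out_of_range_p (value, N) flags any value
+    outside the signed range [-(1 << N), (1 << N) - 1]; e.g. the 8-bit
+    check used for BRANCH9 permits values from -256 to 255.  */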
case BFD_RELOC_THUMB_PCREL_BRANCH9: /* Conditional branch. */
- if ((value & ~0xff) && ((value & ~0xff) != ~0xff))
+ if (out_of_range_p (value, 8))
as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
if (fixP->fx_done || !seg->use_rela_p)
break;
case BFD_RELOC_THUMB_PCREL_BRANCH12: /* Unconditional branch. */
- if ((value & ~0x7ff) && ((value & ~0x7ff) != ~0x7ff))
+ if (out_of_range_p (value, 11))
as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
if (fixP->fx_done || !seg->use_rela_p)
}
break;
+ /* This relocation is misnamed; it should be BRANCH21.  */
case BFD_RELOC_THUMB_PCREL_BRANCH20:
if (fixP->fx_addsy
&& (S_GET_SEGMENT (fixP->fx_addsy) == seg)
/* Force a relocation for a branch 20 bits wide. */
fixP->fx_done = 0;
}
- if ((value & ~0x1fffff) && ((value & ~0x0fffff) != ~0x0fffff))
+ if (out_of_range_p (value, 20))
as_bad_where (fixP->fx_file, fixP->fx_line,
_("conditional branch out of range"));
fixP->fx_r_type = BFD_RELOC_THUMB_PCREL_BRANCH23;
#endif
- if ((value & ~0x3fffff) && ((value & ~0x3fffff) != ~0x3fffff))
+ if (out_of_range_p (value, 22))
{
if (!(ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2)))
as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
- else if ((value & ~0x1ffffff)
- && ((value & ~0x1ffffff) != ~0x1ffffff))
+ else if (out_of_range_p (value, 24))
as_bad_where (fixP->fx_file, fixP->fx_line,
_("Thumb2 branch out of range"));
}
break;
case BFD_RELOC_THUMB_PCREL_BRANCH25:
- if ((value & ~0x0ffffff) && ((value & ~0x0ffffff) != ~0x0ffffff))
+ if (out_of_range_p (value, 24))
as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
if (fixP->fx_done || !seg->use_rela_p)
(((unsigned long) fixP->fx_frag->fr_address
+ (unsigned long) fixP->fx_where) & ~3)
+ (unsigned long) value);
+ else if (get_recorded_alignment (seg) < 2)
+ as_warn_where (fixP->fx_file, fixP->fx_line,
+ _("section does not have enough alignment to ensure safe PC-relative loads"));
if (value & ~0x3fc)
as_bad_where (fixP->fx_file, fixP->fx_line,
}
bfd_vma insn = get_thumb32_insn (buf);
- /* le lr, <label> or le <label> */
+ /* le lr, <label>, le <label> or letp lr, <label> */
if (((insn & 0xffffffff) == 0xf00fc001)
- || ((insn & 0xffffffff) == 0xf02fc001))
+ || ((insn & 0xffffffff) == 0xf02fc001)
+ || ((insn & 0xffffffff) == 0xf01fc001))
value = -value;
if (v8_1_branch_value_check (value, 12, FALSE) == FAIL)
if (sec != NULL)
{
- bfd_set_section_flags
- (stdoutput, sec, SEC_READONLY | SEC_DEBUGGING /* | SEC_HAS_CONTENTS */);
- bfd_set_section_size (stdoutput, sec, 0);
+ bfd_set_section_flags (sec, SEC_READONLY | SEC_DEBUGGING);
+ bfd_set_section_size (sec, 0);
bfd_set_section_contents (stdoutput, sec, NULL, 0, 0);
}
}
{"mwarn-deprecated", NULL, &warn_on_deprecated, 1, NULL},
{"mno-warn-deprecated", N_("do not warn on use of deprecated feature"),
&warn_on_deprecated, 0, NULL},
+
+ {"mwarn-restrict-it", N_("warn about performance deprecated IT instructions"
+ " in ARMv8-A and ARMv8-R"), &warn_on_restrict_it, 1, NULL},
+ {"mno-warn-restrict-it", NULL, &warn_on_restrict_it, 0, NULL},
+
{"mwarn-syms", N_("warn about symbols that match instruction names [default]"), (int *) (& flag_warn_syms), TRUE, NULL},
{"mno-warn-syms", N_("disable warnings about symobls that match instructions"), (int *) (& flag_warn_syms), FALSE, NULL},
{NULL, NULL, NULL, 0, NULL}
ARM_ARCH_NONE,
FPU_ARCH_NEON_VFP_V4),
ARM_CPU_OPT ("cortex-a32", "Cortex-A32", ARM_ARCH_V8A,
- ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
ARM_CPU_OPT ("cortex-a35", "Cortex-A35", ARM_ARCH_V8A,
- ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
ARM_CPU_OPT ("cortex-a53", "Cortex-A53", ARM_ARCH_V8A,
- ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
ARM_CPU_OPT ("cortex-a55", "Cortex-A55", ARM_ARCH_V8_2A,
ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
ARM_CPU_OPT ("cortex-a57", "Cortex-A57", ARM_ARCH_V8A,
- ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
ARM_CPU_OPT ("cortex-a72", "Cortex-A72", ARM_ARCH_V8A,
- ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
ARM_CPU_OPT ("cortex-a73", "Cortex-A73", ARM_ARCH_V8A,
- ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
ARM_CPU_OPT ("cortex-a75", "Cortex-A75", ARM_ARCH_V8_2A,
ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
ARM_CPU_OPT ("cortex-a76", "Cortex-A76", ARM_ARCH_V8_2A,
ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
+ ARM_CPU_OPT ("cortex-a76ae", "Cortex-A76AE", ARM_ARCH_V8_2A,
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
+ ARM_CPU_OPT ("cortex-a77", "Cortex-A77", ARM_ARCH_V8_2A,
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
ARM_CPU_OPT ("ares", "Ares", ARM_ARCH_V8_2A,
ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV),
FPU_ARCH_VFP_V3D16),
ARM_CPU_OPT ("cortex-r52", "Cortex-R52", ARM_ARCH_V8R,
- ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
FPU_ARCH_NEON_VFP_ARMV8),
+ ARM_CPU_OPT ("cortex-m35p", "Cortex-M35P", ARM_ARCH_V8M_MAIN,
+ ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
+ FPU_NONE),
ARM_CPU_OPT ("cortex-m33", "Cortex-M33", ARM_ARCH_V8M_MAIN,
ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
FPU_NONE),
ARM_ARCH_NONE,
FPU_NONE),
ARM_CPU_OPT ("exynos-m1", "Samsung Exynos M1", ARM_ARCH_V8A,
- ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
ARM_CPU_OPT ("neoverse-n1", "Neoverse N1", ARM_ARCH_V8_2A,
ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
ARM_ARCH_NONE,
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
ARM_CPU_OPT ("xgene2", "APM X-Gene 2", ARM_ARCH_V8A,
- ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
{ NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, ARM_ARCH_NONE, NULL }
static const struct arm_ext_table armv8a_ext_table[] =
{
- ARM_ADD ("crc", ARCH_CRC_ARMV8),
+ ARM_ADD ("crc", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC)),
ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8),
ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8_1),
ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_2_FP16),
ARM_ADD ("fp16fml", FPU_ARCH_NEON_VFP_ARMV8_2_FP16FML),
+ ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+ ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_1,
ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
ARM_ADD ("dotprod", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
{
ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
+ ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+ ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
{
ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
+ ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+ ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
{ NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
};
+static const struct arm_ext_table armv86a_ext_table[] =
+{
+ ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
+ { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
+#define CDE_EXTENSIONS \
+ ARM_ADD ("cdecp0", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE0)), \
+ ARM_ADD ("cdecp1", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE1)), \
+ ARM_ADD ("cdecp2", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE2)), \
+ ARM_ADD ("cdecp3", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE3)), \
+ ARM_ADD ("cdecp4", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE4)), \
+ ARM_ADD ("cdecp5", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE5)), \
+ ARM_ADD ("cdecp6", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE6)), \
+ ARM_ADD ("cdecp7", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE | ARM_EXT2_CDE7))
+
static const struct arm_ext_table armv8m_main_ext_table[] =
{
- ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
- ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP)),
+ ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_AEXT_V8M_MAIN_DSP),
+ ARM_FEATURE_CORE_LOW (ARM_AEXT_V8M_MAIN_DSP)),
ARM_EXT ("fp", FPU_ARCH_VFP_V5_SP_D16, ALL_FP),
ARM_ADD ("fp.dp", FPU_ARCH_VFP_V5D16),
+ CDE_EXTENSIONS,
{ NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
};
+
static const struct arm_ext_table armv8_1m_main_ext_table[] =
{
- ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
- ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP)),
+ ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_AEXT_V8M_MAIN_DSP),
+ ARM_FEATURE_CORE_LOW (ARM_AEXT_V8M_MAIN_DSP)),
ARM_EXT ("fp",
ARM_FEATURE (0, ARM_EXT2_FP16_INST,
FPU_VFP_V5_SP_D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA),
ARM_ADD ("fp.dp",
ARM_FEATURE (0, ARM_EXT2_FP16_INST,
FPU_VFP_V5D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)),
- ARM_EXT ("mve", ARM_FEATURE_COPROC (FPU_MVE),
- ARM_FEATURE_COPROC (FPU_MVE | FPU_MVE_FP)),
+ ARM_EXT ("mve", ARM_FEATURE (ARM_AEXT_V8M_MAIN_DSP, ARM_EXT2_MVE, 0),
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE | ARM_EXT2_MVE_FP)),
ARM_ADD ("mve.fp",
- ARM_FEATURE (0, ARM_EXT2_FP16_INST,
- FPU_MVE | FPU_MVE_FP | FPU_VFP_V5_SP_D16 |
- FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)),
+ ARM_FEATURE (ARM_AEXT_V8M_MAIN_DSP,
+ ARM_EXT2_FP16_INST | ARM_EXT2_MVE | ARM_EXT2_MVE_FP,
+ FPU_VFP_V5_SP_D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)),
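+ /* Note that both +mve and +mve.fp also enable the M-profile DSP
+    extension (ARM_AEXT_V8M_MAIN_DSP), which MVE is assumed to
+    require.  */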
+ CDE_EXTENSIONS,
{ NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
};
+#undef CDE_EXTENSIONS
+
static const struct arm_ext_table armv8r_ext_table[] =
{
- ARM_ADD ("crc", ARCH_CRC_ARMV8),
+ ARM_ADD ("crc", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC)),
ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8),
ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
ARM_ARCH_OPT2 ("armv8-r", ARM_ARCH_V8R, FPU_ARCH_VFP, armv8r),
ARM_ARCH_OPT2 ("armv8.4-a", ARM_ARCH_V8_4A, FPU_ARCH_VFP, armv84a),
ARM_ARCH_OPT2 ("armv8.5-a", ARM_ARCH_V8_5A, FPU_ARCH_VFP, armv85a),
+ ARM_ARCH_OPT2 ("armv8.6-a", ARM_ARCH_V8_6A, FPU_ARCH_VFP, armv86a),
ARM_ARCH_OPT ("xscale", ARM_ARCH_XSCALE, FPU_ARCH_VFP),
ARM_ARCH_OPT ("iwmmxt", ARM_ARCH_IWMMXT, FPU_ARCH_VFP),
ARM_ARCH_OPT ("iwmmxt2", ARM_ARCH_IWMMXT2, FPU_ARCH_VFP),
use the context sensitive approach using arm_ext_table's. */
static const struct arm_option_extension_value_table arm_extensions[] =
{
- ARM_EXT_OPT ("crc", ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+ ARM_EXT_OPT ("crc", ARM_FEATURE_CORE_HIGH(ARM_EXT2_CRC),
+ ARM_FEATURE_CORE_HIGH(ARM_EXT2_CRC),
ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8),
return TRUE;
}
+static bfd_boolean
+arm_parse_fp16_opt (const char *str)
+{
+ if (strcasecmp (str, "ieee") == 0)
+ fp16_format = ARM_FP16_FORMAT_IEEE;
+ else if (strcasecmp (str, "alternative") == 0)
+ fp16_format = ARM_FP16_FORMAT_ALTERNATIVE;
+ else
+ {
+ as_bad (_("unrecognised float16 format \"%s\""), str);
+ return FALSE;
+ }
+
+ return TRUE;
+}
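+/* Chosen on the command line via the "mfp16-format=" entry in the option
+   table below, e.g. (illustrative invocation)
+   "as -mfp16-format=alternative foo.s".  */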
+
static bfd_boolean
arm_parse_cpu (const char *str)
{
march_ext_opt = XNEW (arm_feature_set);
*march_ext_opt = arm_arch_none;
march_fpu_opt = &opt->default_fpu;
+ selected_ctx_ext_table = opt->ext_table;
strcpy (selected_cpu_name, opt->name);
if (ext != NULL)
arm_parse_it_mode, NULL},
{"mccs", N_("\t\t\t TI CodeComposer Studio syntax compatibility mode"),
arm_ccs_mode, NULL},
+ {"mfp16-format=",
+ N_("[ieee|alternative]\n\
+ set the encoding for half precision floating point "
+ "numbers to IEEE\n\
+ or Arm alternative format."),
+ arm_parse_fp16_opt, NULL },
{NULL, NULL, 0, NULL}
};
{TAG_CPU_ARCH_V8, ARM_ARCH_V8_4A},
{TAG_CPU_ARCH_V8, ARM_ARCH_V8_5A},
{TAG_CPU_ARCH_V8_1M_MAIN, ARM_ARCH_V8_1M_MAIN},
- {-1, ARM_ARCH_NONE}
+ {TAG_CPU_ARCH_V8, ARM_ARCH_V8_6A},
+ {-1, ARM_ARCH_NONE}
};
/* Set an attribute if it has not already been set by the user. */
if (p_ver_ret == NULL)
return -1;
-found:
+ found:
/* Tag_CPU_arch_profile. */
- if (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v7a)
- || ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v8)
- || (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_atomics)
- && !ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v8m_m_only)))
+ if (!ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v8r)
+ && (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v7a)
+ || ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v8)
+ || (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_atomics)
+ && !ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v8m_m_only))))
*profile = 'A';
- else if (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v7r))
+ else if (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v7r)
+ || ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_v8r))
*profile = 'R';
else if (ARM_CPU_HAS_FEATURE (p_ver_ret->flags, arm_ext_m))
*profile = 'M';
virt_sec |= 2;
if (virt_sec != 0)
aeabi_set_attribute_int (Tag_Virtualization_use, virt_sec);
+
+ if (fp16_format != ARM_FP16_FORMAT_DEFAULT)
+ aeabi_set_attribute_int (Tag_ABI_FP_16bit_format, fp16_format);
}
/* Post relaxation hook. Recompute ARM attributes now that relaxation is
if (streq (opt->name, name))
{
selected_arch = opt->value;
+ selected_ctx_ext_table = opt->ext_table;
selected_ext = arm_arch_none;
selected_cpu = selected_arch;
strcpy (selected_cpu_name, opt->name);
name += 2;
}
+ /* Check the context-specific extension table.  */
+ if (selected_ctx_ext_table)
+ {
+ const struct arm_ext_table * ext_opt;
+ for (ext_opt = selected_ctx_ext_table; ext_opt->name != NULL; ext_opt++)
+ {
+ if (streq (ext_opt->name, name))
+ {
+ if (adding_value)
+ {
+ if (ARM_FEATURE_ZERO (ext_opt->merge))
+ /* TODO: Option not supported. When we remove the
+ legacy table this case should error out. */
+ continue;
+ ARM_MERGE_FEATURE_SETS (selected_ext, selected_ext,
+ ext_opt->merge);
+ }
+ else
+ ARM_CLEAR_FEATURE (selected_ext, selected_ext, ext_opt->clear);
+
+ ARM_MERGE_FEATURE_SETS (selected_cpu, selected_arch, selected_ext);
+ ARM_MERGE_FEATURE_SETS (cpu_variant, selected_cpu, selected_fpu);
+ *input_line_pointer = saved_char;
+ demand_empty_rest_of_line ();
+ return;
+ }
+ }
+ }
+
for (opt = arm_extensions; opt->name != NULL; opt++)
if (streq (opt->name, name))
{
if (streq (opt->name, name))
{
selected_fpu = opt->value;
+ ARM_CLEAR_FEATURE (selected_cpu, selected_cpu, fpu_any);
#ifndef CPU_DEFAULT
if (no_cpu_selected ())
ARM_MERGE_FEATURE_SETS (cpu_variant, arm_arch_any, selected_fpu);