X-Git-Url: http://git.efficios.com/?a=blobdiff_plain;f=gas%2Fconfig%2Ftc-arm.c;h=a69300697f0f8b2de2d22011aee530163ab2e5bb;hb=e3fed0f2fe98c52dc8cb160be2a30e973b1dca3f;hp=192de0b5acc8be1e30b7ef2ef27bf25444fd1d51;hpb=26c1e780920096772b5735250fa31a4184d2253c;p=deliverable%2Fbinutils-gdb.git diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c index 192de0b5ac..a69300697f 100644 --- a/gas/config/tc-arm.c +++ b/gas/config/tc-arm.c @@ -1,5 +1,5 @@ /* tc-arm.c -- Assemble for the ARM - Copyright (C) 1994-2019 Free Software Foundation, Inc. + Copyright (C) 1994-2020 Free Software Foundation, Inc. Contributed by Richard Earnshaw (rwe@pegasus.esprit.ec.org) Modified by David Taylor (dtaylor@armltd.co.uk) Cirrus coprocessor mods by Aldy Hernandez (aldyh@redhat.com) @@ -32,6 +32,7 @@ #include "obstack.h" #include "libiberty.h" #include "opcode/arm.h" +#include "cpu-arm.h" #ifdef OBJ_ELF #include "elf/arm.h" @@ -106,6 +107,15 @@ enum arm_float_abi should define CPU_DEFAULT here. */ #endif +/* Perform range checks on positive and negative overflows by checking if the + VALUE given fits within the range of an BITS sized immediate. */ +static bfd_boolean out_of_range_p (offsetT value, offsetT bits) + { + gas_assert (bits < (offsetT)(sizeof (value) * 8)); + return (value & ~((1 << bits)-1)) + && ((value & ~((1 << bits)-1)) != ~((1 << bits)-1)); +} + #ifndef FPU_DEFAULT # ifdef TE_LINUX # define FPU_DEFAULT FPU_ARCH_FPA @@ -144,6 +154,7 @@ static int pic_code = FALSE; static int fix_v4bx = FALSE; /* Warn on using deprecated features. */ static int warn_on_deprecated = TRUE; +static int warn_on_restrict_it = FALSE; /* Understand CodeComposer Studio assembly syntax. */ bfd_boolean codecomposer_syntax = FALSE; @@ -219,6 +230,7 @@ static const arm_feature_set arm_ext_div = ARM_FEATURE_CORE_LOW (ARM_EXT_DIV); static const arm_feature_set arm_ext_v7 = ARM_FEATURE_CORE_LOW (ARM_EXT_V7); static const arm_feature_set arm_ext_v7a = ARM_FEATURE_CORE_LOW (ARM_EXT_V7A); static const arm_feature_set arm_ext_v7r = ARM_FEATURE_CORE_LOW (ARM_EXT_V7R); +static const arm_feature_set arm_ext_v8r = ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8R); #ifdef OBJ_ELF static const arm_feature_set ATTRIBUTE_UNUSED arm_ext_v7m = ARM_FEATURE_CORE_LOW (ARM_EXT_V7M); #endif @@ -265,11 +277,33 @@ static const arm_feature_set arm_ext_sb = ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB); static const arm_feature_set arm_ext_predres = ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES); +static const arm_feature_set arm_ext_bf16 = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16); +static const arm_feature_set arm_ext_i8mm = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM); +static const arm_feature_set arm_ext_crc = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC); +static const arm_feature_set arm_ext_cde = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE); +static const arm_feature_set arm_ext_cde0 = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE0); +static const arm_feature_set arm_ext_cde1 = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE1); +static const arm_feature_set arm_ext_cde2 = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE2); +static const arm_feature_set arm_ext_cde3 = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE3); +static const arm_feature_set arm_ext_cde4 = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE4); +static const arm_feature_set arm_ext_cde5 = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE5); +static const arm_feature_set arm_ext_cde6 = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE6); +static const arm_feature_set arm_ext_cde7 = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_CDE7); static const arm_feature_set arm_arch_any = ARM_ANY; -#ifdef OBJ_ELF static const 
arm_feature_set fpu_any = FPU_ANY; -#endif static const arm_feature_set arm_arch_full ATTRIBUTE_UNUSED = ARM_FEATURE (-1, -1, -1); static const arm_feature_set arm_arch_t2 = ARM_ARCH_THUMB2; static const arm_feature_set arm_arch_none = ARM_ARCH_NONE; @@ -303,9 +337,14 @@ static const arm_feature_set fpu_neon_ext_v1 = static const arm_feature_set fpu_vfp_v3_or_neon_ext = ARM_FEATURE_COPROC (FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3); static const arm_feature_set mve_ext = - ARM_FEATURE_COPROC (FPU_MVE); + ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE); static const arm_feature_set mve_fp_ext = - ARM_FEATURE_COPROC (FPU_MVE_FP); + ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE_FP); +/* Note: This has more than one bit set, which means using it with + mark_feature_used (which returns if *any* of the bits are set in the current + cpu variant) can give surprising results. */ +static const arm_feature_set armv8m_fp = + ARM_FEATURE_COPROC (FPU_VFP_V5_SP_D16); #ifdef OBJ_ELF static const arm_feature_set fpu_vfp_fp16 = ARM_FEATURE_COPROC (FPU_VFP_EXT_FP16); @@ -322,8 +361,6 @@ static const arm_feature_set fpu_neon_ext_armv8 = ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8); static const arm_feature_set fpu_crypto_ext_armv8 = ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8); -static const arm_feature_set crc_ext_armv8 = - ARM_FEATURE_COPROC (CRC_EXT_ARMV8); static const arm_feature_set fpu_neon_ext_v8_1 = ARM_FEATURE_COPROC (FPU_NEON_EXT_RDMA); static const arm_feature_set fpu_neon_ext_dotprod = @@ -345,6 +382,7 @@ static arm_feature_set selected_fpu = FPU_NONE; /* Feature bits selected by the last .object_arch directive. */ static arm_feature_set selected_object_arch = ARM_ARCH_NONE; /* Must be long enough to hold any of the names in arm_cpus. */ +static const struct arm_ext_table * selected_ctx_ext_table = NULL; static char selected_cpu_name[20]; extern FLONUM_TYPE generic_floating_point_number; @@ -436,6 +474,7 @@ enum neon_el_type NT_float, NT_poly, NT_signed, + NT_bfloat, NT_unsigned }; @@ -445,7 +484,7 @@ struct neon_type_el unsigned size; }; -#define NEON_MAX_TYPE_ELS 4 +#define NEON_MAX_TYPE_ELS 5 struct neon_type { @@ -467,7 +506,7 @@ enum pred_instruction_type VPT_INSN, /* The VPT/VPST insn has been parsed. */ MVE_OUTSIDE_PRED_INSN , /* Instruction to indicate a MVE instruction without a predication code. */ - MVE_UNPREDICABLE_INSN /* MVE instruction that is non-predicable. */ + MVE_UNPREDICABLE_INSN, /* MVE instruction that is non-predicable. */ }; /* The maximum number of operands we need. */ @@ -690,7 +729,7 @@ const char * const reg_expected_msgs[] = [REG_TYPE_MMXWCG] = N_("iWMMXt scalar register expected"), [REG_TYPE_XSCALE] = N_("XScale accumulator register expected"), [REG_TYPE_MQ] = N_("MVE vector register expected"), - [REG_TYPE_RNB] = N_("") + [REG_TYPE_RNB] = "" }; /* Some well known registers that we refer to directly elsewhere. 
*/ @@ -867,6 +906,7 @@ struct asm_opcode #define BAD_ADDR_MODE _("instruction does not accept this addressing mode") #define BAD_BRANCH _("branch must be last instruction in IT block") #define BAD_BRANCH_OFF _("branch out of range or not a multiple of 2") +#define BAD_NO_VPT _("instruction not allowed in VPT block") #define BAD_NOT_IT _("instruction not allowed in IT block") #define BAD_NOT_VPT _("instruction missing MVE vector predication code") #define BAD_FPU _("selected FPU does not support instruction") @@ -883,6 +923,9 @@ struct asm_opcode _("cannot use writeback with PC-relative addressing") #define BAD_RANGE _("branch out of range") #define BAD_FP16 _("selected processor does not support fp16 instruction") +#define BAD_BF16 _("selected processor does not support bf16 instruction") +#define BAD_CDE _("selected processor does not support cde instruction") +#define BAD_CDE_COPROC _("coprocessor for insn is not enabled for cde") #define UNPRED_REG(R) _("using " R " results in unpredictable behaviour") #define THUMB1_RELOC_ONLY _("relocation valid in thumb1 code only") #define MVE_NOT_IT _("Warning: instruction is UNPREDICTABLE in an IT " \ @@ -1009,6 +1052,9 @@ static void it_fsm_post_encode (void); } \ while (0) +/* Toggle value[pos]. */ +#define TOGGLE_BIT(value, pos) (value ^ (1 << pos)) + /* Pure syntax. */ /* This array holds the chars that always start a comment. If the @@ -1034,7 +1080,7 @@ const char EXP_CHARS[] = "eE"; /* As in 0f12.456 */ /* or 0d1.2345e12 */ -const char FLT_CHARS[] = "rRsSfFdDxXeEpP"; +const char FLT_CHARS[] = "rRsSfFdDxXeEpPHh"; /* Prefix characters that indicate the start of an immediate value. */ @@ -1044,6 +1090,16 @@ const char FLT_CHARS[] = "rRsSfFdDxXeEpP"; #define skip_whitespace(str) do { if (*(str) == ' ') ++(str); } while (0) +enum fp_16bit_format +{ + ARM_FP16_FORMAT_IEEE = 0x1, + ARM_FP16_FORMAT_ALTERNATIVE = 0x2, + ARM_FP16_FORMAT_DEFAULT = 0x3 +}; + +static enum fp_16bit_format fp16_format = ARM_FP16_FORMAT_DEFAULT; + + static inline int skip_past_char (char ** str, char c) { @@ -1185,6 +1241,57 @@ md_atof (int type, char * litP, int * sizeP) switch (type) { + case 'H': + case 'h': + prec = 1; + break; + + /* If this is a bfloat16, then parse it slightly differently, as it + does not follow the IEEE specification for floating point numbers + exactly. */ + case 'b': + { + FLONUM_TYPE generic_float; + + t = atof_ieee_detail (input_line_pointer, 1, 8, words, &generic_float); + + if (t) + input_line_pointer = t; + else + return _("invalid floating point number"); + + switch (generic_float.sign) + { + /* Is +Inf. */ + case 'P': + words[0] = 0x7f80; + break; + + /* Is -Inf. */ + case 'N': + words[0] = 0xff80; + break; + + /* Is NaN. */ + /* bfloat16 has two types of NaN - quiet and signalling. + Quiet NaN has bit[6] == 1 && faction != 0, whereas + signalling NaN's have bit[0] == 0 && fraction != 0. + Chosen this specific encoding as it is the same form + as used by other IEEE 754 encodings in GAS. 
*/ + case 0: + words[0] = 0x7fff; + break; + + default: + break; + } + + *sizeP = 2; + + md_number_to_chars (litP, (valueT) words[0], sizeof (LITTLENUM_TYPE)); + + return NULL; + } case 'f': case 'F': case 's': @@ -1219,34 +1326,29 @@ md_atof (int type, char * litP, int * sizeP) input_line_pointer = t; *sizeP = prec * sizeof (LITTLENUM_TYPE); - if (target_big_endian) - { - for (i = 0; i < prec; i++) - { - md_number_to_chars (litP, (valueT) words[i], sizeof (LITTLENUM_TYPE)); - litP += sizeof (LITTLENUM_TYPE); - } - } + if (target_big_endian || prec == 1) + for (i = 0; i < prec; i++) + { + md_number_to_chars (litP, (valueT) words[i], sizeof (LITTLENUM_TYPE)); + litP += sizeof (LITTLENUM_TYPE); + } + else if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_endian_pure)) + for (i = prec - 1; i >= 0; i--) + { + md_number_to_chars (litP, (valueT) words[i], sizeof (LITTLENUM_TYPE)); + litP += sizeof (LITTLENUM_TYPE); + } else - { - if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_endian_pure)) - for (i = prec - 1; i >= 0; i--) - { - md_number_to_chars (litP, (valueT) words[i], sizeof (LITTLENUM_TYPE)); - litP += sizeof (LITTLENUM_TYPE); - } - else - /* For a 4 byte float the order of elements in `words' is 1 0. - For an 8 byte float the order is 1 0 3 2. */ - for (i = 0; i < prec; i += 2) - { - md_number_to_chars (litP, (valueT) words[i + 1], - sizeof (LITTLENUM_TYPE)); - md_number_to_chars (litP + sizeof (LITTLENUM_TYPE), - (valueT) words[i], sizeof (LITTLENUM_TYPE)); - litP += 2 * sizeof (LITTLENUM_TYPE); - } - } + /* For a 4 byte float the order of elements in `words' is 1 0. + For an 8 byte float the order is 1 0 3 2. */ + for (i = 0; i < prec; i += 2) + { + md_number_to_chars (litP, (valueT) words[i + 1], + sizeof (LITTLENUM_TYPE)); + md_number_to_chars (litP + sizeof (LITTLENUM_TYPE), + (valueT) words[i], sizeof (LITTLENUM_TYPE)); + litP += 2 * sizeof (LITTLENUM_TYPE); + } return NULL; } @@ -1445,6 +1547,28 @@ parse_neon_type (struct neon_type *type, char **str) thissize = 64; ptr++; goto done; + case 'b': + thistype = NT_bfloat; + switch (TOLOWER (*(++ptr))) + { + case 'f': + ptr += 1; + thissize = strtoul (ptr, &ptr, 10); + if (thissize != 16) + { + as_bad (_("bad size %d in type specifier"), thissize); + return FAIL; + } + goto done; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case ' ': case '.': + as_bad (_("unexpected type character `b' -- did you mean `bf'?")); + return FAIL; + default: + break; + } + break; default: as_bad (_("unexpected character `%c' in type specifier"), *ptr); return FAIL; @@ -1846,7 +1970,7 @@ parse_reg_list (char ** strp, enum reg_list_els etype) const char apsr_str[] = "apsr"; int apsr_str_len = strlen (apsr_str); - reg = arm_reg_parse (&str, REGLIST_RN); + reg = arm_reg_parse (&str, REG_TYPE_RN); if (etype == REGLIST_CLRM) { if (reg == REG_SP || reg == REG_PC) @@ -2775,8 +2899,7 @@ s_unreq (int a ATTRIBUTE_UNUSED) hash_delete (arm_reg_hsh, name, FALSE); free ((char *) reg->name); - if (reg->neon) - free (reg->neon); + free (reg->neon); free (reg); /* Also locate the all upper case and all lower case versions. 
@@ -2791,8 +2914,7 @@ s_unreq (int a ATTRIBUTE_UNUSED) { hash_delete (arm_reg_hsh, nbuf, FALSE); free ((char *) reg->name); - if (reg->neon) - free (reg->neon); + free (reg->neon); free (reg); } @@ -2803,8 +2925,7 @@ s_unreq (int a ATTRIBUTE_UNUSED) { hash_delete (arm_reg_hsh, nbuf, FALSE); free ((char *) reg->name); - if (reg->neon) - free (reg->neon); + free (reg->neon); free (reg); } @@ -4523,7 +4644,7 @@ s_arm_unwind_save_mmxwr (void) } return; -error: + error: ignore_rest_of_line (); } @@ -4591,7 +4712,7 @@ s_arm_unwind_save_mmxwcg (void) op = 0xc700 | mask; add_unwind_opcode (op, 2); return; -error: + error: ignore_rest_of_line (); } @@ -4922,6 +5043,55 @@ pe_directive_secrel (int dummy ATTRIBUTE_UNUSED) } #endif /* TE_PE */ +int +arm_is_largest_exponent_ok (int precision) +{ + /* precision == 1 ensures that this will only return + true for 16 bit floats. */ + return (precision == 1) && (fp16_format == ARM_FP16_FORMAT_ALTERNATIVE); +} + +static void +set_fp16_format (int dummy ATTRIBUTE_UNUSED) +{ + char saved_char; + char* name; + enum fp_16bit_format new_format; + + new_format = ARM_FP16_FORMAT_DEFAULT; + + name = input_line_pointer; + while (*input_line_pointer && !ISSPACE (*input_line_pointer)) + input_line_pointer++; + + saved_char = *input_line_pointer; + *input_line_pointer = 0; + + if (strcasecmp (name, "ieee") == 0) + new_format = ARM_FP16_FORMAT_IEEE; + else if (strcasecmp (name, "alternative") == 0) + new_format = ARM_FP16_FORMAT_ALTERNATIVE; + else + { + as_bad (_("unrecognised float16 format \"%s\""), name); + goto cleanup; + } + + /* Only set fp16_format if it is still the default (aka not already + been set yet). */ + if (fp16_format == ARM_FP16_FORMAT_DEFAULT) + fp16_format = new_format; + else + { + if (new_format != fp16_format) + as_warn (_("float16 format cannot be set more than once, ignoring.")); + } + + cleanup: + *input_line_pointer = saved_char; + ignore_rest_of_line (); +} + /* This table describes all the machine specific pseudo-ops the assembler has to support. The fields are: pseudo-op name without dot @@ -4989,6 +5159,7 @@ const pseudo_typeS md_pseudo_table[] = { "extend", float_cons, 'x' }, { "ldouble", float_cons, 'x' }, { "packed", float_cons, 'p' }, + { "bfloat16", float_cons, 'b' }, #ifdef TE_PE {"secrel32", pe_directive_secrel, 0}, #endif @@ -4999,9 +5170,12 @@ const pseudo_typeS md_pseudo_table[] = {"asmfunc", s_ccs_asmfunc, 0}, {"endasmfunc", s_ccs_endasmfunc, 0}, + {"float16", float_cons, 'h' }, + {"float16_format", set_fp16_format, 0 }, + { 0, 0, 0 } }; - + /* Parser functions used exclusively in instruction operands. */ /* Generic immediate-value read function for use in insn parsing. @@ -6190,7 +6364,7 @@ parse_psr (char **str, bfd_boolean lhs) goto unsupported_psr; p += 4; -check_suffix: + check_suffix: if (*p == '_') { /* A suffix follows. */ @@ -6678,8 +6852,10 @@ parse_neon_mov (char **str, int *which_operand) inst.operands[i].present = 1; } } - else if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_NSDQ, &rtype, - &optype)) != FAIL) + else if (((val = arm_typed_reg_parse (&ptr, REG_TYPE_NSDQ, &rtype, + &optype)) != FAIL) + || ((val = arm_typed_reg_parse (&ptr, REG_TYPE_MQ, &rtype, + &optype)) != FAIL)) { /* Case 0: VMOV , Case 1: VMOV
, @@ -6894,6 +7070,8 @@ enum operand_parse_code OP_RNDMQ, /* Neon double precision (0..31) or MVE vector register. */ OP_RNDMQR, /* Neon double precision (0..31), MVE vector or ARM register. */ + OP_RNSDMQR, /* Neon single or double precision, MVE vector or ARM register. + */ OP_RNQ, /* Neon quad precision register */ OP_RNQMQ, /* Neon quad or MVE vector register. */ OP_RVSD, /* VFP single or double precision register */ @@ -6902,6 +7080,7 @@ enum operand_parse_code OP_RNSD, /* Neon single or double precision register */ OP_RNDQ, /* Neon double or quad precision register */ OP_RNDQMQ, /* Neon double, quad or MVE vector register. */ + OP_RNDQMQR, /* Neon double, quad, MVE vector or ARM register. */ OP_RNSDQ, /* Neon single, double or quad precision register */ OP_RNSC, /* Neon scalar D[X] */ OP_RVC, /* VFP control register */ @@ -6916,18 +7095,21 @@ enum operand_parse_code OP_RIWG, /* iWMMXt wCG register */ OP_RXA, /* XScale accumulator register */ + OP_RNSDMQ, /* Neon single, double or MVE vector register */ OP_RNSDQMQ, /* Neon single, double or quad register or MVE vector register */ OP_RNSDQMQR, /* Neon single, double or quad register, MVE vector register or GPR (no SP/SP) */ OP_RMQ, /* MVE vector register. */ OP_RMQRZ, /* MVE vector or ARM register including ZR. */ + OP_RMQRR, /* MVE vector or ARM register. */ /* New operands for Armv8.1-M Mainline. */ OP_LR, /* ARM LR register */ OP_RRe, /* ARM register, only even numbered. */ OP_RRo, /* ARM register, only odd numbered, not r13 or r15. */ OP_RRnpcsp_I32, /* ARM register (no BadReg) or literal 1 .. 32 */ + OP_RR_ZR, /* ARM register or ZR but no PC */ OP_REGLST, /* ARM register list */ OP_CLRMLST, /* CLRM register list */ @@ -6950,11 +7132,21 @@ enum operand_parse_code OP_RNSDQ_RNSC, /* Vector S, D or Q reg, or Neon scalar. */ OP_RNSDQ_RNSC_MQ, /* Vector S, D or Q reg, Neon scalar or MVE vector register. */ + OP_RNSDQ_RNSC_MQ_RR, /* Vector S, D or Q reg, or MVE vector reg , or Neon + scalar, or ARM register. */ OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar. */ + OP_RNDQ_RNSC_RR, /* Neon D or Q reg, Neon scalar, or ARM register. */ + OP_RNDQMQ_RNSC_RR, /* Neon D or Q reg, Neon scalar, MVE vector or ARM + register. */ + OP_RNDQMQ_RNSC, /* Neon D, Q or MVE vector reg, or Neon scalar. */ OP_RND_RNSC, /* Neon D reg, or Neon scalar. */ OP_VMOV, /* Neon VMOV operands. */ OP_RNDQ_Ibig, /* Neon D or Q reg, or big immediate for logic and VMVN. */ + /* Neon D, Q or MVE vector register, or big immediate for logic and VMVN. */ + OP_RNDQMQ_Ibig, OP_RNDQ_I63b, /* Neon D or Q reg, or immediate for shift. */ + OP_RNDQMQ_I63b_RR, /* Neon D or Q reg, immediate for shift, MVE vector or + ARM register. */ OP_RIWR_I32z, /* iWMMXt wR register, or immediate 0 .. 32 for iWMMXt2. */ OP_VLDR, /* VLDR operand. */ @@ -6967,12 +7159,16 @@ enum operand_parse_code OP_I31w, /* 0 .. 31, optional trailing ! */ OP_I32, /* 1 .. 32 */ OP_I32z, /* 0 .. 32 */ + OP_I48_I64, /* 48 or 64 */ OP_I63, /* 0 .. 63 */ OP_I63s, /* -64 .. 63 */ OP_I64, /* 1 .. 64 */ OP_I64z, /* 0 .. 64 */ + OP_I127, /* 0 .. 127 */ OP_I255, /* 0 .. 255 */ - + OP_I511, /* 0 .. 511 */ + OP_I4095, /* 0 .. 4095 */ + OP_I8191, /* 0 .. 8191 */ OP_I4b, /* immediate, prefix optional, 1 .. 4 */ OP_I7b, /* 0 .. 7 */ OP_I15b, /* 0 .. 15 */ @@ -7029,6 +7225,8 @@ enum operand_parse_code OP_oRNSDQ, /* Optional single, double or quad precision vector register */ OP_oRNSDQMQ, /* Optional single, double or quad register or MVE vector register. 
*/ + OP_oRNSDMQ, /* Optional single, double register or MVE vector + register. */ OP_oSHll, /* LSL immediate */ OP_oSHar, /* ASR immediate */ OP_oSHllar, /* LSL or ASR immediate */ @@ -7118,6 +7316,25 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) } \ while (0) +#define po_imm1_or_imm2_or_fail(imm1, imm2, popt) \ + do \ + { \ + expressionS exp; \ + my_get_expression (&exp, &str, popt); \ + if (exp.X_op != O_constant) \ + { \ + inst.error = _("constant expression required"); \ + goto failure; \ + } \ + if (exp.X_add_number != imm1 && exp.X_add_number != imm2) \ + { \ + inst.error = _("immediate value 48 or 64 expected"); \ + goto failure; \ + } \ + inst.operands[i].imm = exp.X_add_number; \ + } \ + while (0) + #define po_scalar_or_goto(elsz, label, reg_type) \ do \ { \ @@ -7206,6 +7423,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) case OP_RVS: po_reg_or_fail (REG_TYPE_VFS); break; case OP_RVD: po_reg_or_fail (REG_TYPE_VFD); break; case OP_oRND: + case OP_RNSDMQR: + po_reg_or_goto (REG_TYPE_VFS, try_rndmqr); + break; + try_rndmqr: case OP_RNDMQR: po_reg_or_goto (REG_TYPE_RN, try_rndmq); break; @@ -7220,7 +7441,20 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) break; /* Also accept generic coprocessor regs for unknown registers. */ coproc_reg: - po_reg_or_fail (REG_TYPE_CN); + po_reg_or_goto (REG_TYPE_CN, vpr_po); + break; + /* Also accept P0 or p0 for VPR.P0. Since P0 is already an + existing register with a value of 0, this seems like the + best way to parse P0. */ + vpr_po: + if (strncasecmp (str, "P0", 2) == 0) + { + str += 2; + inst.operands[i].isreg = 1; + inst.operands[i].reg = 13; + } + else + goto failure; break; case OP_RMF: po_reg_or_fail (REG_TYPE_MVF); break; case OP_RMD: po_reg_or_fail (REG_TYPE_MVD); break; @@ -7239,6 +7473,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) try_nq: case OP_RNQ: po_reg_or_fail (REG_TYPE_NQ); break; case OP_RNSD: po_reg_or_fail (REG_TYPE_NSD); break; + case OP_RNDQMQR: + po_reg_or_goto (REG_TYPE_RN, try_rndqmq); + break; + try_rndqmq: case OP_oRNDQMQ: case OP_RNDQMQ: po_reg_or_goto (REG_TYPE_MQ, try_rndq); @@ -7254,6 +7492,13 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) case OP_RVSD_COND: po_reg_or_goto (REG_TYPE_VFSD, try_cond); break; + case OP_oRNSDMQ: + case OP_RNSDMQ: + po_reg_or_goto (REG_TYPE_NSD, try_mq2); + break; + try_mq2: + po_reg_or_fail (REG_TYPE_MQ); + break; case OP_oRNSDQ: case OP_RNSDQ: po_reg_or_fail (REG_TYPE_NSDQ); break; case OP_RNSDQMQR: @@ -7268,6 +7513,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) po_reg_or_fail (REG_TYPE_NSDQ); inst.error = 0; break; + case OP_RMQRR: + po_reg_or_goto (REG_TYPE_RN, try_rmq); + break; + try_rmq: case OP_RMQ: po_reg_or_fail (REG_TYPE_MQ); break; @@ -7317,6 +7566,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) } break; + case OP_RNSDQ_RNSC_MQ_RR: + po_reg_or_goto (REG_TYPE_RN, try_rnsdq_rnsc_mq); + break; + try_rnsdq_rnsc_mq: case OP_RNSDQ_RNSC_MQ: po_reg_or_goto (REG_TYPE_MQ, try_rnsdq_rnsc); break; @@ -7344,6 +7597,17 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) } break; + case OP_RNDQMQ_RNSC_RR: + po_reg_or_goto (REG_TYPE_MQ, try_rndq_rnsc_rr); + break; + try_rndq_rnsc_rr: + case OP_RNDQ_RNSC_RR: + po_reg_or_goto (REG_TYPE_RN, try_rndq_rnsc); + break; + case OP_RNDQMQ_RNSC: + po_reg_or_goto (REG_TYPE_MQ, try_rndq_rnsc); + break; + 
try_rndq_rnsc: case OP_RNDQ_RNSC: { po_scalar_or_goto (8, try_ndq, REG_TYPE_VFD); @@ -7368,6 +7632,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) po_misc_or_fail (parse_neon_mov (&str, &i) == FAIL); break; + case OP_RNDQMQ_Ibig: + po_reg_or_goto (REG_TYPE_MQ, try_rndq_ibig); + break; + try_rndq_ibig: case OP_RNDQ_Ibig: { po_reg_or_goto (REG_TYPE_NDQ, try_immbig); @@ -7384,6 +7652,13 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) } break; + case OP_RNDQMQ_I63b_RR: + po_reg_or_goto (REG_TYPE_MQ, try_rndq_i63b_rr); + break; + try_rndq_i63b_rr: + po_reg_or_goto (REG_TYPE_RN, try_rndq_i63b); + break; + try_rndq_i63b: case OP_RNDQ_I63b: { po_reg_or_goto (REG_TYPE_NDQ, try_shimm); @@ -7415,12 +7690,16 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) case OP_I31: po_imm_or_fail ( 0, 31, FALSE); break; case OP_I32: po_imm_or_fail ( 1, 32, FALSE); break; case OP_I32z: po_imm_or_fail ( 0, 32, FALSE); break; + case OP_I48_I64: po_imm1_or_imm2_or_fail (48, 64, FALSE); break; case OP_I63s: po_imm_or_fail (-64, 63, FALSE); break; case OP_I63: po_imm_or_fail ( 0, 63, FALSE); break; case OP_I64: po_imm_or_fail ( 1, 64, FALSE); break; case OP_I64z: po_imm_or_fail ( 0, 64, FALSE); break; + case OP_I127: po_imm_or_fail ( 0, 127, FALSE); break; case OP_I255: po_imm_or_fail ( 0, 255, FALSE); break; - + case OP_I511: po_imm_or_fail ( 0, 511, FALSE); break; + case OP_I4095: po_imm_or_fail ( 0, 4095, FALSE); break; + case OP_I8191: po_imm_or_fail ( 0, 8191, FALSE); break; case OP_I4b: po_imm_or_fail ( 1, 4, TRUE); break; case OP_oI7b: case OP_I7b: po_imm_or_fail ( 0, 7, TRUE); break; @@ -7513,6 +7792,9 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) case OP_RRnpc_I0: po_reg_or_goto (REG_TYPE_RN, I0); break; I0: po_imm_or_fail (0, 0, FALSE); break; + case OP_RRnpcsp_I32: po_reg_or_goto (REG_TYPE_RN, I32); break; + I32: po_imm_or_fail (1, 32, FALSE); break; + case OP_RF_IF: po_reg_or_goto (REG_TYPE_FN, IF); break; IF: if (!is_immediate_prefix (*str)) @@ -7744,6 +8026,8 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) case OP_oRMQRZ: po_reg_or_goto (REG_TYPE_MQ, try_rr_zr); break; + + case OP_RR_ZR: try_rr_zr: po_reg_or_goto (REG_TYPE_RN, ZR); break; @@ -7772,6 +8056,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) case OP_oRRnpcsp: case OP_RRnpcsp: + case OP_RRnpcsp_I32: if (inst.operands[i].isreg) { if (inst.operands[i].reg == REG_PC) @@ -7830,6 +8115,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) case OP_RMQRZ: case OP_oRMQRZ: + case OP_RR_ZR: if (!inst.operands[i].iszr && inst.operands[i].reg == REG_PC) inst.error = BAD_PC; break; @@ -8650,6 +8936,11 @@ move_or_literal_pool (int i, enum lit_type t, bfd_boolean mode_3) inst.instruction |= (imm & 0x0800) << 15; inst.instruction |= (imm & 0x0700) << 4; inst.instruction |= (imm & 0x00ff); + /* In case this replacement is being done on Armv8-M + Baseline we need to make sure to disable the + instruction size check, as otherwise GAS will reject + the use of this T32 instruction. */ + inst.size_req = 0; return TRUE; } } @@ -9774,10 +10065,42 @@ do_vmrs (void) return; } - /* MVFR2 is only valid at ARMv8-A. */ - if (inst.operands[1].reg == 5) - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), - _(BAD_FPU)); + switch (inst.operands[1].reg) + { + /* MVFR2 is only valid for Armv8-A. 
*/ + case 5: + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), + _(BAD_FPU)); + break; + + /* Check for new Armv8.1-M Mainline changes to . */ + case 1: /* fpscr. */ + constraint (!(ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) + || ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)), + _(BAD_FPU)); + break; + + case 14: /* fpcxt_ns. */ + case 15: /* fpcxt_s. */ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_1m_main), + _("selected processor does not support instruction")); + break; + + case 2: /* fpscr_nzcvqc. */ + case 12: /* vpr. */ + case 13: /* p0. */ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_1m_main) + || (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) + && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)), + _("selected processor does not support instruction")); + if (inst.operands[0].reg != 2 + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + as_warn (_("accessing MVE system register without MVE is UNPREDICTABLE")); + break; + + default: + break; + } /* APSR_ sets isvec. All other refs to PC are illegal. */ if (!inst.operands[0].isvec && Rt == REG_PC) @@ -9805,10 +10128,42 @@ do_vmsr (void) return; } - /* MVFR2 is only valid for ARMv8-A. */ - if (inst.operands[0].reg == 5) - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), - _(BAD_FPU)); + switch (inst.operands[0].reg) + { + /* MVFR2 is only valid for Armv8-A. */ + case 5: + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), + _(BAD_FPU)); + break; + + /* Check for new Armv8.1-M Mainline changes to . */ + case 1: /* fpcr. */ + constraint (!(ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) + || ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)), + _(BAD_FPU)); + break; + + case 14: /* fpcxt_ns. */ + case 15: /* fpcxt_s. */ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_1m_main), + _("selected processor does not support instruction")); + break; + + case 2: /* fpscr_nzcvqc. */ + case 12: /* vpr. */ + case 13: /* p0. */ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_1m_main) + || (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) + && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)), + _("selected processor does not support instruction")); + if (inst.operands[0].reg != 2 + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + as_warn (_("accessing MVE system register without MVE is UNPREDICTABLE")); + break; + + default: + break; + } /* If we get through parsing the register name, we just insert the number generated into the instruction without further validation. */ @@ -10108,6 +10463,9 @@ do_shift (void) static void do_smc (void) { + unsigned int value = inst.relocs[0].exp.X_add_number; + constraint (value > 0xf, _("immediate too large (bigger than 0xF)")); + inst.relocs[0].type = BFD_RELOC_ARM_SMC; inst.relocs[0].pc_rel = 0; } @@ -11052,7 +11410,7 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d) inst.error = _("instruction does not accept unindexed addressing"); } -/* Table of Thumb instructions which exist in both 16- and 32-bit +/* Table of Thumb instructions which exist in 16- and/or 32-bit encodings (the latter only in post-V6T2 cores). The index is the value used in the insns table below. 
When there is more than one possible 16-bit encoding for the instruction, this table always @@ -11081,16 +11439,27 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d) X(_bflx, 0000, f070e001), \ X(_bic, 4380, ea200000), \ X(_bics, 4380, ea300000), \ + X(_cinc, 0000, ea509000), \ + X(_cinv, 0000, ea50a000), \ X(_cmn, 42c0, eb100f00), \ X(_cmp, 2800, ebb00f00), \ + X(_cneg, 0000, ea50b000), \ X(_cpsie, b660, f3af8400), \ X(_cpsid, b670, f3af8600), \ X(_cpy, 4600, ea4f0000), \ + X(_csel, 0000, ea508000), \ + X(_cset, 0000, ea5f900f), \ + X(_csetm, 0000, ea5fa00f), \ + X(_csinc, 0000, ea509000), \ + X(_csinv, 0000, ea50a000), \ + X(_csneg, 0000, ea50b000), \ X(_dec_sp,80dd, f1ad0d00), \ X(_dls, 0000, f040e001), \ + X(_dlstp, 0000, f000e001), \ X(_eor, 4040, ea800000), \ X(_eors, 4040, ea900000), \ X(_inc_sp,00dd, f10d0d00), \ + X(_lctp, 0000, f00fe001), \ X(_ldmia, c800, e8900000), \ X(_ldr, 6800, f8500000), \ X(_ldrb, 7800, f8100000), \ @@ -11101,6 +11470,7 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d) X(_ldr_pc2,4800, f85f0000), \ X(_ldr_sp,9800, f85d0000), \ X(_le, 0000, f00fc001), \ + X(_letp, 0000, f01fc001), \ X(_lsl, 0000, fa00f000), \ X(_lsls, 0000, fa10f000), \ X(_lsr, 0800, fa20f000), \ @@ -11143,6 +11513,7 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d) X(_wfe, bf20, f3af8002), \ X(_wfi, bf30, f3af8003), \ X(_wls, 0000, f040c001), \ + X(_wlstp, 0000, f000c001), \ X(_sev, bf40, f3af8004), \ X(_sevl, bf50, f3af8005), \ X(_udf, de00, f7f0a000) @@ -11896,6 +12267,60 @@ do_t_clz (void) inst.instruction |= Rm; } +/* For the Armv8.1-M conditional instructions. */ +static void +do_t_cond (void) +{ + unsigned Rd, Rn, Rm; + signed int cond; + + constraint (inst.cond != COND_ALWAYS, BAD_COND); + + Rd = inst.operands[0].reg; + switch (inst.instruction) + { + case T_MNEM_csinc: + case T_MNEM_csinv: + case T_MNEM_csneg: + case T_MNEM_csel: + Rn = inst.operands[1].reg; + Rm = inst.operands[2].reg; + cond = inst.operands[3].imm; + constraint (Rn == REG_SP, BAD_SP); + constraint (Rm == REG_SP, BAD_SP); + break; + + case T_MNEM_cinc: + case T_MNEM_cinv: + case T_MNEM_cneg: + Rn = inst.operands[1].reg; + cond = inst.operands[2].imm; + /* Invert the last bit to invert the cond. */ + cond = TOGGLE_BIT (cond, 0); + constraint (Rn == REG_SP, BAD_SP); + Rm = Rn; + break; + + case T_MNEM_csetm: + case T_MNEM_cset: + cond = inst.operands[1].imm; + /* Invert the last bit to invert the cond. */ + cond = TOGGLE_BIT (cond, 0); + Rn = REG_PC; + Rm = REG_PC; + break; + + default: abort (); + } + + set_pred_insn_type (OUTSIDE_PRED_INSN); + inst.instruction = THUMB_OP32 (inst.instruction); + inst.instruction |= Rd << 8; + inst.instruction |= Rn << 16; + inst.instruction |= Rm; + inst.instruction |= cond << 4; +} + static void do_t_csdb (void) { @@ -13671,10 +14096,11 @@ do_t_smc (void) _("SMC is not permitted on this architecture")); constraint (inst.relocs[0].exp.X_op != O_constant, _("expression too complex")); + constraint (value > 0xf, _("immediate too large (bigger than 0xF)")); + inst.relocs[0].type = BFD_RELOC_UNUSED; - inst.instruction |= (value & 0xf000) >> 12; - inst.instruction |= (value & 0x0ff0); inst.instruction |= (value & 0x000f) << 16; + /* PR gas/15623: SMC instructions must be last in an IT block. */ set_pred_insn_type_last (); } @@ -14068,35 +14494,52 @@ v8_1_loop_reloc (int is_le) } } -/* To handle the Scalar Low Overhead Loop instructions - in Armv8.1-M Mainline. */ +/* For shifts with four operands in MVE. 
*/ static void -do_t_loloop (void) +do_mve_scalar_shift1 (void) { - unsigned long insn = inst.instruction; + unsigned int value = inst.operands[2].imm; - set_pred_insn_type (OUTSIDE_PRED_INSN); - inst.instruction = THUMB_OP32 (inst.instruction); + inst.instruction |= inst.operands[0].reg << 16; + inst.instruction |= inst.operands[1].reg << 8; - switch (insn) - { - case T_MNEM_le: - /* le
, , #0 is a synonym for - VQMOVN.I
, . */ - if (imm == 0) + if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH)) + return; + + if (inst.operands[2].isscalar) { - inst.operands[2].present = 0; - inst.instruction = N_MNEM_vqmovn; - do_neon_qmovn (); - return; + constraint (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU); + enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_I16 | N_I32 | N_F_16_32 | N_KEY); + NEON_ENCODE (SCALAR, inst); + neon_mul_mac (et, neon_quad (rs)); } + else if (!inst.operands[2].isvec) + { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU); - constraint (imm < 1 || (unsigned)imm > et.size, - _("immediate out of range")); - neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, et.size - imm); + enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_SU_MVE | N_KEY); + + neon_dyadic_misc (NT_unsigned, N_SU_MVE, 0); + } + else + { + constraint (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU); + /* The "untyped" case can't happen. Do this to stop the "U" bit being + affected if we specify unsigned args. */ + neon_dyadic_misc (NT_untyped, N_IF_32, 0); + } } static void -do_neon_rshift_sat_narrow_u (void) +do_bfloat_vfma (void) { - /* FIXME: Types for narrowing. If operands are signed, results can be signed - or unsigned. If operands are unsigned, results must also be unsigned. */ - struct neon_type_el et = neon_check_type (2, NS_DQI, - N_EQK | N_HLF | N_UNS, N_S16 | N_S32 | N_S64 | N_KEY); - int imm = inst.operands[2].imm; - /* This gets the bounds check, size encoding and immediate bits calculation - right. */ - et.size /= 2; + constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU)); + constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16)); + enum neon_shape rs; + int t_bit = 0; - /* VQSHRUN.I
, , #0 is a synonym for - VQMOVUN.I
, . */ - if (imm == 0) + if (inst.instruction != B_MNEM_vfmab) + { + t_bit = 1; + inst.instruction = B_MNEM_vfmat; + } + + if (inst.operands[2].isscalar) { - inst.operands[2].present = 0; - inst.instruction = N_MNEM_vqmovun; - do_neon_qmovun (); - return; + rs = neon_select_shape (NS_QQS, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY); + + inst.instruction |= (1 << 25); + int index = inst.operands[2].reg & 0xf; + constraint (!(index < 4), _("index must be in the range 0 to 3")); + inst.operands[2].reg >>= 4; + constraint (!(inst.operands[2].reg < 8), + _("indexed register must be less than 8")); + neon_three_args (t_bit); + inst.instruction |= ((index & 1) << 3); + inst.instruction |= ((index & 2) << 4); + } + else + { + rs = neon_select_shape (NS_QQQ, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY); + neon_three_args (t_bit); } - constraint (imm < 1 || (unsigned)imm > et.size, - _("immediate out of range")); - /* FIXME: The manual is kind of unclear about what value U should have in - VQ{R}SHRUN instructions, but U=0, op=0 definitely encodes VRSHR, so it - must be 1. */ - neon_imm_shift (TRUE, 1, 0, et, et.size - imm); } static void -do_neon_movn (void) +do_neon_fmac (void) { - struct neon_type_el et = neon_check_type (2, NS_DQ, - N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY); - NEON_ENCODE (INTEGER, inst); - neon_two_same (0, 1, et.size / 2); + if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_fma) + && try_vfp_nsyn (3, do_vfp_nsyn_fma_fms) == SUCCESS) + return; + + if (!check_simd_pred_availability (TRUE, NEON_CHECK_CC | NEON_CHECK_ARCH)) + return; + + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext)) + { + enum neon_shape rs = neon_select_shape (NS_QQQ, NS_QQR, NS_NULL); + struct neon_type_el et = neon_check_type (3, rs, N_F_MVE | N_KEY, N_EQK, + N_EQK); + + if (rs == NS_QQR) + { + + if (inst.operands[2].reg == REG_SP) + as_tsktsk (MVE_BAD_SP); + else if (inst.operands[2].reg == REG_PC) + as_tsktsk (MVE_BAD_PC); + + inst.instruction = 0xee310e40; + inst.instruction |= (et.size == 16) << 28; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[1].reg) << 6; + inst.instruction |= inst.operands[2].reg; + inst.is_neon = 1; + return; + } + } + else + { + constraint (!inst.operands[2].isvec, BAD_FPU); + } + + neon_dyadic_misc (NT_untyped, N_IF_32, 0); } static void -do_neon_rshift_narrow (void) +do_mve_vfma (void) { - struct neon_type_el et = neon_check_type (2, NS_DQI, - N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY); - int imm = inst.operands[2].imm; - /* This gets the bounds check, size encoding and immediate bits calculation - right. */ - et.size /= 2; - - /* If immediate is zero then we are a pseudo-instruction for - VMOVN.I
, */ - if (imm == 0) + if (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_bf16) && + inst.cond == COND_ALWAYS) { - inst.operands[2].present = 0; - inst.instruction = N_MNEM_vmovn; - do_neon_movn (); - return; + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU); + inst.instruction = N_MNEM_vfma; + inst.pred_insn_type = INSIDE_VPT_INSN; + inst.cond = 0xf; + return do_neon_fmac(); + } + else + { + do_bfloat_vfma(); } +} - constraint (imm < 1 || (unsigned)imm > et.size, - _("immediate out of range for narrowing operation")); - neon_imm_shift (FALSE, 0, 0, et, et.size - imm); +static void +do_neon_tst (void) +{ + enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_8 | N_16 | N_32 | N_KEY); + neon_three_same (neon_quad (rs), 0, et.size); } +/* VMUL with 3 registers allows the P8 type. The scalar version supports the + same types as the MAC equivalents. The polynomial type for this instruction + is encoded the same as the integer type. */ + static void -do_neon_shll (void) +do_neon_mul (void) { - /* FIXME: Type checking when lengthening. */ - struct neon_type_el et = neon_check_type (2, NS_QDI, - N_EQK | N_DBL, N_I8 | N_I16 | N_I32 | N_KEY); - unsigned imm = inst.operands[2].imm; + if (try_vfp_nsyn (3, do_vfp_nsyn_mul) == SUCCESS) + return; - if (imm == et.size) - { - /* Maximum shift variant. */ - NEON_ENCODE (INTEGER, inst); - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (inst.operands[1].reg); - inst.instruction |= HI1 (inst.operands[1].reg) << 5; - inst.instruction |= neon_logbits (et.size) << 18; + if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH)) + return; - neon_dp_fixup (&inst); + if (inst.operands[2].isscalar) + { + constraint (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU); + do_neon_mac_maybe_scalar (); } else { - /* A more-specific type check for non-max versions. */ - et = neon_check_type (2, NS_QDI, - N_EQK | N_DBL, N_SU_32 | N_KEY); - NEON_ENCODE (IMMED, inst); - neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, imm); + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + { + enum neon_shape rs = neon_select_shape (NS_QQR, NS_QQQ, NS_NULL); + struct neon_type_el et + = neon_check_type (3, rs, N_EQK, N_EQK, N_I_MVE | N_F_MVE | N_KEY); + if (et.type == NT_float) + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext), + BAD_FPU); + + neon_dyadic_misc (NT_float, N_I_MVE | N_F_MVE, 0); + } + else + { + constraint (!inst.operands[2].isvec, BAD_FPU); + neon_dyadic_misc (NT_poly, + N_I8 | N_I16 | N_I32 | N_F16 | N_F32 | N_P8, 0); + } } } -/* Check the various types for the VCVT instruction, and return which version - the current instruction is. */ +static void +do_neon_qdmulh (void) +{ + if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC)) + return; -#define CVT_FLAVOUR_VAR \ - CVT_VAR (s32_f32, N_S32, N_F32, whole_reg, "ftosls", "ftosis", "ftosizs") \ - CVT_VAR (u32_f32, N_U32, N_F32, whole_reg, "ftouls", "ftouis", "ftouizs") \ - CVT_VAR (f32_s32, N_F32, N_S32, whole_reg, "fsltos", "fsitos", NULL) \ - CVT_VAR (f32_u32, N_F32, N_U32, whole_reg, "fultos", "fuitos", NULL) \ - /* Half-precision conversions. 
*/ \ - CVT_VAR (s16_f16, N_S16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL) \ - CVT_VAR (u16_f16, N_U16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL) \ - CVT_VAR (f16_s16, N_F16 | N_KEY, N_S16, whole_reg, NULL, NULL, NULL) \ - CVT_VAR (f16_u16, N_F16 | N_KEY, N_U16, whole_reg, NULL, NULL, NULL) \ - CVT_VAR (f32_f16, N_F32, N_F16, whole_reg, NULL, NULL, NULL) \ - CVT_VAR (f16_f32, N_F16, N_F32, whole_reg, NULL, NULL, NULL) \ - /* New VCVT instructions introduced by ARMv8.2 fp16 extension. \ - Compared with single/double precision variants, only the co-processor \ - field is different, so the encoding flow is reused here. */ \ - CVT_VAR (f16_s32, N_F16 | N_KEY, N_S32, N_VFP, "fsltos", "fsitos", NULL) \ - CVT_VAR (f16_u32, N_F16 | N_KEY, N_U32, N_VFP, "fultos", "fuitos", NULL) \ - CVT_VAR (u32_f16, N_U32, N_F16 | N_KEY, N_VFP, "ftouls", "ftouis", "ftouizs")\ - CVT_VAR (s32_f16, N_S32, N_F16 | N_KEY, N_VFP, "ftosls", "ftosis", "ftosizs")\ - /* VFP instructions. */ \ - CVT_VAR (f32_f64, N_F32, N_F64, N_VFP, NULL, "fcvtsd", NULL) \ - CVT_VAR (f64_f32, N_F64, N_F32, N_VFP, NULL, "fcvtds", NULL) \ - CVT_VAR (s32_f64, N_S32, N_F64 | key, N_VFP, "ftosld", "ftosid", "ftosizd") \ - CVT_VAR (u32_f64, N_U32, N_F64 | key, N_VFP, "ftould", "ftouid", "ftouizd") \ - CVT_VAR (f64_s32, N_F64 | key, N_S32, N_VFP, "fsltod", "fsitod", NULL) \ - CVT_VAR (f64_u32, N_F64 | key, N_U32, N_VFP, "fultod", "fuitod", NULL) \ - /* VFP instructions with bitshift. */ \ - CVT_VAR (f32_s16, N_F32 | key, N_S16, N_VFP, "fshtos", NULL, NULL) \ - CVT_VAR (f32_u16, N_F32 | key, N_U16, N_VFP, "fuhtos", NULL, NULL) \ - CVT_VAR (f64_s16, N_F64 | key, N_S16, N_VFP, "fshtod", NULL, NULL) \ - CVT_VAR (f64_u16, N_F64 | key, N_U16, N_VFP, "fuhtod", NULL, NULL) \ - CVT_VAR (s16_f32, N_S16, N_F32 | key, N_VFP, "ftoshs", NULL, NULL) \ - CVT_VAR (u16_f32, N_U16, N_F32 | key, N_VFP, "ftouhs", NULL, NULL) \ - CVT_VAR (s16_f64, N_S16, N_F64 | key, N_VFP, "ftoshd", NULL, NULL) \ - CVT_VAR (u16_f64, N_U16, N_F64 | key, N_VFP, "ftouhd", NULL, NULL) + if (inst.operands[2].isscalar) + { + constraint (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU); + enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_S16 | N_S32 | N_KEY); + NEON_ENCODE (SCALAR, inst); + neon_mul_mac (et, neon_quad (rs)); + } + else + { + enum neon_shape rs; + struct neon_type_el et; + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + { + rs = neon_select_shape (NS_QQR, NS_QQQ, NS_NULL); + et = neon_check_type (3, rs, + N_EQK, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY); + } + else + { + rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); + et = neon_check_type (3, rs, + N_EQK, N_EQK, N_S16 | N_S32 | N_KEY); + } -#define CVT_VAR(C, X, Y, R, BSN, CN, ZN) \ - neon_cvt_flavour_##C, + NEON_ENCODE (INTEGER, inst); + if (rs == NS_QQR) + mve_encode_qqr (et.size, 0, 0); + else + /* The U bit (rounding) comes from bit mask. */ + neon_three_same (neon_quad (rs), 0, et.size); + } +} -/* The different types of conversions we can do. 
*/ -enum neon_cvt_flavour +static void +do_mve_vaddv (void) { - CVT_FLAVOUR_VAR - neon_cvt_flavour_invalid, - neon_cvt_flavour_first_fp = neon_cvt_flavour_f32_f64 -}; - -#undef CVT_VAR + enum neon_shape rs = neon_select_shape (NS_RQ, NS_NULL); + struct neon_type_el et + = neon_check_type (2, rs, N_EQK, N_SU_32 | N_KEY); -static enum neon_cvt_flavour -get_neon_cvt_flavour (enum neon_shape rs) -{ -#define CVT_VAR(C,X,Y,R,BSN,CN,ZN) \ - et = neon_check_type (2, rs, (R) | (X), (R) | (Y)); \ - if (et.type != NT_invtype) \ - { \ - inst.error = NULL; \ - return (neon_cvt_flavour_##C); \ - } + if (et.type == NT_invtype) + first_error (BAD_EL_TYPE); - struct neon_type_el et; - unsigned whole_reg = (rs == NS_FFI || rs == NS_FD || rs == NS_DF - || rs == NS_FF) ? N_VFP : 0; - /* The instruction versions which take an immediate take one register - argument, which is extended to the width of the full register. Thus the - "source" and "destination" registers must have the same width. Hack that - here by making the size equal to the key (wider, in this case) operand. */ - unsigned key = (rs == NS_QQI || rs == NS_DDI || rs == NS_FFI) ? N_KEY : 0; + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; - CVT_FLAVOUR_VAR; + constraint (inst.operands[1].reg > 14, MVE_BAD_QREG); - return neon_cvt_flavour_invalid; -#undef CVT_VAR + mve_encode_rq (et.type == NT_unsigned, et.size); } -enum neon_cvt_mode +static void +do_mve_vhcadd (void) { - neon_cvt_mode_a, - neon_cvt_mode_n, - neon_cvt_mode_p, - neon_cvt_mode_m, - neon_cvt_mode_z, - neon_cvt_mode_x, - neon_cvt_mode_r -}; + enum neon_shape rs = neon_select_shape (NS_QQQI, NS_NULL); + struct neon_type_el et + = neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY); -/* Neon-syntax VFP conversions. */ + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; + + unsigned rot = inst.relocs[0].exp.X_add_number; + constraint (rot != 90 && rot != 270, _("immediate out of range")); + + if (et.size == 32 && inst.operands[0].reg == inst.operands[2].reg) + as_tsktsk (_("Warning: 32-bit element size and same first and third " + "operand makes instruction UNPREDICTABLE")); + + mve_encode_qqq (0, et.size); + inst.instruction |= (rot == 270) << 12; + inst.is_neon = 1; +} static void -do_vfp_nsyn_cvt (enum neon_shape rs, enum neon_cvt_flavour flavour) +do_mve_vqdmull (void) { - const char *opname = 0; + enum neon_shape rs = neon_select_shape (NS_QQQ, NS_QQR, NS_NULL); + struct neon_type_el et + = neon_check_type (3, rs, N_EQK, N_EQK, N_S16 | N_S32 | N_KEY); - if (rs == NS_DDI || rs == NS_QQI || rs == NS_FFI - || rs == NS_FHI || rs == NS_HFI) - { - /* Conversions with immediate bitshift. 
*/ - const char *enc[] = - { -#define CVT_VAR(C,A,B,R,BSN,CN,ZN) BSN, - CVT_FLAVOUR_VAR - NULL -#undef CVT_VAR - }; + if (et.size == 32 + && (inst.operands[0].reg == inst.operands[1].reg + || (rs == NS_QQQ && inst.operands[0].reg == inst.operands[2].reg))) + as_tsktsk (BAD_MVE_SRCDEST); - if (flavour < (int) ARRAY_SIZE (enc)) - { - opname = enc[flavour]; - constraint (inst.operands[0].reg != inst.operands[1].reg, - _("operands 0 and 1 must be the same register")); - inst.operands[1] = inst.operands[2]; - memset (&inst.operands[2], '\0', sizeof (inst.operands[2])); - } + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; + + if (rs == NS_QQQ) + { + mve_encode_qqq (et.size == 32, 64); + inst.instruction |= 1; } else { - /* Conversions without bitshift. */ - const char *enc[] = - { -#define CVT_VAR(C,A,B,R,BSN,CN,ZN) CN, - CVT_FLAVOUR_VAR - NULL -#undef CVT_VAR - }; - - if (flavour < (int) ARRAY_SIZE (enc)) - opname = enc[flavour]; + mve_encode_qqr (64, et.size == 32, 0); + inst.instruction |= 0x3 << 5; } +} - if (opname) - do_vfp_nsyn_opcode (opname); +static void +do_mve_vadc (void) +{ + enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL); + struct neon_type_el et + = neon_check_type (3, rs, N_KEY | N_I32, N_EQK, N_EQK); - /* ARMv8.2 fp16 VCVT instruction. */ - if (flavour == neon_cvt_flavour_s32_f16 - || flavour == neon_cvt_flavour_u32_f16 - || flavour == neon_cvt_flavour_f16_u32 - || flavour == neon_cvt_flavour_f16_s32) - do_scalar_fp16_v82_encode (); + if (et.type == NT_invtype) + first_error (BAD_EL_TYPE); + + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; + + mve_encode_qqq (0, 64); } static void -do_vfp_nsyn_cvtz (void) +do_mve_vbrsr (void) { - enum neon_shape rs = neon_select_shape (NS_FH, NS_FF, NS_FD, NS_NULL); - enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs); - const char *enc[] = - { -#define CVT_VAR(C,A,B,R,BSN,CN,ZN) ZN, - CVT_FLAVOUR_VAR - NULL -#undef CVT_VAR - }; + enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL); + struct neon_type_el et + = neon_check_type (3, rs, N_EQK, N_EQK, N_8 | N_16 | N_32 | N_KEY); - if (flavour < (int) ARRAY_SIZE (enc) && enc[flavour]) - do_vfp_nsyn_opcode (enc[flavour]); + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; + + mve_encode_qqr (et.size, 0, 0); } static void -do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour, - enum neon_cvt_mode mode) +do_mve_vsbc (void) { - int sz, op; - int rm; + neon_check_type (3, NS_QQQ, N_EQK, N_EQK, N_I32 | N_KEY); - /* Targets like FPv5-SP-D16 don't support FP v8 instructions with - D register operands. 
*/ - if (flavour == neon_cvt_flavour_s32_f64 - || flavour == neon_cvt_flavour_u32_f64) - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), - _(BAD_FPU)); + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; - if (flavour == neon_cvt_flavour_s32_f16 - || flavour == neon_cvt_flavour_u32_f16) - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16), - _(BAD_FP16)); + mve_encode_qqq (1, 64); +} - set_pred_insn_type (OUTSIDE_PRED_INSN); +static void +do_mve_vmulh (void) +{ + enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL); + struct neon_type_el et + = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_MVE | N_KEY); - switch (flavour) - { - case neon_cvt_flavour_s32_f64: - sz = 1; - op = 1; - break; - case neon_cvt_flavour_s32_f32: - sz = 0; - op = 1; - break; - case neon_cvt_flavour_s32_f16: - sz = 0; - op = 1; - break; - case neon_cvt_flavour_u32_f64: - sz = 1; - op = 0; - break; - case neon_cvt_flavour_u32_f32: - sz = 0; - op = 0; - break; - case neon_cvt_flavour_u32_f16: - sz = 0; - op = 0; - break; - default: - first_error (_("invalid instruction shape")); - return; - } + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; - switch (mode) - { - case neon_cvt_mode_a: rm = 0; break; - case neon_cvt_mode_n: rm = 1; break; - case neon_cvt_mode_p: rm = 2; break; - case neon_cvt_mode_m: rm = 3; break; - default: first_error (_("invalid rounding mode")); return; - } + mve_encode_qqq (et.type == NT_unsigned, et.size); +} - NEON_ENCODE (FPV8, inst); - encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); - encode_arm_vfp_reg (inst.operands[1].reg, sz == 1 ? VFP_REG_Dm : VFP_REG_Sm); - inst.instruction |= sz << 8; +static void +do_mve_vqdmlah (void) +{ + enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL); + struct neon_type_el et + = neon_check_type (3, rs, N_EQK, N_EQK, N_S_32 | N_KEY); - /* ARMv8.2 fp16 VCVT instruction. */ - if (flavour == neon_cvt_flavour_s32_f16 - ||flavour == neon_cvt_flavour_u32_f16) - do_scalar_fp16_v82_encode (); - inst.instruction |= op << 7; - inst.instruction |= rm << 16; - inst.instruction |= 0xf0000000; - inst.is_neon = TRUE; + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; + + mve_encode_qqr (et.size, et.type == NT_unsigned, 0); } static void -do_neon_cvt_1 (enum neon_cvt_mode mode) +do_mve_vqdmladh (void) { - enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_FFI, NS_DD, NS_QQ, - NS_FD, NS_DF, NS_FF, NS_QD, NS_DQ, - NS_FH, NS_HF, NS_FHI, NS_HFI, - NS_NULL); - enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs); - - if (flavour == neon_cvt_flavour_invalid) - return; + enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL); + struct neon_type_el et + = neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY); - /* PR11109: Handle round-to-zero for VCVT conversions. 
*/ - if (mode == neon_cvt_mode_z - && ARM_CPU_HAS_FEATURE (cpu_variant, fpu_arch_vfp_v2) - && (flavour == neon_cvt_flavour_s16_f16 - || flavour == neon_cvt_flavour_u16_f16 - || flavour == neon_cvt_flavour_s32_f32 - || flavour == neon_cvt_flavour_u32_f32 - || flavour == neon_cvt_flavour_s32_f64 - || flavour == neon_cvt_flavour_u32_f64) - && (rs == NS_FD || rs == NS_FF)) - { - do_vfp_nsyn_cvtz (); - return; - } + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; - /* ARMv8.2 fp16 VCVT conversions. */ - if (mode == neon_cvt_mode_z - && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16) - && (flavour == neon_cvt_flavour_s32_f16 - || flavour == neon_cvt_flavour_u32_f16) - && (rs == NS_FH)) - { - do_vfp_nsyn_cvtz (); - do_scalar_fp16_v82_encode (); - return; - } + mve_encode_qqq (0, et.size); +} - /* VFP rather than Neon conversions. */ - if (flavour >= neon_cvt_flavour_first_fp) - { - if (mode == neon_cvt_mode_x || mode == neon_cvt_mode_z) - do_vfp_nsyn_cvt (rs, flavour); - else - do_vfp_nsyn_cvt_fpv8 (flavour, mode); - return; - } +static void +do_mve_vmull (void) +{ - switch (rs) + enum neon_shape rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_DDS, + NS_QQS, NS_QQQ, NS_QQR, NS_NULL); + if (inst.cond == COND_ALWAYS + && ((unsigned)inst.instruction) == M_MNEM_vmullt) { - case NS_QQI: - if (mode == neon_cvt_mode_z - && (flavour == neon_cvt_flavour_f16_s16 - || flavour == neon_cvt_flavour_f16_u16 - || flavour == neon_cvt_flavour_s16_f16 - || flavour == neon_cvt_flavour_u16_f16 - || flavour == neon_cvt_flavour_f32_u32 - || flavour == neon_cvt_flavour_f32_s32 - || flavour == neon_cvt_flavour_s32_f32 - || flavour == neon_cvt_flavour_u32_f32)) - { - if (check_simd_pred_availability (1, NEON_CHECK_CC | NEON_CHECK_ARCH)) - return; - } - else if (mode == neon_cvt_mode_n) + + if (rs == NS_QQQ) { - /* We are dealing with vcvt with the 'ne' condition. */ - inst.cond = 0x1; - inst.instruction = N_MNEM_vcvt; - do_neon_cvt_1 (neon_cvt_mode_z); - return; + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + goto neon_vmul; } - /* fall through. */ - case NS_DDI: - { - unsigned immbits; - unsigned enctab[] = {0x0000100, 0x1000100, 0x0, 0x1000000, - 0x0000100, 0x1000100, 0x0, 0x1000000}; + else + goto neon_vmul; + } - if ((rs != NS_QQI || !ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext)) - && vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL) - return; + constraint (rs != NS_QQQ, BAD_FPU); + struct neon_type_el et = neon_check_type (3, rs, N_EQK , N_EQK, + N_SU_32 | N_P8 | N_P16 | N_KEY); - if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext)) - { - constraint (inst.operands[2].present && inst.operands[2].imm == 0, - _("immediate value out of range")); - switch (flavour) - { - case neon_cvt_flavour_f16_s16: - case neon_cvt_flavour_f16_u16: - case neon_cvt_flavour_s16_f16: - case neon_cvt_flavour_u16_f16: - constraint (inst.operands[2].imm > 16, - _("immediate value out of range")); - break; - case neon_cvt_flavour_f32_u32: - case neon_cvt_flavour_f32_s32: - case neon_cvt_flavour_s32_f32: - case neon_cvt_flavour_u32_f32: - constraint (inst.operands[2].imm > 32, - _("immediate value out of range")); - break; - default: - inst.error = BAD_FPU; - return; - } - } + /* We are dealing with MVE's vmullt. */ + if (et.size == 32 + && (inst.operands[0].reg == inst.operands[1].reg + || inst.operands[0].reg == inst.operands[2].reg)) + as_tsktsk (BAD_MVE_SRCDEST); - /* Fixed-point conversion with #0 immediate is encoded as an - integer conversion. 
*/ - if (inst.operands[2].present && inst.operands[2].imm == 0) - goto int_encode; - NEON_ENCODE (IMMED, inst); - if (flavour != neon_cvt_flavour_invalid) - inst.instruction |= enctab[flavour]; - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (inst.operands[1].reg); - inst.instruction |= HI1 (inst.operands[1].reg) << 5; - inst.instruction |= neon_quad (rs) << 6; - inst.instruction |= 1 << 21; - if (flavour < neon_cvt_flavour_s16_f16) - { - inst.instruction |= 1 << 21; - immbits = 32 - inst.operands[2].imm; - inst.instruction |= immbits << 16; - } - else - { - inst.instruction |= 3 << 20; - immbits = 16 - inst.operands[2].imm; - inst.instruction |= immbits << 16; - inst.instruction &= ~(1 << 9); - } + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; - neon_dp_fixup (&inst); - } - break; + if (et.type == NT_poly) + mve_encode_qqq (neon_logbits (et.size), 64); + else + mve_encode_qqq (et.type == NT_unsigned, et.size); - case NS_QQ: - if ((mode == neon_cvt_mode_a || mode == neon_cvt_mode_n - || mode == neon_cvt_mode_m || mode == neon_cvt_mode_p) - && (flavour == neon_cvt_flavour_s16_f16 - || flavour == neon_cvt_flavour_u16_f16 - || flavour == neon_cvt_flavour_s32_f32 - || flavour == neon_cvt_flavour_u32_f32)) - { - if (check_simd_pred_availability (1, - NEON_CHECK_CC | NEON_CHECK_ARCH8)) - return; - } - else if (mode == neon_cvt_mode_z - && (flavour == neon_cvt_flavour_f16_s16 - || flavour == neon_cvt_flavour_f16_u16 - || flavour == neon_cvt_flavour_s16_f16 - || flavour == neon_cvt_flavour_u16_f16 - || flavour == neon_cvt_flavour_f32_u32 - || flavour == neon_cvt_flavour_f32_s32 - || flavour == neon_cvt_flavour_s32_f32 - || flavour == neon_cvt_flavour_u32_f32)) - { - if (check_simd_pred_availability (1, - NEON_CHECK_CC | NEON_CHECK_ARCH)) - return; - } - /* fall through. */ - case NS_DD: - if (mode != neon_cvt_mode_x && mode != neon_cvt_mode_z) - { + return; - NEON_ENCODE (FLOAT, inst); - if (check_simd_pred_availability (1, - NEON_CHECK_CC | NEON_CHECK_ARCH8)) - return; + neon_vmul: + inst.instruction = N_MNEM_vmul; + inst.cond = 0xb; + if (thumb_mode) + inst.pred_insn_type = INSIDE_IT_INSN; + do_neon_mul (); +} - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (inst.operands[1].reg); - inst.instruction |= HI1 (inst.operands[1].reg) << 5; - inst.instruction |= neon_quad (rs) << 6; - inst.instruction |= (flavour == neon_cvt_flavour_u16_f16 - || flavour == neon_cvt_flavour_u32_f32) << 7; - inst.instruction |= mode << 8; - if (flavour == neon_cvt_flavour_u16_f16 - || flavour == neon_cvt_flavour_s16_f16) - /* Mask off the original size bits and reencode them. 
*/ - inst.instruction = ((inst.instruction & 0xfff3ffff) | (1 << 18)); +static void +do_mve_vabav (void) +{ + enum neon_shape rs = neon_select_shape (NS_RQQ, NS_NULL); - if (thumb_mode) - inst.instruction |= 0xfc000000; - else - inst.instruction |= 0xf0000000; - } - else - { - int_encode: - { - unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080, - 0x100, 0x180, 0x0, 0x080}; + if (rs == NS_NULL) + return; - NEON_ENCODE (INTEGER, inst); + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + return; - if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext)) - { - if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL) - return; - } + struct neon_type_el et = neon_check_type (2, NS_NULL, N_EQK, N_KEY | N_S8 + | N_S16 | N_S32 | N_U8 | N_U16 + | N_U32); - if (flavour != neon_cvt_flavour_invalid) - inst.instruction |= enctab[flavour]; + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (inst.operands[1].reg); - inst.instruction |= HI1 (inst.operands[1].reg) << 5; - inst.instruction |= neon_quad (rs) << 6; - if (flavour >= neon_cvt_flavour_s16_f16 - && flavour <= neon_cvt_flavour_f16_u16) - /* Half precision. */ - inst.instruction |= 1 << 18; - else - inst.instruction |= 2 << 18; + mve_encode_rqq (et.type == NT_unsigned, et.size); +} - neon_dp_fixup (&inst); - } - } - break; +static void +do_mve_vmladav (void) +{ + enum neon_shape rs = neon_select_shape (NS_RQQ, NS_NULL); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_SU_MVE | N_KEY); - /* Half-precision conversions for Advanced SIMD -- neon. */ - case NS_QD: - case NS_DQ: - if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL) - return; + if (et.type == NT_unsigned + && (inst.instruction == M_MNEM_vmladavx + || inst.instruction == M_MNEM_vmladavax + || inst.instruction == M_MNEM_vmlsdav + || inst.instruction == M_MNEM_vmlsdava + || inst.instruction == M_MNEM_vmlsdavx + || inst.instruction == M_MNEM_vmlsdavax)) + first_error (BAD_SIMD_TYPE); - if ((rs == NS_DQ) - && (inst.vectype.el[0].size != 16 || inst.vectype.el[1].size != 32)) - { - as_bad (_("operand size must match register width")); - break; - } + constraint (inst.operands[2].reg > 14, + _("MVE vector register in the range [Q0..Q7] expected")); - if ((rs == NS_QD) - && ((inst.vectype.el[0].size != 32 || inst.vectype.el[1].size != 16))) - { - as_bad (_("operand size must match register width")); - break; - } + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; - if (rs == NS_DQ) - inst.instruction = 0x3b60600; - else - inst.instruction = 0x3b60700; + if (inst.instruction == M_MNEM_vmlsdav + || inst.instruction == M_MNEM_vmlsdava + || inst.instruction == M_MNEM_vmlsdavx + || inst.instruction == M_MNEM_vmlsdavax) + inst.instruction |= (et.size == 8) << 28; + else + inst.instruction |= (et.size == 8) << 8; - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (inst.operands[1].reg); - inst.instruction |= HI1 (inst.operands[1].reg) << 5; - neon_dp_fixup (&inst); - break; + mve_encode_rqq (et.type == NT_unsigned, 64); + inst.instruction |= (et.size == 32) << 16; +} - default: - /* Some VFP conversions go here (s32 <-> f32, u32 <-> f32). 
*/ - if (mode == neon_cvt_mode_x || mode == neon_cvt_mode_z) - do_vfp_nsyn_cvt (rs, flavour); - else - do_vfp_nsyn_cvt_fpv8 (flavour, mode); +static void +do_mve_vmlaldav (void) +{ + enum neon_shape rs = neon_select_shape (NS_RRQQ, NS_NULL); + struct neon_type_el et + = neon_check_type (4, rs, N_EQK, N_EQK, N_EQK, + N_S16 | N_S32 | N_U16 | N_U32 | N_KEY); + + if (et.type == NT_unsigned + && (inst.instruction == M_MNEM_vmlsldav + || inst.instruction == M_MNEM_vmlsldava + || inst.instruction == M_MNEM_vmlsldavx + || inst.instruction == M_MNEM_vmlsldavax)) + first_error (BAD_SIMD_TYPE); + + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; + + mve_encode_rrqq (et.type == NT_unsigned, et.size); +} + +static void +do_mve_vrmlaldavh (void) +{ + struct neon_type_el et; + if (inst.instruction == M_MNEM_vrmlsldavh + || inst.instruction == M_MNEM_vrmlsldavha + || inst.instruction == M_MNEM_vrmlsldavhx + || inst.instruction == M_MNEM_vrmlsldavhax) + { + et = neon_check_type (4, NS_RRQQ, N_EQK, N_EQK, N_EQK, N_S32 | N_KEY); + if (inst.operands[1].reg == REG_SP) + as_tsktsk (MVE_BAD_SP); } + else + { + if (inst.instruction == M_MNEM_vrmlaldavhx + || inst.instruction == M_MNEM_vrmlaldavhax) + et = neon_check_type (4, NS_RRQQ, N_EQK, N_EQK, N_EQK, N_S32 | N_KEY); + else + et = neon_check_type (4, NS_RRQQ, N_EQK, N_EQK, N_EQK, + N_U32 | N_S32 | N_KEY); + /* vrmlaldavh's encoding with SP as the second, odd, GPR operand may alias + with vmax/min instructions, making the use of SP in assembly really + nonsensical, so instead of issuing a warning like we do for other uses + of SP for the odd register operand we error out. */ + constraint (inst.operands[1].reg == REG_SP, BAD_SP); + } + + /* Make sure we still check the second operand is an odd one and that PC is + disallowed. This because we are parsing for any GPR operand, to be able + to distinguish between giving a warning or an error for SP as described + above. 
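That is, sp as the odd register operand of the vrmlsldavh forms only draws the warning above, while the vrmlaldavh forms reject it outright.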
*/ + constraint ((inst.operands[1].reg % 2) != 1, BAD_EVEN); + constraint (inst.operands[1].reg == REG_PC, BAD_PC); + + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; + + mve_encode_rrqq (et.type == NT_unsigned, 0); } + static void -do_neon_cvtr (void) +do_mve_vmaxnmv (void) { - do_neon_cvt_1 (neon_cvt_mode_x); + enum neon_shape rs = neon_select_shape (NS_RQ, NS_NULL); + struct neon_type_el et + = neon_check_type (2, rs, N_EQK, N_F_MVE | N_KEY); + + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; + + if (inst.operands[0].reg == REG_SP) + as_tsktsk (MVE_BAD_SP); + else if (inst.operands[0].reg == REG_PC) + as_tsktsk (MVE_BAD_PC); + + mve_encode_rq (et.size == 16, 64); } static void -do_neon_cvt (void) +do_mve_vmaxv (void) { - do_neon_cvt_1 (neon_cvt_mode_z); + enum neon_shape rs = neon_select_shape (NS_RQ, NS_NULL); + struct neon_type_el et; + + if (inst.instruction == M_MNEM_vmaxv || inst.instruction == M_MNEM_vminv) + et = neon_check_type (2, rs, N_EQK, N_SU_MVE | N_KEY); + else + et = neon_check_type (2, rs, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY); + + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; + + if (inst.operands[0].reg == REG_SP) + as_tsktsk (MVE_BAD_SP); + else if (inst.operands[0].reg == REG_PC) + as_tsktsk (MVE_BAD_PC); + + mve_encode_rq (et.type == NT_unsigned, et.size); } + static void -do_neon_cvta (void) +do_neon_qrdmlah (void) { - do_neon_cvt_1 (neon_cvt_mode_a); + if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC)) + return; + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + { + /* Check we're on the correct architecture. */ + if (!mark_feature_used (&fpu_neon_ext_armv8)) + inst.error + = _("instruction form not available on this architecture."); + else if (!mark_feature_used (&fpu_neon_ext_v8_1)) + { + as_warn (_("this instruction implies use of ARMv8.1 AdvSIMD.")); + record_feature_use (&fpu_neon_ext_v8_1); + } + if (inst.operands[2].isscalar) + { + enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_S16 | N_S32 | N_KEY); + NEON_ENCODE (SCALAR, inst); + neon_mul_mac (et, neon_quad (rs)); + } + else + { + enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_S16 | N_S32 | N_KEY); + NEON_ENCODE (INTEGER, inst); + /* The U bit (rounding) comes from bit mask. */ + neon_three_same (neon_quad (rs), 0, et.size); + } + } + else + { + enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL); + struct neon_type_el et + = neon_check_type (3, rs, N_EQK, N_EQK, N_S_32 | N_KEY); + + NEON_ENCODE (INTEGER, inst); + mve_encode_qqr (et.size, et.type == NT_unsigned, 0); + } } static void -do_neon_cvtn (void) +do_neon_fcmp_absolute (void) { - do_neon_cvt_1 (neon_cvt_mode_n); + enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); + struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK, + N_F_16_32 | N_KEY); + /* Size field comes from bit mask. */ + neon_three_same (neon_quad (rs), 1, et.size == 16 ? 
(int) et.size : -1); } static void -do_neon_cvtp (void) +do_neon_fcmp_absolute_inv (void) { - do_neon_cvt_1 (neon_cvt_mode_p); + neon_exchange_operands (); + do_neon_fcmp_absolute (); } static void -do_neon_cvtm (void) +do_neon_step (void) { - do_neon_cvt_1 (neon_cvt_mode_m); + enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); + struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK, + N_F_16_32 | N_KEY); + neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1); } static void -do_neon_cvttb_2 (bfd_boolean t, bfd_boolean to, bfd_boolean is_double) +do_neon_abs_neg (void) { - if (is_double) - mark_feature_used (&fpu_vfp_ext_armv8); + enum neon_shape rs; + struct neon_type_el et; - encode_arm_vfp_reg (inst.operands[0].reg, - (is_double && !to) ? VFP_REG_Dd : VFP_REG_Sd); - encode_arm_vfp_reg (inst.operands[1].reg, - (is_double && to) ? VFP_REG_Dm : VFP_REG_Sm); - inst.instruction |= to ? 0x10000 : 0; - inst.instruction |= t ? 0x80 : 0; - inst.instruction |= is_double ? 0x100 : 0; - do_vfp_cond_or_thumb (); + if (try_vfp_nsyn (2, do_vfp_nsyn_abs_neg) == SUCCESS) + return; + + rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); + et = neon_check_type (2, rs, N_EQK, N_S_32 | N_F_16_32 | N_KEY); + + if (!check_simd_pred_availability (et.type == NT_float, + NEON_CHECK_ARCH | NEON_CHECK_CC)) + return; + + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= neon_quad (rs) << 6; + inst.instruction |= (et.type == NT_float) << 10; + inst.instruction |= neon_logbits (et.size) << 18; + + neon_dp_fixup (&inst); } static void -do_neon_cvttb_1 (bfd_boolean t) +do_neon_sli (void) { - enum neon_shape rs = neon_select_shape (NS_HF, NS_HD, NS_FH, NS_FF, NS_FD, - NS_DF, NS_DH, NS_QQ, NS_QQI, NS_NULL); - - if (rs == NS_NULL) + if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC)) return; - else if (rs == NS_QQ || rs == NS_QQI) + + enum neon_shape rs; + struct neon_type_el et; + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) { - int single_to_half = 0; - if (check_simd_pred_availability (1, NEON_CHECK_ARCH)) - return; + rs = neon_select_shape (NS_QQI, NS_NULL); + et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_KEY); + } + else + { + rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL); + et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY); + } - enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs); - if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) - && (flavour == neon_cvt_flavour_u16_f16 - || flavour == neon_cvt_flavour_s16_f16 - || flavour == neon_cvt_flavour_f16_s16 - || flavour == neon_cvt_flavour_f16_u16 - || flavour == neon_cvt_flavour_u32_f32 - || flavour == neon_cvt_flavour_s32_f32 - || flavour == neon_cvt_flavour_f32_s32 - || flavour == neon_cvt_flavour_f32_u32)) - { - inst.cond = 0xf; - inst.instruction = N_MNEM_vcvt; - set_pred_insn_type (INSIDE_VPT_INSN); - do_neon_cvt_1 (neon_cvt_mode_z); - return; - } - else if (rs == NS_QQ && flavour == neon_cvt_flavour_f32_f16) - single_to_half = 1; - else if (rs == NS_QQ && flavour != neon_cvt_flavour_f16_f32) - { - first_error (BAD_FPU); - return; - } + int imm = inst.operands[2].imm; + constraint (imm < 0 || (unsigned)imm >= et.size, + _("immediate out of range for insert")); + neon_imm_shift (FALSE, 0, neon_quad (rs), et, imm); +} - inst.instruction = 0xee3f0e01; - inst.instruction |= 
single_to_half << 28; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (inst.operands[0].reg) << 13; - inst.instruction |= t << 12; - inst.instruction |= HI1 (inst.operands[1].reg) << 5; - inst.instruction |= LOW4 (inst.operands[1].reg) << 1; - inst.is_neon = 1; - } - else if (neon_check_type (2, rs, N_F16, N_F32 | N_VFP).type != NT_invtype) +static void +do_neon_sri (void) +{ + if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC)) + return; + + enum neon_shape rs; + struct neon_type_el et; + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) { - inst.error = NULL; - do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/FALSE); + rs = neon_select_shape (NS_QQI, NS_NULL); + et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_KEY); } - else if (neon_check_type (2, rs, N_F32 | N_VFP, N_F16).type != NT_invtype) + else { - inst.error = NULL; - do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/FALSE); + rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL); + et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY); } - else if (neon_check_type (2, rs, N_F16, N_F64 | N_VFP).type != NT_invtype) - { - /* The VCVTB and VCVTT instructions with D-register operands - don't work for SP only targets. */ - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), - _(BAD_FPU)); - inst.error = NULL; - do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/TRUE); - } - else if (neon_check_type (2, rs, N_F64 | N_VFP, N_F16).type != NT_invtype) - { - /* The VCVTB and VCVTT instructions with D-register operands - don't work for SP only targets. */ - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), - _(BAD_FPU)); + int imm = inst.operands[2].imm; + constraint (imm < 1 || (unsigned)imm > et.size, + _("immediate out of range for insert")); + neon_imm_shift (FALSE, 0, neon_quad (rs), et, et.size - imm); +} - inst.error = NULL; - do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE); +static void +do_neon_qshlu_imm (void) +{ + if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC)) + return; + + enum neon_shape rs; + struct neon_type_el et; + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + { + rs = neon_select_shape (NS_QQI, NS_NULL); + et = neon_check_type (2, rs, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY); } else - return; + { + rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL); + et = neon_check_type (2, rs, N_EQK | N_UNS, + N_S8 | N_S16 | N_S32 | N_S64 | N_KEY); + } + + int imm = inst.operands[2].imm; + constraint (imm < 0 || (unsigned)imm >= et.size, + _("immediate out of range for shift")); + /* Only encodes the 'U present' variant of the instruction. + In this case, signed types have OP (bit 8) set to 0. + Unsigned types have OP set to 1. */ + inst.instruction |= (et.type == NT_unsigned) << 8; + /* The rest of the bits are the same as other immediate shifts. */ + neon_imm_shift (FALSE, 0, neon_quad (rs), et, imm); } static void -do_neon_cvtb (void) +do_neon_qmovn (void) { - do_neon_cvttb_1 (FALSE); + struct neon_type_el et = neon_check_type (2, NS_DQ, + N_EQK | N_HLF, N_SU_16_64 | N_KEY); + /* Saturating move where operands can be signed or unsigned, and the + destination has the same signedness. 
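For example, "vqmovn.s32 d0, q1" narrows each signed 32-bit element of q1 to a saturated signed 16-bit element in d0.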
*/ + NEON_ENCODE (INTEGER, inst); + if (et.type == NT_unsigned) + inst.instruction |= 0xc0; + else + inst.instruction |= 0x80; + neon_two_same (0, 1, et.size / 2); } - static void -do_neon_cvtt (void) +do_neon_qmovun (void) { - do_neon_cvttb_1 (TRUE); + struct neon_type_el et = neon_check_type (2, NS_DQ, + N_EQK | N_HLF | N_UNS, N_S16 | N_S32 | N_S64 | N_KEY); + /* Saturating move with unsigned results. Operands must be signed. */ + NEON_ENCODE (INTEGER, inst); + neon_two_same (0, 1, et.size / 2); } static void -neon_move_immediate (void) +do_neon_rshift_sat_narrow (void) { - enum neon_shape rs = neon_select_shape (NS_DI, NS_QI, NS_NULL); - struct neon_type_el et = neon_check_type (2, rs, - N_I8 | N_I16 | N_I32 | N_I64 | N_F32 | N_KEY, N_EQK); - unsigned immlo, immhi = 0, immbits; - int op, cmode, float_p; - - constraint (et.type == NT_invtype, - _("operand size must be specified for immediate VMOV")); - - /* We start out as an MVN instruction if OP = 1, MOV otherwise. */ - op = (inst.instruction & (1 << 5)) != 0; + /* FIXME: Types for narrowing. If operands are signed, results can be signed + or unsigned. If operands are unsigned, results must also be unsigned. */ + struct neon_type_el et = neon_check_type (2, NS_DQI, + N_EQK | N_HLF, N_SU_16_64 | N_KEY); + int imm = inst.operands[2].imm; + /* This gets the bounds check, size encoding and immediate bits calculation + right. */ + et.size /= 2; - immlo = inst.operands[1].imm; - if (inst.operands[1].regisimm) - immhi = inst.operands[1].reg; + /* VQ{R}SHRN.I
<size> <Dd>, <Qm>, #0 is a synonym for + VQMOVN.I<size> <Dd>
, . */ + if (imm == 0) + { + inst.operands[2].present = 0; + inst.instruction = N_MNEM_vqmovn; + do_neon_qmovn (); + return; + } - constraint (et.size < 32 && (immlo & ~((1 << et.size) - 1)) != 0, - _("immediate has bits set outside the operand size")); + constraint (imm < 1 || (unsigned)imm > et.size, + _("immediate out of range")); + neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, et.size - imm); +} - float_p = inst.operands[1].immisfloat; +static void +do_neon_rshift_sat_narrow_u (void) +{ + /* FIXME: Types for narrowing. If operands are signed, results can be signed + or unsigned. If operands are unsigned, results must also be unsigned. */ + struct neon_type_el et = neon_check_type (2, NS_DQI, + N_EQK | N_HLF | N_UNS, N_S16 | N_S32 | N_S64 | N_KEY); + int imm = inst.operands[2].imm; + /* This gets the bounds check, size encoding and immediate bits calculation + right. */ + et.size /= 2; - if ((cmode = neon_cmode_for_move_imm (immlo, immhi, float_p, &immbits, &op, - et.size, et.type)) == FAIL) + /* VQSHRUN.I
<size> <Dd>, <Qm>, #0 is a synonym for + VQMOVUN.I<size> <Dd>
, . */ + if (imm == 0) { - /* Invert relevant bits only. */ - neon_invert_size (&immlo, &immhi, et.size); - /* Flip from VMOV/VMVN to VMVN/VMOV. Some immediate types are unavailable - with one or the other; those cases are caught by - neon_cmode_for_move_imm. */ - op = !op; - if ((cmode = neon_cmode_for_move_imm (immlo, immhi, float_p, &immbits, - &op, et.size, et.type)) == FAIL) - { - first_error (_("immediate out of range")); - return; - } + inst.operands[2].present = 0; + inst.instruction = N_MNEM_vqmovun; + do_neon_qmovun (); + return; } - inst.instruction &= ~(1 << 5); - inst.instruction |= op << 5; - - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= neon_quad (rs) << 6; - inst.instruction |= cmode << 8; + constraint (imm < 1 || (unsigned)imm > et.size, + _("immediate out of range")); + /* FIXME: The manual is kind of unclear about what value U should have in + VQ{R}SHRUN instructions, but U=0, op=0 definitely encodes VRSHR, so it + must be 1. */ + neon_imm_shift (TRUE, 1, 0, et, et.size - imm); +} - neon_write_immbits (immbits); +static void +do_neon_movn (void) +{ + struct neon_type_el et = neon_check_type (2, NS_DQ, + N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY); + NEON_ENCODE (INTEGER, inst); + neon_two_same (0, 1, et.size / 2); } static void -do_neon_mvn (void) +do_neon_rshift_narrow (void) { - if (inst.operands[1].isreg) + struct neon_type_el et = neon_check_type (2, NS_DQI, + N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY); + int imm = inst.operands[2].imm; + /* This gets the bounds check, size encoding and immediate bits calculation + right. */ + et.size /= 2; + + /* If immediate is zero then we are a pseudo-instruction for + VMOVN.I
, */ + if (imm == 0) { - enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); + inst.operands[2].present = 0; + inst.instruction = N_MNEM_vmovn; + do_neon_movn (); + return; + } + + constraint (imm < 1 || (unsigned)imm > et.size, + _("immediate out of range for narrowing operation")); + neon_imm_shift (FALSE, 0, 0, et, et.size - imm); +} + +static void +do_neon_shll (void) +{ + /* FIXME: Type checking when lengthening. */ + struct neon_type_el et = neon_check_type (2, NS_QDI, + N_EQK | N_DBL, N_I8 | N_I16 | N_I32 | N_KEY); + unsigned imm = inst.operands[2].imm; + if (imm == et.size) + { + /* Maximum shift variant. */ NEON_ENCODE (INTEGER, inst); inst.instruction |= LOW4 (inst.operands[0].reg) << 12; inst.instruction |= HI1 (inst.operands[0].reg) << 22; inst.instruction |= LOW4 (inst.operands[1].reg); inst.instruction |= HI1 (inst.operands[1].reg) << 5; - inst.instruction |= neon_quad (rs) << 6; + inst.instruction |= neon_logbits (et.size) << 18; + + neon_dp_fixup (&inst); } else { + /* A more-specific type check for non-max versions. */ + et = neon_check_type (2, NS_QDI, + N_EQK | N_DBL, N_SU_32 | N_KEY); NEON_ENCODE (IMMED, inst); - neon_move_immediate (); + neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, imm); } - - neon_dp_fixup (&inst); } -/* Encode instructions of form: - - |28/24|23|22|21 20|19 16|15 12|11 8|7|6|5|4|3 0| - | U |x |D |size | Rn | Rd |x x x x|N|x|M|x| Rm | */ +/* Check the various types for the VCVT instruction, and return which version + the current instruction is. */ -static void -neon_mixed_length (struct neon_type_el et, unsigned size) -{ - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (inst.operands[1].reg) << 16; - inst.instruction |= HI1 (inst.operands[1].reg) << 7; - inst.instruction |= LOW4 (inst.operands[2].reg); - inst.instruction |= HI1 (inst.operands[2].reg) << 5; - inst.instruction |= (et.type == NT_unsigned) << 24; - inst.instruction |= neon_logbits (size) << 20; +#define CVT_FLAVOUR_VAR \ + CVT_VAR (s32_f32, N_S32, N_F32, whole_reg, "ftosls", "ftosis", "ftosizs") \ + CVT_VAR (u32_f32, N_U32, N_F32, whole_reg, "ftouls", "ftouis", "ftouizs") \ + CVT_VAR (f32_s32, N_F32, N_S32, whole_reg, "fsltos", "fsitos", NULL) \ + CVT_VAR (f32_u32, N_F32, N_U32, whole_reg, "fultos", "fuitos", NULL) \ + /* Half-precision conversions. */ \ + CVT_VAR (s16_f16, N_S16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL) \ + CVT_VAR (u16_f16, N_U16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL) \ + CVT_VAR (f16_s16, N_F16 | N_KEY, N_S16, whole_reg, NULL, NULL, NULL) \ + CVT_VAR (f16_u16, N_F16 | N_KEY, N_U16, whole_reg, NULL, NULL, NULL) \ + CVT_VAR (f32_f16, N_F32, N_F16, whole_reg, NULL, NULL, NULL) \ + CVT_VAR (f16_f32, N_F16, N_F32, whole_reg, NULL, NULL, NULL) \ + /* New VCVT instructions introduced by ARMv8.2 fp16 extension. \ + Compared with single/double precision variants, only the co-processor \ + field is different, so the encoding flow is reused here. */ \ + CVT_VAR (f16_s32, N_F16 | N_KEY, N_S32, N_VFP, "fsltos", "fsitos", NULL) \ + CVT_VAR (f16_u32, N_F16 | N_KEY, N_U32, N_VFP, "fultos", "fuitos", NULL) \ + CVT_VAR (u32_f16, N_U32, N_F16 | N_KEY, N_VFP, "ftouls", "ftouis", "ftouizs")\ + CVT_VAR (s32_f16, N_S32, N_F16 | N_KEY, N_VFP, "ftosls", "ftosis", "ftosizs")\ + CVT_VAR (bf16_f32, N_BF16, N_F32, whole_reg, NULL, NULL, NULL) \ + /* VFP instructions. 
*/ \ + CVT_VAR (f32_f64, N_F32, N_F64, N_VFP, NULL, "fcvtsd", NULL) \ + CVT_VAR (f64_f32, N_F64, N_F32, N_VFP, NULL, "fcvtds", NULL) \ + CVT_VAR (s32_f64, N_S32, N_F64 | key, N_VFP, "ftosld", "ftosid", "ftosizd") \ + CVT_VAR (u32_f64, N_U32, N_F64 | key, N_VFP, "ftould", "ftouid", "ftouizd") \ + CVT_VAR (f64_s32, N_F64 | key, N_S32, N_VFP, "fsltod", "fsitod", NULL) \ + CVT_VAR (f64_u32, N_F64 | key, N_U32, N_VFP, "fultod", "fuitod", NULL) \ + /* VFP instructions with bitshift. */ \ + CVT_VAR (f32_s16, N_F32 | key, N_S16, N_VFP, "fshtos", NULL, NULL) \ + CVT_VAR (f32_u16, N_F32 | key, N_U16, N_VFP, "fuhtos", NULL, NULL) \ + CVT_VAR (f64_s16, N_F64 | key, N_S16, N_VFP, "fshtod", NULL, NULL) \ + CVT_VAR (f64_u16, N_F64 | key, N_U16, N_VFP, "fuhtod", NULL, NULL) \ + CVT_VAR (s16_f32, N_S16, N_F32 | key, N_VFP, "ftoshs", NULL, NULL) \ + CVT_VAR (u16_f32, N_U16, N_F32 | key, N_VFP, "ftouhs", NULL, NULL) \ + CVT_VAR (s16_f64, N_S16, N_F64 | key, N_VFP, "ftoshd", NULL, NULL) \ + CVT_VAR (u16_f64, N_U16, N_F64 | key, N_VFP, "ftouhd", NULL, NULL) - neon_dp_fixup (&inst); -} +#define CVT_VAR(C, X, Y, R, BSN, CN, ZN) \ + neon_cvt_flavour_##C, -static void -do_neon_dyadic_long (void) +/* The different types of conversions we can do. */ +enum neon_cvt_flavour { - enum neon_shape rs = neon_select_shape (NS_QDD, NS_QQQ, NS_QQR, NS_NULL); - if (rs == NS_QDD) - { - if (vfp_or_neon_is_neon (NEON_CHECK_ARCH | NEON_CHECK_CC) == FAIL) - return; + CVT_FLAVOUR_VAR + neon_cvt_flavour_invalid, + neon_cvt_flavour_first_fp = neon_cvt_flavour_f32_f64 +}; - NEON_ENCODE (INTEGER, inst); - /* FIXME: Type checking for lengthening op. */ - struct neon_type_el et = neon_check_type (3, NS_QDD, - N_EQK | N_DBL, N_EQK, N_SU_32 | N_KEY); - neon_mixed_length (et, et.size); +#undef CVT_VAR + +static enum neon_cvt_flavour +get_neon_cvt_flavour (enum neon_shape rs) +{ +#define CVT_VAR(C,X,Y,R,BSN,CN,ZN) \ + et = neon_check_type (2, rs, (R) | (X), (R) | (Y)); \ + if (et.type != NT_invtype) \ + { \ + inst.error = NULL; \ + return (neon_cvt_flavour_##C); \ } - else if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) - && (inst.cond == 0xf || inst.cond == 0x10)) - { - /* If parsing for MVE, vaddl/vsubl/vabdl{e,t} can only be vadd/vsub/vabd - in an IT block with le/lt conditions. */ - if (inst.cond == 0xf) - inst.cond = 0xb; - else if (inst.cond == 0x10) - inst.cond = 0xd; + struct neon_type_el et; + unsigned whole_reg = (rs == NS_FFI || rs == NS_FD || rs == NS_DF + || rs == NS_FF) ? N_VFP : 0; + /* The instruction versions which take an immediate take one register + argument, which is extended to the width of the full register. Thus the + "source" and "destination" registers must have the same width. Hack that + here by making the size equal to the key (wider, in this case) operand. */ + unsigned key = (rs == NS_QQI || rs == NS_DDI || rs == NS_FFI) ? 
N_KEY : 0; - inst.pred_insn_type = INSIDE_IT_INSN; + CVT_FLAVOUR_VAR; - if (inst.instruction == N_MNEM_vaddl) - { - inst.instruction = N_MNEM_vadd; - do_neon_addsub_if_i (); - } - else if (inst.instruction == N_MNEM_vsubl) - { - inst.instruction = N_MNEM_vsub; - do_neon_addsub_if_i (); - } - else if (inst.instruction == N_MNEM_vabdl) - { - inst.instruction = N_MNEM_vabd; - do_neon_dyadic_if_su (); - } - } - else - first_error (BAD_FPU); + return neon_cvt_flavour_invalid; +#undef CVT_VAR } -static void -do_neon_abal (void) +enum neon_cvt_mode { - struct neon_type_el et = neon_check_type (3, NS_QDD, - N_EQK | N_INT | N_DBL, N_EQK, N_SU_32 | N_KEY); - neon_mixed_length (et, et.size); -} + neon_cvt_mode_a, + neon_cvt_mode_n, + neon_cvt_mode_p, + neon_cvt_mode_m, + neon_cvt_mode_z, + neon_cvt_mode_x, + neon_cvt_mode_r +}; -static void -neon_mac_reg_scalar_long (unsigned regtypes, unsigned scalartypes) -{ - if (inst.operands[2].isscalar) - { - struct neon_type_el et = neon_check_type (3, NS_QDS, - N_EQK | N_DBL, N_EQK, regtypes | N_KEY); - NEON_ENCODE (SCALAR, inst); - neon_mul_mac (et, et.type == NT_unsigned); - } - else - { - struct neon_type_el et = neon_check_type (3, NS_QDD, - N_EQK | N_DBL, N_EQK, scalartypes | N_KEY); - NEON_ENCODE (INTEGER, inst); - neon_mixed_length (et, et.size); - } -} +/* Neon-syntax VFP conversions. */ static void -do_neon_mac_maybe_scalar_long (void) -{ - neon_mac_reg_scalar_long (N_S16 | N_S32 | N_U16 | N_U32, N_SU_32); -} - -/* Like neon_scalar_for_mul, this function generate Rm encoding from GAS's - internal SCALAR. QUAD_P is 1 if it's for Q format, otherwise it's 0. */ - -static unsigned -neon_scalar_for_fmac_fp16_long (unsigned scalar, unsigned quad_p) +do_vfp_nsyn_cvt (enum neon_shape rs, enum neon_cvt_flavour flavour) { - unsigned regno = NEON_SCALAR_REG (scalar); - unsigned elno = NEON_SCALAR_INDEX (scalar); + const char *opname = 0; - if (quad_p) + if (rs == NS_DDI || rs == NS_QQI || rs == NS_FFI + || rs == NS_FHI || rs == NS_HFI) { - if (regno > 7 || elno > 3) - goto bad_scalar; + /* Conversions with immediate bitshift. */ + const char *enc[] = + { +#define CVT_VAR(C,A,B,R,BSN,CN,ZN) BSN, + CVT_FLAVOUR_VAR + NULL +#undef CVT_VAR + }; - return ((regno & 0x7) - | ((elno & 0x1) << 3) - | (((elno >> 1) & 0x1) << 5)); + if (flavour < (int) ARRAY_SIZE (enc)) + { + opname = enc[flavour]; + constraint (inst.operands[0].reg != inst.operands[1].reg, + _("operands 0 and 1 must be the same register")); + inst.operands[1] = inst.operands[2]; + memset (&inst.operands[2], '\0', sizeof (inst.operands[2])); + } } else { - if (regno > 15 || elno > 1) - goto bad_scalar; + /* Conversions without bitshift. */ + const char *enc[] = + { +#define CVT_VAR(C,A,B,R,BSN,CN,ZN) CN, + CVT_FLAVOUR_VAR + NULL +#undef CVT_VAR + }; - return (((regno & 0x1) << 5) - | ((regno >> 1) & 0x7) - | ((elno & 0x1) << 3)); + if (flavour < (int) ARRAY_SIZE (enc)) + opname = enc[flavour]; } -bad_scalar: - first_error (_("scalar out of range for multiply instruction")); - return 0; + if (opname) + do_vfp_nsyn_opcode (opname); + + /* ARMv8.2 fp16 VCVT instruction. */ + if (flavour == neon_cvt_flavour_s32_f16 + || flavour == neon_cvt_flavour_u32_f16 + || flavour == neon_cvt_flavour_f16_u32 + || flavour == neon_cvt_flavour_f16_s32) + do_scalar_fp16_v82_encode (); } static void -do_neon_fmac_maybe_scalar_long (int subtype) +do_vfp_nsyn_cvtz (void) { - enum neon_shape rs; - int high8; - /* NOTE: vfmal/vfmsl use slightly different NEON three-same encoding. 'size" - field (bits[21:20]) has different meaning. 
For scalar index variant, it's - used to differentiate add and subtract, otherwise it's with fixed value - 0x2. */ - int size = -1; + enum neon_shape rs = neon_select_shape (NS_FH, NS_FF, NS_FD, NS_NULL); + enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs); + const char *enc[] = + { +#define CVT_VAR(C,A,B,R,BSN,CN,ZN) ZN, + CVT_FLAVOUR_VAR + NULL +#undef CVT_VAR + }; - if (inst.cond != COND_ALWAYS) - as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the " - "behaviour is UNPREDICTABLE")); + if (flavour < (int) ARRAY_SIZE (enc) && enc[flavour]) + do_vfp_nsyn_opcode (enc[flavour]); +} - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml), - _(BAD_FP16)); +static void +do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour, + enum neon_cvt_mode mode) +{ + int sz, op; + int rm; - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8), - _(BAD_FPU)); + /* Targets like FPv5-SP-D16 don't support FP v8 instructions with + D register operands. */ + if (flavour == neon_cvt_flavour_s32_f64 + || flavour == neon_cvt_flavour_u32_f64) + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), + _(BAD_FPU)); - /* vfmal/vfmsl are in three-same D/Q register format or the third operand can - be a scalar index register. */ - if (inst.operands[2].isscalar) - { - high8 = 0xfe000000; - if (subtype) - size = 16; - rs = neon_select_shape (NS_DHS, NS_QDS, NS_NULL); + if (flavour == neon_cvt_flavour_s32_f16 + || flavour == neon_cvt_flavour_u32_f16) + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16), + _(BAD_FP16)); + + set_pred_insn_type (OUTSIDE_PRED_INSN); + + switch (flavour) + { + case neon_cvt_flavour_s32_f64: + sz = 1; + op = 1; + break; + case neon_cvt_flavour_s32_f32: + sz = 0; + op = 1; + break; + case neon_cvt_flavour_s32_f16: + sz = 0; + op = 1; + break; + case neon_cvt_flavour_u32_f64: + sz = 1; + op = 0; + break; + case neon_cvt_flavour_u32_f32: + sz = 0; + op = 0; + break; + case neon_cvt_flavour_u32_f16: + sz = 0; + op = 0; + break; + default: + first_error (_("invalid instruction shape")); + return; } - else + + switch (mode) { - high8 = 0xfc000000; - size = 32; - if (subtype) - inst.instruction |= (0x1 << 23); - rs = neon_select_shape (NS_DHH, NS_QDD, NS_NULL); + case neon_cvt_mode_a: rm = 0; break; + case neon_cvt_mode_n: rm = 1; break; + case neon_cvt_mode_p: rm = 2; break; + case neon_cvt_mode_m: rm = 3; break; + default: first_error (_("invalid rounding mode")); return; } - neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_F16); + NEON_ENCODE (FPV8, inst); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); + encode_arm_vfp_reg (inst.operands[1].reg, sz == 1 ? VFP_REG_Dm : VFP_REG_Sm); + inst.instruction |= sz << 8; - /* "opcode" from template has included "ubit", so simply pass 0 here. Also, - the "S" bit in size field has been reused to differentiate vfmal and vfmsl, - so we simply pass -1 as size. */ - unsigned quad_p = (rs == NS_QDD || rs == NS_QDS); - neon_three_same (quad_p, 0, size); + /* ARMv8.2 fp16 VCVT instruction. */ + if (flavour == neon_cvt_flavour_s32_f16 + ||flavour == neon_cvt_flavour_u32_f16) + do_scalar_fp16_v82_encode (); + inst.instruction |= op << 7; + inst.instruction |= rm << 16; + inst.instruction |= 0xf0000000; + inst.is_neon = TRUE; +} - /* Undo neon_dp_fixup. Redo the high eight bits. 
*/ - inst.instruction &= 0x00ffffff; - inst.instruction |= high8; +static void +do_neon_cvt_1 (enum neon_cvt_mode mode) +{ + enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_FFI, NS_DD, NS_QQ, + NS_FD, NS_DF, NS_FF, NS_QD, NS_DQ, + NS_FH, NS_HF, NS_FHI, NS_HFI, + NS_NULL); + enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs); -#define LOW1(R) ((R) & 0x1) -#define HI4(R) (((R) >> 1) & 0xf) - /* Unlike usually NEON three-same, encoding for Vn and Vm will depend on - whether the instruction is in Q form and whether Vm is a scalar indexed - operand. */ - if (inst.operands[2].isscalar) + if (flavour == neon_cvt_flavour_invalid) + return; + + /* PR11109: Handle round-to-zero for VCVT conversions. */ + if (mode == neon_cvt_mode_z + && ARM_CPU_HAS_FEATURE (cpu_variant, fpu_arch_vfp_v2) + && (flavour == neon_cvt_flavour_s16_f16 + || flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_s32_f32 + || flavour == neon_cvt_flavour_u32_f32 + || flavour == neon_cvt_flavour_s32_f64 + || flavour == neon_cvt_flavour_u32_f64) + && (rs == NS_FD || rs == NS_FF)) { - unsigned rm - = neon_scalar_for_fmac_fp16_long (inst.operands[2].reg, quad_p); - inst.instruction &= 0xffffffd0; - inst.instruction |= rm; + do_vfp_nsyn_cvtz (); + return; + } - if (!quad_p) - { - /* Redo Rn as well. */ - inst.instruction &= 0xfff0ff7f; - inst.instruction |= HI4 (inst.operands[1].reg) << 16; - inst.instruction |= LOW1 (inst.operands[1].reg) << 7; - } + /* ARMv8.2 fp16 VCVT conversions. */ + if (mode == neon_cvt_mode_z + && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16) + && (flavour == neon_cvt_flavour_s32_f16 + || flavour == neon_cvt_flavour_u32_f16) + && (rs == NS_FH)) + { + do_vfp_nsyn_cvtz (); + do_scalar_fp16_v82_encode (); + return; } - else if (!quad_p) + + /* VFP rather than Neon conversions. */ + if (flavour >= neon_cvt_flavour_first_fp) { - /* Redo Rn and Rm. */ - inst.instruction &= 0xfff0ff50; - inst.instruction |= HI4 (inst.operands[1].reg) << 16; - inst.instruction |= LOW1 (inst.operands[1].reg) << 7; - inst.instruction |= HI4 (inst.operands[2].reg); - inst.instruction |= LOW1 (inst.operands[2].reg) << 5; + if (mode == neon_cvt_mode_x || mode == neon_cvt_mode_z) + do_vfp_nsyn_cvt (rs, flavour); + else + do_vfp_nsyn_cvt_fpv8 (flavour, mode); + + return; + } + + switch (rs) + { + case NS_QQI: + if (mode == neon_cvt_mode_z + && (flavour == neon_cvt_flavour_f16_s16 + || flavour == neon_cvt_flavour_f16_u16 + || flavour == neon_cvt_flavour_s16_f16 + || flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_f32_u32 + || flavour == neon_cvt_flavour_f32_s32 + || flavour == neon_cvt_flavour_s32_f32 + || flavour == neon_cvt_flavour_u32_f32)) + { + if (!check_simd_pred_availability (TRUE, + NEON_CHECK_CC | NEON_CHECK_ARCH)) + return; + } + else if (mode == neon_cvt_mode_n) + { + /* We are dealing with vcvt with the 'ne' condition. */ + inst.cond = 0x1; + inst.instruction = N_MNEM_vcvt; + do_neon_cvt_1 (neon_cvt_mode_z); + return; + } + /* fall through. 
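The NS_QQI operands reuse the fixed-point immediate handling of the NS_DDI case below.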
*/ + case NS_DDI: + { + unsigned immbits; + unsigned enctab[] = {0x0000100, 0x1000100, 0x0, 0x1000000, + 0x0000100, 0x1000100, 0x0, 0x1000000}; + + if ((rs != NS_QQI || !ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext)) + && vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL) + return; + + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext)) + { + constraint (inst.operands[2].present && inst.operands[2].imm == 0, + _("immediate value out of range")); + switch (flavour) + { + case neon_cvt_flavour_f16_s16: + case neon_cvt_flavour_f16_u16: + case neon_cvt_flavour_s16_f16: + case neon_cvt_flavour_u16_f16: + constraint (inst.operands[2].imm > 16, + _("immediate value out of range")); + break; + case neon_cvt_flavour_f32_u32: + case neon_cvt_flavour_f32_s32: + case neon_cvt_flavour_s32_f32: + case neon_cvt_flavour_u32_f32: + constraint (inst.operands[2].imm > 32, + _("immediate value out of range")); + break; + default: + inst.error = BAD_FPU; + return; + } + } + + /* Fixed-point conversion with #0 immediate is encoded as an + integer conversion. */ + if (inst.operands[2].present && inst.operands[2].imm == 0) + goto int_encode; + NEON_ENCODE (IMMED, inst); + if (flavour != neon_cvt_flavour_invalid) + inst.instruction |= enctab[flavour]; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= neon_quad (rs) << 6; + inst.instruction |= 1 << 21; + if (flavour < neon_cvt_flavour_s16_f16) + { + inst.instruction |= 1 << 21; + immbits = 32 - inst.operands[2].imm; + inst.instruction |= immbits << 16; + } + else + { + inst.instruction |= 3 << 20; + immbits = 16 - inst.operands[2].imm; + inst.instruction |= immbits << 16; + inst.instruction &= ~(1 << 9); + } + + neon_dp_fixup (&inst); + } + break; + + case NS_QQ: + if ((mode == neon_cvt_mode_a || mode == neon_cvt_mode_n + || mode == neon_cvt_mode_m || mode == neon_cvt_mode_p) + && (flavour == neon_cvt_flavour_s16_f16 + || flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_s32_f32 + || flavour == neon_cvt_flavour_u32_f32)) + { + if (!check_simd_pred_availability (TRUE, + NEON_CHECK_CC | NEON_CHECK_ARCH8)) + return; + } + else if (mode == neon_cvt_mode_z + && (flavour == neon_cvt_flavour_f16_s16 + || flavour == neon_cvt_flavour_f16_u16 + || flavour == neon_cvt_flavour_s16_f16 + || flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_f32_u32 + || flavour == neon_cvt_flavour_f32_s32 + || flavour == neon_cvt_flavour_s32_f32 + || flavour == neon_cvt_flavour_u32_f32)) + { + if (!check_simd_pred_availability (TRUE, + NEON_CHECK_CC | NEON_CHECK_ARCH)) + return; + } + /* fall through. */ + case NS_DD: + if (mode != neon_cvt_mode_x && mode != neon_cvt_mode_z) + { + + NEON_ENCODE (FLOAT, inst); + if (!check_simd_pred_availability (TRUE, + NEON_CHECK_CC | NEON_CHECK_ARCH8)) + return; + + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= neon_quad (rs) << 6; + inst.instruction |= (flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_u32_f32) << 7; + inst.instruction |= mode << 8; + if (flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_s16_f16) + /* Mask off the original size bits and reencode them. 
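Bits [19:18] become 0b01, the half-precision size encoding.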
*/ + inst.instruction = ((inst.instruction & 0xfff3ffff) | (1 << 18)); + + if (thumb_mode) + inst.instruction |= 0xfc000000; + else + inst.instruction |= 0xf0000000; + } + else + { + int_encode: + { + unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080, + 0x100, 0x180, 0x0, 0x080}; + + NEON_ENCODE (INTEGER, inst); + + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext)) + { + if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL) + return; + } + + if (flavour != neon_cvt_flavour_invalid) + inst.instruction |= enctab[flavour]; + + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= neon_quad (rs) << 6; + if (flavour >= neon_cvt_flavour_s16_f16 + && flavour <= neon_cvt_flavour_f16_u16) + /* Half precision. */ + inst.instruction |= 1 << 18; + else + inst.instruction |= 2 << 18; + + neon_dp_fixup (&inst); + } + } + break; + + /* Half-precision conversions for Advanced SIMD -- neon. */ + case NS_QD: + case NS_DQ: + if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL) + return; + + if ((rs == NS_DQ) + && (inst.vectype.el[0].size != 16 || inst.vectype.el[1].size != 32)) + { + as_bad (_("operand size must match register width")); + break; + } + + if ((rs == NS_QD) + && ((inst.vectype.el[0].size != 32 || inst.vectype.el[1].size != 16))) + { + as_bad (_("operand size must match register width")); + break; + } + + if (rs == NS_DQ) + { + if (flavour == neon_cvt_flavour_bf16_f32) + { + if (vfp_or_neon_is_neon (NEON_CHECK_ARCH8) == FAIL) + return; + constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16)); + /* VCVT.bf16.f32. */ + inst.instruction = 0x11b60640; + } + else + /* VCVT.f16.f32. */ + inst.instruction = 0x3b60600; + } + else + /* VCVT.f32.f16. */ + inst.instruction = 0x3b60700; + + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + neon_dp_fixup (&inst); + break; + + default: + /* Some VFP conversions go here (s32 <-> f32, u32 <-> f32). */ + if (mode == neon_cvt_mode_x || mode == neon_cvt_mode_z) + do_vfp_nsyn_cvt (rs, flavour); + else + do_vfp_nsyn_cvt_fpv8 (flavour, mode); + } +} + +static void +do_neon_cvtr (void) +{ + do_neon_cvt_1 (neon_cvt_mode_x); +} + +static void +do_neon_cvt (void) +{ + do_neon_cvt_1 (neon_cvt_mode_z); +} + +static void +do_neon_cvta (void) +{ + do_neon_cvt_1 (neon_cvt_mode_a); +} + +static void +do_neon_cvtn (void) +{ + do_neon_cvt_1 (neon_cvt_mode_n); +} + +static void +do_neon_cvtp (void) +{ + do_neon_cvt_1 (neon_cvt_mode_p); +} + +static void +do_neon_cvtm (void) +{ + do_neon_cvt_1 (neon_cvt_mode_m); +} + +static void +do_neon_cvttb_2 (bfd_boolean t, bfd_boolean to, bfd_boolean is_double) +{ + if (is_double) + mark_feature_used (&fpu_vfp_ext_armv8); + + encode_arm_vfp_reg (inst.operands[0].reg, + (is_double && !to) ? VFP_REG_Dd : VFP_REG_Sd); + encode_arm_vfp_reg (inst.operands[1].reg, + (is_double && to) ? VFP_REG_Dm : VFP_REG_Sm); + inst.instruction |= to ? 0x10000 : 0; + inst.instruction |= t ? 0x80 : 0; + inst.instruction |= is_double ? 
0x100 : 0; + do_vfp_cond_or_thumb (); +} + +static void +do_neon_cvttb_1 (bfd_boolean t) +{ + enum neon_shape rs = neon_select_shape (NS_HF, NS_HD, NS_FH, NS_FF, NS_FD, + NS_DF, NS_DH, NS_QQ, NS_QQI, NS_NULL); + + if (rs == NS_NULL) + return; + else if (rs == NS_QQ || rs == NS_QQI) + { + int single_to_half = 0; + if (!check_simd_pred_availability (TRUE, NEON_CHECK_ARCH)) + return; + + enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs); + + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) + && (flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_s16_f16 + || flavour == neon_cvt_flavour_f16_s16 + || flavour == neon_cvt_flavour_f16_u16 + || flavour == neon_cvt_flavour_u32_f32 + || flavour == neon_cvt_flavour_s32_f32 + || flavour == neon_cvt_flavour_f32_s32 + || flavour == neon_cvt_flavour_f32_u32)) + { + inst.cond = 0xf; + inst.instruction = N_MNEM_vcvt; + set_pred_insn_type (INSIDE_VPT_INSN); + do_neon_cvt_1 (neon_cvt_mode_z); + return; + } + else if (rs == NS_QQ && flavour == neon_cvt_flavour_f32_f16) + single_to_half = 1; + else if (rs == NS_QQ && flavour != neon_cvt_flavour_f16_f32) + { + first_error (BAD_FPU); + return; + } + + inst.instruction = 0xee3f0e01; + inst.instruction |= single_to_half << 28; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[0].reg) << 13; + inst.instruction |= t << 12; + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= LOW4 (inst.operands[1].reg) << 1; + inst.is_neon = 1; + } + else if (neon_check_type (2, rs, N_F16, N_F32 | N_VFP).type != NT_invtype) + { + inst.error = NULL; + do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/FALSE); + } + else if (neon_check_type (2, rs, N_F32 | N_VFP, N_F16).type != NT_invtype) + { + inst.error = NULL; + do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/FALSE); + } + else if (neon_check_type (2, rs, N_F16, N_F64 | N_VFP).type != NT_invtype) + { + /* The VCVTB and VCVTT instructions with D-register operands + don't work for SP only targets. */ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), + _(BAD_FPU)); + + inst.error = NULL; + do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/TRUE); + } + else if (neon_check_type (2, rs, N_F64 | N_VFP, N_F16).type != NT_invtype) + { + /* The VCVTB and VCVTT instructions with D-register operands + don't work for SP only targets. */ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), + _(BAD_FPU)); + + inst.error = NULL; + do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE); + } + else if (neon_check_type (2, rs, N_BF16 | N_VFP, N_F32).type != NT_invtype) + { + constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16)); + inst.error = NULL; + inst.instruction |= (1 << 8); + inst.instruction &= ~(1 << 9); + do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/FALSE); + } + else + return; +} + +static void +do_neon_cvtb (void) +{ + do_neon_cvttb_1 (FALSE); +} + + +static void +do_neon_cvtt (void) +{ + do_neon_cvttb_1 (TRUE); +} + +static void +neon_move_immediate (void) +{ + enum neon_shape rs = neon_select_shape (NS_DI, NS_QI, NS_NULL); + struct neon_type_el et = neon_check_type (2, rs, + N_I8 | N_I16 | N_I32 | N_I64 | N_F32 | N_KEY, N_EQK); + unsigned immlo, immhi = 0, immbits; + int op, cmode, float_p; + + constraint (et.type == NT_invtype, + _("operand size must be specified for immediate VMOV")); + + /* We start out as an MVN instruction if OP = 1, MOV otherwise. 
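OP lives in bit 5 of the opcode template, tested just below.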
*/ + op = (inst.instruction & (1 << 5)) != 0; + + immlo = inst.operands[1].imm; + if (inst.operands[1].regisimm) + immhi = inst.operands[1].reg; + + constraint (et.size < 32 && (immlo & ~((1 << et.size) - 1)) != 0, + _("immediate has bits set outside the operand size")); + + float_p = inst.operands[1].immisfloat; + + if ((cmode = neon_cmode_for_move_imm (immlo, immhi, float_p, &immbits, &op, + et.size, et.type)) == FAIL) + { + /* Invert relevant bits only. */ + neon_invert_size (&immlo, &immhi, et.size); + /* Flip from VMOV/VMVN to VMVN/VMOV. Some immediate types are unavailable + with one or the other; those cases are caught by + neon_cmode_for_move_imm. */ + op = !op; + if ((cmode = neon_cmode_for_move_imm (immlo, immhi, float_p, &immbits, + &op, et.size, et.type)) == FAIL) + { + first_error (_("immediate out of range")); + return; + } + } + + inst.instruction &= ~(1 << 5); + inst.instruction |= op << 5; + + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= neon_quad (rs) << 6; + inst.instruction |= cmode << 8; + + neon_write_immbits (immbits); +} + +static void +do_neon_mvn (void) +{ + if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH)) + return; + + if (inst.operands[1].isreg) + { + enum neon_shape rs; + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + rs = neon_select_shape (NS_QQ, NS_NULL); + else + rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); + + NEON_ENCODE (INTEGER, inst); + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= neon_quad (rs) << 6; + } + else + { + NEON_ENCODE (IMMED, inst); + neon_move_immediate (); + } + + neon_dp_fixup (&inst); + + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + { + constraint (!inst.operands[1].isreg && !inst.operands[0].isquad, BAD_FPU); + } +} + +/* Encode instructions of form: + + |28/24|23|22|21 20|19 16|15 12|11 8|7|6|5|4|3 0| + | U |x |D |size | Rn | Rd |x x x x|N|x|M|x| Rm | */ + +static void +neon_mixed_length (struct neon_type_el et, unsigned size) +{ + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= LOW4 (inst.operands[2].reg); + inst.instruction |= HI1 (inst.operands[2].reg) << 5; + inst.instruction |= (et.type == NT_unsigned) << 24; + inst.instruction |= neon_logbits (size) << 20; + + neon_dp_fixup (&inst); +} + +static void +do_neon_dyadic_long (void) +{ + enum neon_shape rs = neon_select_shape (NS_QDD, NS_HHH, NS_FFF, NS_DDD, NS_NULL); + if (rs == NS_QDD) + { + if (vfp_or_neon_is_neon (NEON_CHECK_ARCH | NEON_CHECK_CC) == FAIL) + return; + + NEON_ENCODE (INTEGER, inst); + /* FIXME: Type checking for lengthening op. */ + struct neon_type_el et = neon_check_type (3, NS_QDD, + N_EQK | N_DBL, N_EQK, N_SU_32 | N_KEY); + neon_mixed_length (et, et.size); + } + else if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) + && (inst.cond == 0xf || inst.cond == 0x10)) + { + /* If parsing for MVE, vaddl/vsubl/vabdl{e,t} can only be vadd/vsub/vabd + in an IT block with le/lt conditions. 
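In other words, a mnemonic such as "vaddlt" in an IT block is really VADD with the LT condition, not VADDL with a VPT 'T' suffix.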
*/ + + if (inst.cond == 0xf) + inst.cond = 0xb; + else if (inst.cond == 0x10) + inst.cond = 0xd; + + inst.pred_insn_type = INSIDE_IT_INSN; + + if (inst.instruction == N_MNEM_vaddl) + { + inst.instruction = N_MNEM_vadd; + do_neon_addsub_if_i (); + } + else if (inst.instruction == N_MNEM_vsubl) + { + inst.instruction = N_MNEM_vsub; + do_neon_addsub_if_i (); + } + else if (inst.instruction == N_MNEM_vabdl) + { + inst.instruction = N_MNEM_vabd; + do_neon_dyadic_if_su (); + } + } + else + first_error (BAD_FPU); +} + +static void +do_neon_abal (void) +{ + struct neon_type_el et = neon_check_type (3, NS_QDD, + N_EQK | N_INT | N_DBL, N_EQK, N_SU_32 | N_KEY); + neon_mixed_length (et, et.size); +} + +static void +neon_mac_reg_scalar_long (unsigned regtypes, unsigned scalartypes) +{ + if (inst.operands[2].isscalar) + { + struct neon_type_el et = neon_check_type (3, NS_QDS, + N_EQK | N_DBL, N_EQK, regtypes | N_KEY); + NEON_ENCODE (SCALAR, inst); + neon_mul_mac (et, et.type == NT_unsigned); + } + else + { + struct neon_type_el et = neon_check_type (3, NS_QDD, + N_EQK | N_DBL, N_EQK, scalartypes | N_KEY); + NEON_ENCODE (INTEGER, inst); + neon_mixed_length (et, et.size); + } +} + +static void +do_neon_mac_maybe_scalar_long (void) +{ + neon_mac_reg_scalar_long (N_S16 | N_S32 | N_U16 | N_U32, N_SU_32); +} + +/* Like neon_scalar_for_mul, this function generate Rm encoding from GAS's + internal SCALAR. QUAD_P is 1 if it's for Q format, otherwise it's 0. */ + +static unsigned +neon_scalar_for_fmac_fp16_long (unsigned scalar, unsigned quad_p) +{ + unsigned regno = NEON_SCALAR_REG (scalar); + unsigned elno = NEON_SCALAR_INDEX (scalar); + + if (quad_p) + { + if (regno > 7 || elno > 3) + goto bad_scalar; + + return ((regno & 0x7) + | ((elno & 0x1) << 3) + | (((elno >> 1) & 0x1) << 5)); + } + else + { + if (regno > 15 || elno > 1) + goto bad_scalar; + + return (((regno & 0x1) << 5) + | ((regno >> 1) & 0x7) + | ((elno & 0x1) << 3)); + } + + bad_scalar: + first_error (_("scalar out of range for multiply instruction")); + return 0; +} + +static void +do_neon_fmac_maybe_scalar_long (int subtype) +{ + enum neon_shape rs; + int high8; + /* NOTE: vfmal/vfmsl use slightly different NEON three-same encoding. 'size" + field (bits[21:20]) has different meaning. For scalar index variant, it's + used to differentiate add and subtract, otherwise it's with fixed value + 0x2. */ + int size = -1; + + /* vfmal/vfmsl are in three-same D/Q register format or the third operand can + be a scalar index register. */ + if (inst.operands[2].isscalar) + { + high8 = 0xfe000000; + if (subtype) + size = 16; + rs = neon_select_shape (NS_DHS, NS_QDS, NS_NULL); + } + else + { + high8 = 0xfc000000; + size = 32; + if (subtype) + inst.instruction |= (0x1 << 23); + rs = neon_select_shape (NS_DHH, NS_QDD, NS_NULL); + } + + + if (inst.cond != COND_ALWAYS) + as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the " + "behaviour is UNPREDICTABLE")); + + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml), + _(BAD_FP16)); + + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8), + _(BAD_FPU)); + + /* "opcode" from template has included "ubit", so simply pass 0 here. Also, + the "S" bit in size field has been reused to differentiate vfmal and vfmsl, + so we simply pass -1 as size. */ + unsigned quad_p = (rs == NS_QDD || rs == NS_QDS); + neon_three_same (quad_p, 0, size); + + /* Undo neon_dp_fixup. Redo the high eight bits. 
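They are 0xfe000000 for the scalar-indexed form and 0xfc000000 for the plain vector form, as selected above.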
*/ + inst.instruction &= 0x00ffffff; + inst.instruction |= high8; + + /* Unlike usually NEON three-same, encoding for Vn and Vm will depend on + whether the instruction is in Q form and whether Vm is a scalar indexed + operand. */ + if (inst.operands[2].isscalar) + { + unsigned rm + = neon_scalar_for_fmac_fp16_long (inst.operands[2].reg, quad_p); + inst.instruction &= 0xffffffd0; + inst.instruction |= rm; + + if (!quad_p) + { + /* Redo Rn as well. */ + inst.instruction &= 0xfff0ff7f; + inst.instruction |= HI4 (inst.operands[1].reg) << 16; + inst.instruction |= LOW1 (inst.operands[1].reg) << 7; + } + } + else if (!quad_p) + { + /* Redo Rn and Rm. */ + inst.instruction &= 0xfff0ff50; + inst.instruction |= HI4 (inst.operands[1].reg) << 16; + inst.instruction |= LOW1 (inst.operands[1].reg) << 7; + inst.instruction |= HI4 (inst.operands[2].reg); + inst.instruction |= LOW1 (inst.operands[2].reg) << 5; + } +} + +static void +do_neon_vfmal (void) +{ + return do_neon_fmac_maybe_scalar_long (0); +} + +static void +do_neon_vfmsl (void) +{ + return do_neon_fmac_maybe_scalar_long (1); +} + +static void +do_neon_dyadic_wide (void) +{ + struct neon_type_el et = neon_check_type (3, NS_QQD, + N_EQK | N_DBL, N_EQK | N_DBL, N_SU_32 | N_KEY); + neon_mixed_length (et, et.size); +} + +static void +do_neon_dyadic_narrow (void) +{ + struct neon_type_el et = neon_check_type (3, NS_QDD, + N_EQK | N_DBL, N_EQK, N_I16 | N_I32 | N_I64 | N_KEY); + /* Operand sign is unimportant, and the U bit is part of the opcode, + so force the operand type to integer. */ + et.type = NT_integer; + neon_mixed_length (et, et.size / 2); +} + +static void +do_neon_mul_sat_scalar_long (void) +{ + neon_mac_reg_scalar_long (N_S16 | N_S32, N_S16 | N_S32); +} + +static void +do_neon_vmull (void) +{ + if (inst.operands[2].isscalar) + do_neon_mac_maybe_scalar_long (); + else + { + struct neon_type_el et = neon_check_type (3, NS_QDD, + N_EQK | N_DBL, N_EQK, N_SU_32 | N_P8 | N_P64 | N_KEY); + + if (et.type == NT_poly) + NEON_ENCODE (POLY, inst); + else + NEON_ENCODE (INTEGER, inst); + + /* For polynomial encoding the U bit must be zero, and the size must + be 8 (encoded as 0b00) or, on ARMv8 or later 64 (encoded, non + obviously, as 0b10). */ + if (et.size == 64) + { + /* Check we're on the correct architecture. */ + if (!mark_feature_used (&fpu_crypto_ext_armv8)) + inst.error = + _("Instruction form not available on this architecture."); + + et.size = 32; + } + + neon_mixed_length (et, et.size); + } +} + +static void +do_neon_ext (void) +{ + enum neon_shape rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY); + unsigned imm = (inst.operands[3].imm * et.size) / 8; + + constraint (imm >= (unsigned) (neon_quad (rs) ? 
16 : 8), + _("shift out of range")); + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= LOW4 (inst.operands[2].reg); + inst.instruction |= HI1 (inst.operands[2].reg) << 5; + inst.instruction |= neon_quad (rs) << 6; + inst.instruction |= imm << 8; + + neon_dp_fixup (&inst); +} + +static void +do_neon_rev (void) +{ + if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC)) + return; + + enum neon_shape rs; + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + rs = neon_select_shape (NS_QQ, NS_NULL); + else + rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); + + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_8 | N_16 | N_32 | N_KEY); + + unsigned op = (inst.instruction >> 7) & 3; + /* N (width of reversed regions) is encoded as part of the bitmask. We + extract it here to check the elements to be reversed are smaller. + Otherwise we'd get a reserved instruction. */ + unsigned elsize = (op == 2) ? 16 : (op == 1) ? 32 : (op == 0) ? 64 : 0; + + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) && elsize == 64 + && inst.operands[0].reg == inst.operands[1].reg) + as_tsktsk (_("Warning: 64-bit element size and same destination and source" + " operands makes instruction UNPREDICTABLE")); + + gas_assert (elsize != 0); + constraint (et.size >= elsize, + _("elements must be smaller than reversal region")); + neon_two_same (neon_quad (rs), 1, et.size); +} + +static void +do_neon_dup (void) +{ + if (inst.operands[1].isscalar) + { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1), + BAD_FPU); + enum neon_shape rs = neon_select_shape (NS_DS, NS_QS, NS_NULL); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_8 | N_16 | N_32 | N_KEY); + unsigned sizebits = et.size >> 3; + unsigned dm = NEON_SCALAR_REG (inst.operands[1].reg); + int logsize = neon_logbits (et.size); + unsigned x = NEON_SCALAR_INDEX (inst.operands[1].reg) << logsize; + + if (vfp_or_neon_is_neon (NEON_CHECK_CC) == FAIL) + return; + + NEON_ENCODE (SCALAR, inst); + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (dm); + inst.instruction |= HI1 (dm) << 5; + inst.instruction |= neon_quad (rs) << 6; + inst.instruction |= x << 17; + inst.instruction |= sizebits << 16; + + neon_dp_fixup (&inst); + } + else + { + enum neon_shape rs = neon_select_shape (NS_DR, NS_QR, NS_NULL); + struct neon_type_el et = neon_check_type (2, rs, + N_8 | N_16 | N_32 | N_KEY, N_EQK); + if (rs == NS_QR) + { + if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH)) + return; + } + else + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1), + BAD_FPU); + + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + { + if (inst.operands[1].reg == REG_SP) + as_tsktsk (MVE_BAD_SP); + else if (inst.operands[1].reg == REG_PC) + as_tsktsk (MVE_BAD_PC); + } + + /* Duplicate ARM register to lanes of vector. 
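For example, "vdup.8 q0, r1" broadcasts the least significant byte of r1 to all sixteen lanes of q0.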
*/ + NEON_ENCODE (ARMREG, inst); + switch (et.size) + { + case 8: inst.instruction |= 0x400000; break; + case 16: inst.instruction |= 0x000020; break; + case 32: inst.instruction |= 0x000000; break; + default: break; + } + inst.instruction |= LOW4 (inst.operands[1].reg) << 12; + inst.instruction |= LOW4 (inst.operands[0].reg) << 16; + inst.instruction |= HI1 (inst.operands[0].reg) << 7; + inst.instruction |= neon_quad (rs) << 21; + /* The encoding for this instruction is identical for the ARM and Thumb + variants, except for the condition field. */ + do_vfp_cond_or_thumb (); + } +} + +static void +do_mve_mov (int toQ) +{ + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + return; + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = MVE_UNPREDICABLE_INSN; + + unsigned Rt = 0, Rt2 = 1, Q0 = 2, Q1 = 3; + if (toQ) + { + Q0 = 0; + Q1 = 1; + Rt = 2; + Rt2 = 3; + } + + constraint (inst.operands[Q0].reg != inst.operands[Q1].reg + 2, + _("Index one must be [2,3] and index two must be two less than" + " index one.")); + constraint (inst.operands[Rt].reg == inst.operands[Rt2].reg, + _("General purpose registers may not be the same")); + constraint (inst.operands[Rt].reg == REG_SP + || inst.operands[Rt2].reg == REG_SP, + BAD_SP); + constraint (inst.operands[Rt].reg == REG_PC + || inst.operands[Rt2].reg == REG_PC, + BAD_PC); + + inst.instruction = 0xec000f00; + inst.instruction |= HI1 (inst.operands[Q1].reg / 32) << 23; + inst.instruction |= !!toQ << 20; + inst.instruction |= inst.operands[Rt2].reg << 16; + inst.instruction |= LOW4 (inst.operands[Q1].reg / 32) << 13; + inst.instruction |= (inst.operands[Q1].reg % 4) << 4; + inst.instruction |= inst.operands[Rt].reg; +} + +static void +do_mve_movn (void) +{ + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + return; + + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; + + struct neon_type_el et = neon_check_type (2, NS_QQ, N_EQK, N_I16 | N_I32 + | N_KEY); + + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= (neon_logbits (et.size) - 1) << 18; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.is_neon = 1; + +} + +/* VMOV has particularly many variations. It can be one of: + 0. VMOV , + 1. VMOV
<c><q> <Dd>, <Dm>
+ (Register operations, which are VORR with Rm = Rn.)
+ 2. VMOV<c><q>.<dt> <Qd>, #<imm>
+ 3. VMOV<c><q>.<dt> <Dd>
, # + (Immediate loads.) + 4. VMOV. , + (ARM register to scalar.) + 5. VMOV , , + (Two ARM registers to vector.) + 6. VMOV.
, + (Scalar to ARM register.) + 7. VMOV , , + (Vector to two ARM registers.) + 8. VMOV.F32 , + 9. VMOV.F64
, + (VFP register moves.) + 10. VMOV.F32 , #imm + 11. VMOV.F64
, #imm + (VFP float immediate load.) + 12. VMOV , + (VFP single to ARM reg.) + 13. VMOV , + (ARM reg to VFP single.) + 14. VMOV , , , + (Two ARM regs to two VFP singles.) + 15. VMOV , , , + (Two VFP singles to two ARM regs.) + 16. VMOV , , , + 17. VMOV , , , + 18. VMOV.
, + 19. VMOV.
, + + These cases can be disambiguated using neon_select_shape, except cases 1/9 + and 3/11 which depend on the operand type too. + + All the encoded bits are hardcoded by this function. + + Cases 4, 6 may be used with VFPv1 and above (only 32-bit transfers!). + Cases 5, 7 may be used with VFPv2 and above. + + FIXME: Some of the checking may be a bit sloppy (in a couple of cases you + can specify a type where it doesn't make sense to, and is ignored). */ + +static void +do_neon_mov (void) +{ + enum neon_shape rs = neon_select_shape (NS_RRSS, NS_SSRR, NS_RRFF, NS_FFRR, + NS_DRR, NS_RRD, NS_QQ, NS_DD, NS_QI, + NS_DI, NS_SR, NS_RS, NS_FF, NS_FI, + NS_RF, NS_FR, NS_HR, NS_RH, NS_HI, + NS_NULL); + struct neon_type_el et; + const char *ldconst = 0; + + switch (rs) + { + case NS_DD: /* case 1/9. */ + et = neon_check_type (2, rs, N_EQK, N_F64 | N_KEY); + /* It is not an error here if no type is given. */ + inst.error = NULL; + + /* In MVE we interpret the following instructions as same, so ignoring + the following type (float) and size (64) checks. + a: VMOV
, + b: VMOV.F64
, . */ + if ((et.type == NT_float && et.size == 64) + || (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))) + { + do_vfp_nsyn_opcode ("fcpyd"); + break; + } + /* fall through. */ + + case NS_QQ: /* case 0/1. */ + { + if (!check_simd_pred_availability (FALSE, + NEON_CHECK_CC | NEON_CHECK_ARCH)) + return; + /* The architecture manual I have doesn't explicitly state which + value the U bit should have for register->register moves, but + the equivalent VORR instruction has U = 0, so do that. */ + inst.instruction = 0x0200110; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= neon_quad (rs) << 6; + + neon_dp_fixup (&inst); + } + break; + + case NS_DI: /* case 3/11. */ + et = neon_check_type (2, rs, N_EQK, N_F64 | N_KEY); + inst.error = NULL; + if (et.type == NT_float && et.size == 64) + { + /* case 11 (fconstd). */ + ldconst = "fconstd"; + goto encode_fconstd; + } + /* fall through. */ + + case NS_QI: /* case 2/3. */ + if (!check_simd_pred_availability (FALSE, + NEON_CHECK_CC | NEON_CHECK_ARCH)) + return; + inst.instruction = 0x0800010; + neon_move_immediate (); + neon_dp_fixup (&inst); + break; + + case NS_SR: /* case 4. */ + { + unsigned bcdebits = 0; + int logsize; + unsigned dn = NEON_SCALAR_REG (inst.operands[0].reg); + unsigned x = NEON_SCALAR_INDEX (inst.operands[0].reg); + + /* . is optional here, defaulting to .32. */ + if (inst.vectype.elems == 0 + && inst.operands[0].vectype.type == NT_invtype + && inst.operands[1].vectype.type == NT_invtype) + { + inst.vectype.el[0].type = NT_untyped; + inst.vectype.el[0].size = 32; + inst.vectype.elems = 1; + } + + et = neon_check_type (2, NS_NULL, N_8 | N_16 | N_32 | N_KEY, N_EQK); + logsize = neon_logbits (et.size); + + if (et.size != 32) + { + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) + && vfp_or_neon_is_neon (NEON_CHECK_ARCH) == FAIL) + return; + } + else + { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + } + + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + { + if (inst.operands[1].reg == REG_SP) + as_tsktsk (MVE_BAD_SP); + else if (inst.operands[1].reg == REG_PC) + as_tsktsk (MVE_BAD_PC); + } + unsigned size = inst.operands[0].isscalar == 1 ? 64 : 128; + + constraint (et.type == NT_invtype, _("bad type for scalar")); + constraint (x >= size / et.size, _("scalar index out of range")); + + + switch (et.size) + { + case 8: bcdebits = 0x8; break; + case 16: bcdebits = 0x1; break; + case 32: bcdebits = 0x0; break; + default: ; + } + + bcdebits |= (x & ((1 << (3-logsize)) - 1)) << logsize; + + inst.instruction = 0xe000b10; + do_vfp_cond_or_thumb (); + inst.instruction |= LOW4 (dn) << 16; + inst.instruction |= HI1 (dn) << 7; + inst.instruction |= inst.operands[1].reg << 12; + inst.instruction |= (bcdebits & 3) << 5; + inst.instruction |= ((bcdebits >> 2) & 3) << 21; + inst.instruction |= (x >> (3-logsize)) << 16; + } + break; + + case NS_DRR: /* case 5 (fmdrr). 
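+ This is the two-core-registers-to-doubleword form; the code below uses
+ the legacy fmdrr base opcode 0xc400b10 and places Dm in bits [3:0] plus
+ bit 5, Rt in bits [15:12] and Rt2 in bits [19:16].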
*/ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + + inst.instruction = 0xc400b10; + do_vfp_cond_or_thumb (); + inst.instruction |= LOW4 (inst.operands[0].reg); + inst.instruction |= HI1 (inst.operands[0].reg) << 5; + inst.instruction |= inst.operands[1].reg << 12; + inst.instruction |= inst.operands[2].reg << 16; + break; + + case NS_RS: /* case 6. */ + { + unsigned logsize; + unsigned dn = NEON_SCALAR_REG (inst.operands[1].reg); + unsigned x = NEON_SCALAR_INDEX (inst.operands[1].reg); + unsigned abcdebits = 0; + + /* .
is optional here, defaulting to .32. */ + if (inst.vectype.elems == 0 + && inst.operands[0].vectype.type == NT_invtype + && inst.operands[1].vectype.type == NT_invtype) + { + inst.vectype.el[0].type = NT_untyped; + inst.vectype.el[0].size = 32; + inst.vectype.elems = 1; + } + + et = neon_check_type (2, NS_NULL, + N_EQK, N_S8 | N_S16 | N_U8 | N_U16 | N_32 | N_KEY); + logsize = neon_logbits (et.size); + + if (et.size != 32) + { + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) + && vfp_or_neon_is_neon (NEON_CHECK_CC + | NEON_CHECK_ARCH) == FAIL) + return; + } + else + { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + } + + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + { + if (inst.operands[0].reg == REG_SP) + as_tsktsk (MVE_BAD_SP); + else if (inst.operands[0].reg == REG_PC) + as_tsktsk (MVE_BAD_PC); + } + + unsigned size = inst.operands[1].isscalar == 1 ? 64 : 128; + + constraint (et.type == NT_invtype, _("bad type for scalar")); + constraint (x >= size / et.size, _("scalar index out of range")); + + switch (et.size) + { + case 8: abcdebits = (et.type == NT_signed) ? 0x08 : 0x18; break; + case 16: abcdebits = (et.type == NT_signed) ? 0x01 : 0x11; break; + case 32: abcdebits = 0x00; break; + default: ; + } + + abcdebits |= (x & ((1 << (3-logsize)) - 1)) << logsize; + inst.instruction = 0xe100b10; + do_vfp_cond_or_thumb (); + inst.instruction |= LOW4 (dn) << 16; + inst.instruction |= HI1 (dn) << 7; + inst.instruction |= inst.operands[0].reg << 12; + inst.instruction |= (abcdebits & 3) << 5; + inst.instruction |= (abcdebits >> 2) << 21; + inst.instruction |= (x >> (3-logsize)) << 16; + } + break; + + case NS_RRD: /* case 7 (fmrrd). */ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + + inst.instruction = 0xc500b10; + do_vfp_cond_or_thumb (); + inst.instruction |= inst.operands[0].reg << 12; + inst.instruction |= inst.operands[1].reg << 16; + inst.instruction |= LOW4 (inst.operands[2].reg); + inst.instruction |= HI1 (inst.operands[2].reg) << 5; + break; + + case NS_FF: /* case 8 (fcpys). */ + do_vfp_nsyn_opcode ("fcpys"); + break; + + case NS_HI: + case NS_FI: /* case 10 (fconsts). */ + ldconst = "fconsts"; + encode_fconstd: + if (!inst.operands[1].immisfloat) + { + unsigned new_imm; + /* Immediate has to fit in 8 bits so float is enough. */ + float imm = (float) inst.operands[1].imm; + memcpy (&new_imm, &imm, sizeof (float)); + /* But the assembly may have been written to provide an integer + bit pattern that equates to a float, so check that the + conversion has worked. */ + if (is_quarter_float (new_imm)) + { + if (is_quarter_float (inst.operands[1].imm)) + as_warn (_("immediate constant is valid both as a bit-pattern and a floating point value (using the fp value)")); + + inst.operands[1].imm = new_imm; + inst.operands[1].immisfloat = 1; + } + } + + if (is_quarter_float (inst.operands[1].imm)) + { + inst.operands[1].imm = neon_qfloat_bits (inst.operands[1].imm); + do_vfp_nsyn_opcode (ldconst); + + /* ARMv8.2 fp16 vmov.f16 instruction. */ + if (rs == NS_HI) + do_scalar_fp16_v82_encode (); + } + else + first_error (_("immediate out of range")); + break; + + case NS_RH: + case NS_RF: /* case 12 (fmrs). */ + do_vfp_nsyn_opcode ("fmrs"); + /* ARMv8.2 fp16 vmov.f16 instruction. */ + if (rs == NS_RH) + do_scalar_fp16_v82_encode (); + break; + + case NS_HR: + case NS_FR: /* case 13 (fmsr). 
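+ Like case 12 above, this defers to the legacy fmsr pseudo-opcode; for
+ the fp16 NS_HR shape the encoding is then adjusted to the
+ half-precision variant below.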
*/ + do_vfp_nsyn_opcode ("fmsr"); + /* ARMv8.2 fp16 vmov.f16 instruction. */ + if (rs == NS_HR) + do_scalar_fp16_v82_encode (); + break; + + case NS_RRSS: + do_mve_mov (0); + break; + case NS_SSRR: + do_mve_mov (1); + break; + + /* The encoders for the fmrrs and fmsrr instructions expect three operands + (one of which is a list), but we have parsed four. Do some fiddling to + make the operands what do_vfp_reg2_from_sp2 and do_vfp_sp2_from_reg2 + expect. */ + case NS_RRFF: /* case 14 (fmrrs). */ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + constraint (inst.operands[3].reg != inst.operands[2].reg + 1, + _("VFP registers must be adjacent")); + inst.operands[2].imm = 2; + memset (&inst.operands[3], '\0', sizeof (inst.operands[3])); + do_vfp_nsyn_opcode ("fmrrs"); + break; + + case NS_FFRR: /* case 15 (fmsrr). */ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + constraint (inst.operands[1].reg != inst.operands[0].reg + 1, + _("VFP registers must be adjacent")); + inst.operands[1] = inst.operands[2]; + inst.operands[2] = inst.operands[3]; + inst.operands[0].imm = 2; + memset (&inst.operands[3], '\0', sizeof (inst.operands[3])); + do_vfp_nsyn_opcode ("fmsrr"); + break; + + case NS_NULL: + /* neon_select_shape has determined that the instruction + shape is wrong and has already set the error message. */ + break; + + default: + abort (); } } static void -do_neon_vfmal (void) +do_mve_movl (void) { - return do_neon_fmac_maybe_scalar_long (0); -} + if (!(inst.operands[0].present && inst.operands[0].isquad + && inst.operands[1].present && inst.operands[1].isquad + && !inst.operands[2].present)) + { + inst.instruction = 0; + inst.cond = 0xb; + if (thumb_mode) + set_pred_insn_type (INSIDE_IT_INSN); + do_neon_mov (); + return; + } -static void -do_neon_vfmsl (void) -{ - return do_neon_fmac_maybe_scalar_long (1); -} + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + return; -static void -do_neon_dyadic_wide (void) -{ - struct neon_type_el et = neon_check_type (3, NS_QQD, - N_EQK | N_DBL, N_EQK | N_DBL, N_SU_32 | N_KEY); - neon_mixed_length (et, et.size); -} + if (inst.cond != COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; -static void -do_neon_dyadic_narrow (void) -{ - struct neon_type_el et = neon_check_type (3, NS_QDD, - N_EQK | N_DBL, N_EQK, N_I16 | N_I32 | N_I64 | N_KEY); - /* Operand sign is unimportant, and the U bit is part of the opcode, - so force the operand type to integer. 
*/ - et.type = NT_integer; - neon_mixed_length (et, et.size / 2); + struct neon_type_el et = neon_check_type (2, NS_QQ, N_EQK, N_S8 | N_U8 + | N_S16 | N_U16 | N_KEY); + + inst.instruction |= (et.type == NT_unsigned) << 28; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= (neon_logbits (et.size) + 1) << 19; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.is_neon = 1; } static void -do_neon_mul_sat_scalar_long (void) +do_neon_rshift_round_imm (void) { - neon_mac_reg_scalar_long (N_S16 | N_S32, N_S16 | N_S32); + if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC)) + return; + + enum neon_shape rs; + struct neon_type_el et; + + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + { + rs = neon_select_shape (NS_QQI, NS_NULL); + et = neon_check_type (2, rs, N_EQK, N_SU_MVE | N_KEY); + } + else + { + rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL); + et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY); + } + int imm = inst.operands[2].imm; + + /* imm == 0 case is encoded as VMOV for V{R}SHR. */ + if (imm == 0) + { + inst.operands[2].present = 0; + do_neon_mov (); + return; + } + + constraint (imm < 1 || (unsigned)imm > et.size, + _("immediate out of range for shift")); + neon_imm_shift (TRUE, et.type == NT_unsigned, neon_quad (rs), et, + et.size - imm); } static void -do_neon_vmull (void) +do_neon_movhf (void) { - if (inst.operands[2].isscalar) - do_neon_mac_maybe_scalar_long (); - else - { - struct neon_type_el et = neon_check_type (3, NS_QDD, - N_EQK | N_DBL, N_EQK, N_SU_32 | N_P8 | N_P64 | N_KEY); + enum neon_shape rs = neon_select_shape (NS_HH, NS_NULL); + constraint (rs != NS_HH, _("invalid suffix")); - if (et.type == NT_poly) - NEON_ENCODE (POLY, inst); + if (inst.cond != COND_ALWAYS) + { + if (thumb_mode) + { + as_warn (_("ARMv8.2 scalar fp16 instruction cannot be conditional," + " the behaviour is UNPREDICTABLE")); + } else - NEON_ENCODE (INTEGER, inst); - - /* For polynomial encoding the U bit must be zero, and the size must - be 8 (encoded as 0b00) or, on ARMv8 or later 64 (encoded, non - obviously, as 0b10). */ - if (et.size == 64) { - /* Check we're on the correct architecture. */ - if (!mark_feature_used (&fpu_crypto_ext_armv8)) - inst.error = - _("Instruction form not available on this architecture."); - - et.size = 32; + inst.error = BAD_COND; + return; } - - neon_mixed_length (et, et.size); } + + do_vfp_sp_monadic (); + + inst.is_neon = 1; + inst.instruction |= 0xf0000000; } static void -do_neon_ext (void) +do_neon_movl (void) { - enum neon_shape rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL); - struct neon_type_el et = neon_check_type (3, rs, - N_EQK, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY); - unsigned imm = (inst.operands[3].imm * et.size) / 8; - - constraint (imm >= (unsigned) (neon_quad (rs) ? 
16 : 8), - _("shift out of range")); - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (inst.operands[1].reg) << 16; - inst.instruction |= HI1 (inst.operands[1].reg) << 7; - inst.instruction |= LOW4 (inst.operands[2].reg); - inst.instruction |= HI1 (inst.operands[2].reg) << 5; - inst.instruction |= neon_quad (rs) << 6; - inst.instruction |= imm << 8; - - neon_dp_fixup (&inst); + struct neon_type_el et = neon_check_type (2, NS_QD, + N_EQK | N_DBL, N_SU_32 | N_KEY); + unsigned sizebits = et.size >> 3; + inst.instruction |= sizebits << 19; + neon_two_same (0, et.type == NT_unsigned, -1); } static void -do_neon_rev (void) +do_neon_trn (void) { enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_KEY); - unsigned op = (inst.instruction >> 7) & 3; - /* N (width of reversed regions) is encoded as part of the bitmask. We - extract it here to check the elements to be reversed are smaller. - Otherwise we'd get a reserved instruction. */ - unsigned elsize = (op == 2) ? 16 : (op == 1) ? 32 : (op == 0) ? 64 : 0; - gas_assert (elsize != 0); - constraint (et.size >= elsize, - _("elements must be smaller than reversal region")); + NEON_ENCODE (INTEGER, inst); neon_two_same (neon_quad (rs), 1, et.size); } static void -do_neon_dup (void) +do_neon_zip_uzp (void) { - if (inst.operands[1].isscalar) - { - enum neon_shape rs = neon_select_shape (NS_DS, NS_QS, NS_NULL); - struct neon_type_el et = neon_check_type (2, rs, - N_EQK, N_8 | N_16 | N_32 | N_KEY); - unsigned sizebits = et.size >> 3; - unsigned dm = NEON_SCALAR_REG (inst.operands[1].reg); - int logsize = neon_logbits (et.size); - unsigned x = NEON_SCALAR_INDEX (inst.operands[1].reg) << logsize; - - if (vfp_or_neon_is_neon (NEON_CHECK_CC) == FAIL) - return; - - NEON_ENCODE (SCALAR, inst); - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (dm); - inst.instruction |= HI1 (dm) << 5; - inst.instruction |= neon_quad (rs) << 6; - inst.instruction |= x << 17; - inst.instruction |= sizebits << 16; - - neon_dp_fixup (&inst); - } - else + enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_8 | N_16 | N_32 | N_KEY); + if (rs == NS_DD && et.size == 32) { - enum neon_shape rs = neon_select_shape (NS_DR, NS_QR, NS_NULL); - struct neon_type_el et = neon_check_type (2, rs, - N_8 | N_16 | N_32 | N_KEY, N_EQK); - /* Duplicate ARM register to lanes of vector. */ - NEON_ENCODE (ARMREG, inst); - switch (et.size) - { - case 8: inst.instruction |= 0x400000; break; - case 16: inst.instruction |= 0x000020; break; - case 32: inst.instruction |= 0x000000; break; - default: break; - } - inst.instruction |= LOW4 (inst.operands[1].reg) << 12; - inst.instruction |= LOW4 (inst.operands[0].reg) << 16; - inst.instruction |= HI1 (inst.operands[0].reg) << 7; - inst.instruction |= neon_quad (rs) << 21; - /* The encoding for this instruction is identical for the ARM and Thumb - variants, except for the condition field. */ - do_vfp_cond_or_thumb (); + /* Special case: encode as VTRN.32
, . */ + inst.instruction = N_MNEM_vtrn; + do_neon_trn (); + return; } + neon_two_same (neon_quad (rs), 1, et.size); } static void -do_mve_mov (int toQ) +do_neon_sat_abs_neg (void) { - if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH)) return; - if (inst.cond > COND_ALWAYS) - inst.pred_insn_type = MVE_UNPREDICABLE_INSN; - unsigned Rt = 0, Rt2 = 1, Q0 = 2, Q1 = 3; - if (toQ) - { - Q0 = 0; - Q1 = 1; - Rt = 2; - Rt2 = 3; - } + enum neon_shape rs; + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + rs = neon_select_shape (NS_QQ, NS_NULL); + else + rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_S8 | N_S16 | N_S32 | N_KEY); + neon_two_same (neon_quad (rs), 1, et.size); +} - constraint (inst.operands[Q0].reg != inst.operands[Q1].reg + 2, - _("Index one must be [2,3] and index two must be two less than" - " index one.")); - constraint (inst.operands[Rt].reg == inst.operands[Rt2].reg, - _("General purpose registers may not be the same")); - constraint (inst.operands[Rt].reg == REG_SP - || inst.operands[Rt2].reg == REG_SP, - BAD_SP); - constraint (inst.operands[Rt].reg == REG_PC - || inst.operands[Rt2].reg == REG_PC, - BAD_PC); +static void +do_neon_pair_long (void) +{ + enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); + struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_32 | N_KEY); + /* Unsigned is encoded in OP field (bit 7) for these instruction. */ + inst.instruction |= (et.type == NT_unsigned) << 7; + neon_two_same (neon_quad (rs), 1, et.size); +} - inst.instruction = 0xec000f00; - inst.instruction |= HI1 (inst.operands[Q1].reg / 32) << 23; - inst.instruction |= !!toQ << 20; - inst.instruction |= inst.operands[Rt2].reg << 16; - inst.instruction |= LOW4 (inst.operands[Q1].reg / 32) << 13; - inst.instruction |= (inst.operands[Q1].reg % 4) << 4; - inst.instruction |= inst.operands[Rt].reg; +static void +do_neon_recip_est (void) +{ + enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK | N_FLT, N_F_16_32 | N_U32 | N_KEY); + inst.instruction |= (et.type == NT_float) << 8; + neon_two_same (neon_quad (rs), 1, et.size); } static void -do_mve_movn (void) +do_neon_cls (void) { - if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC)) return; - if (inst.cond > COND_ALWAYS) - inst.pred_insn_type = INSIDE_VPT_INSN; + enum neon_shape rs; + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + rs = neon_select_shape (NS_QQ, NS_NULL); else - inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; - - struct neon_type_el et = neon_check_type (2, NS_QQ, N_EQK, N_I16 | N_I32 - | N_KEY); - - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= (neon_logbits (et.size) - 1) << 18; - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[1].reg) << 5; - inst.instruction |= LOW4 (inst.operands[1].reg); - inst.is_neon = 1; + rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_S8 | N_S16 | N_S32 | N_KEY); + neon_two_same (neon_quad (rs), 1, et.size); } -/* VMOV has particularly many variations. It can be one of: - 0. VMOV , - 1. VMOV
, - (Register operations, which are VORR with Rm = Rn.) - 2. VMOV.
, # - 3. VMOV.
, # - (Immediate loads.) - 4. VMOV. , - (ARM register to scalar.) - 5. VMOV , , - (Two ARM registers to vector.) - 6. VMOV.
, - (Scalar to ARM register.) - 7. VMOV , , - (Vector to two ARM registers.) - 8. VMOV.F32 , - 9. VMOV.F64
, - (VFP register moves.) - 10. VMOV.F32 , #imm - 11. VMOV.F64
, #imm - (VFP float immediate load.) - 12. VMOV , - (VFP single to ARM reg.) - 13. VMOV , - (ARM reg to VFP single.) - 14. VMOV , , , - (Two ARM regs to two VFP singles.) - 15. VMOV , , , - (Two VFP singles to two ARM regs.) - 16. VMOV , , , - 17. VMOV , , , - 18. VMOV.
, - 19. VMOV.
, +static void +do_neon_clz (void) +{ + if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC)) + return; - These cases can be disambiguated using neon_select_shape, except cases 1/9 - and 3/11 which depend on the operand type too. + enum neon_shape rs; + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + rs = neon_select_shape (NS_QQ, NS_NULL); + else + rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); - All the encoded bits are hardcoded by this function. + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_I8 | N_I16 | N_I32 | N_KEY); + neon_two_same (neon_quad (rs), 1, et.size); +} - Cases 4, 6 may be used with VFPv1 and above (only 32-bit transfers!). - Cases 5, 7 may be used with VFPv2 and above. +static void +do_neon_cnt (void) +{ + enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK | N_INT, N_8 | N_KEY); + neon_two_same (neon_quad (rs), 1, et.size); +} - FIXME: Some of the checking may be a bit sloppy (in a couple of cases you - can specify a type where it doesn't make sense to, and is ignored). */ +static void +do_neon_swp (void) +{ + enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); + neon_two_same (neon_quad (rs), 1, -1); +} static void -do_neon_mov (void) +do_neon_tbl_tbx (void) { - enum neon_shape rs = neon_select_shape (NS_RRSS, NS_SSRR, NS_RRFF, NS_FFRR, - NS_DRR, NS_RRD, NS_QQ, NS_DD, NS_QI, - NS_DI, NS_SR, NS_RS, NS_FF, NS_FI, - NS_RF, NS_FR, NS_HR, NS_RH, NS_HI, - NS_NULL); - struct neon_type_el et; - const char *ldconst = 0; + unsigned listlenbits; + neon_check_type (3, NS_DLD, N_EQK, N_EQK, N_8 | N_KEY); - switch (rs) + if (inst.operands[1].imm < 1 || inst.operands[1].imm > 4) { - case NS_DD: /* case 1/9. */ - et = neon_check_type (2, rs, N_EQK, N_F64 | N_KEY); - /* It is not an error here if no type is given. */ - inst.error = NULL; - if (et.type == NT_float && et.size == 64) - { - do_vfp_nsyn_opcode ("fcpyd"); - break; - } - /* fall through. */ - - case NS_QQ: /* case 0/1. */ - { - if (check_simd_pred_availability (0, NEON_CHECK_CC | NEON_CHECK_ARCH)) - return; - /* The architecture manual I have doesn't explicitly state which - value the U bit should have for register->register moves, but - the equivalent VORR instruction has U = 0, so do that. */ - inst.instruction = 0x0200110; - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (inst.operands[1].reg); - inst.instruction |= HI1 (inst.operands[1].reg) << 5; - inst.instruction |= LOW4 (inst.operands[1].reg) << 16; - inst.instruction |= HI1 (inst.operands[1].reg) << 7; - inst.instruction |= neon_quad (rs) << 6; - - neon_dp_fixup (&inst); - } - break; - - case NS_DI: /* case 3/11. */ - et = neon_check_type (2, rs, N_EQK, N_F64 | N_KEY); - inst.error = NULL; - if (et.type == NT_float && et.size == 64) - { - /* case 11 (fconstd). */ - ldconst = "fconstd"; - goto encode_fconstd; - } - /* fall through. */ - - case NS_QI: /* case 2/3. */ - if (check_simd_pred_availability (0, NEON_CHECK_CC | NEON_CHECK_ARCH)) - return; - inst.instruction = 0x0800010; - neon_move_immediate (); - neon_dp_fixup (&inst); - break; - - case NS_SR: /* case 4. */ - { - unsigned bcdebits = 0; - int logsize; - unsigned dn = NEON_SCALAR_REG (inst.operands[0].reg); - unsigned x = NEON_SCALAR_INDEX (inst.operands[0].reg); - - /* . is optional here, defaulting to .32. 
*/ - if (inst.vectype.elems == 0 - && inst.operands[0].vectype.type == NT_invtype - && inst.operands[1].vectype.type == NT_invtype) - { - inst.vectype.el[0].type = NT_untyped; - inst.vectype.el[0].size = 32; - inst.vectype.elems = 1; - } - - et = neon_check_type (2, NS_NULL, N_8 | N_16 | N_32 | N_KEY, N_EQK); - logsize = neon_logbits (et.size); - - if (et.size != 32) - { - if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) - && vfp_or_neon_is_neon (NEON_CHECK_ARCH) == FAIL) - return; - } - else - { - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1) - && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), - _(BAD_FPU)); - } - - if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) - { - if (inst.operands[1].reg == REG_SP) - as_tsktsk (MVE_BAD_SP); - else if (inst.operands[1].reg == REG_PC) - as_tsktsk (MVE_BAD_PC); - } - unsigned size = inst.operands[0].isscalar == 1 ? 64 : 128; - - constraint (et.type == NT_invtype, _("bad type for scalar")); - constraint (x >= size / et.size, _("scalar index out of range")); - + first_error (_("bad list length for table lookup")); + return; + } - switch (et.size) - { - case 8: bcdebits = 0x8; break; - case 16: bcdebits = 0x1; break; - case 32: bcdebits = 0x0; break; - default: ; - } + listlenbits = inst.operands[1].imm - 1; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= LOW4 (inst.operands[2].reg); + inst.instruction |= HI1 (inst.operands[2].reg) << 5; + inst.instruction |= listlenbits << 8; - bcdebits |= (x & ((1 << (3-logsize)) - 1)) << logsize; + neon_dp_fixup (&inst); +} - inst.instruction = 0xe000b10; - do_vfp_cond_or_thumb (); - inst.instruction |= LOW4 (dn) << 16; - inst.instruction |= HI1 (dn) << 7; - inst.instruction |= inst.operands[1].reg << 12; - inst.instruction |= (bcdebits & 3) << 5; - inst.instruction |= ((bcdebits >> 2) & 3) << 21; - inst.instruction |= (x >> (3-logsize)) << 16; - } - break; +static void +do_neon_ldm_stm (void) +{ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + /* P, U and L bits are part of bitmask. */ + int is_dbmode = (inst.instruction & (1 << 24)) != 0; + unsigned offsetbits = inst.operands[1].imm * 2; - case NS_DRR: /* case 5 (fmdrr). */ - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) - && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), - _(BAD_FPU)); + if (inst.operands[1].issingle) + { + do_vfp_nsyn_ldm_stm (is_dbmode); + return; + } - inst.instruction = 0xc400b10; - do_vfp_cond_or_thumb (); - inst.instruction |= LOW4 (inst.operands[0].reg); - inst.instruction |= HI1 (inst.operands[0].reg) << 5; - inst.instruction |= inst.operands[1].reg << 12; - inst.instruction |= inst.operands[2].reg << 16; - break; + constraint (is_dbmode && !inst.operands[0].writeback, + _("writeback (!) must be used for VLDMDB and VSTMDB")); - case NS_RS: /* case 6. */ - { - unsigned logsize; - unsigned dn = NEON_SCALAR_REG (inst.operands[1].reg); - unsigned x = NEON_SCALAR_INDEX (inst.operands[1].reg); - unsigned abcdebits = 0; + constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16, + _("register list must contain at least 1 and at most 16 " + "registers")); - /* .
is optional here, defaulting to .32. */ - if (inst.vectype.elems == 0 - && inst.operands[0].vectype.type == NT_invtype - && inst.operands[1].vectype.type == NT_invtype) - { - inst.vectype.el[0].type = NT_untyped; - inst.vectype.el[0].size = 32; - inst.vectype.elems = 1; - } + inst.instruction |= inst.operands[0].reg << 16; + inst.instruction |= inst.operands[0].writeback << 21; + inst.instruction |= LOW4 (inst.operands[1].reg) << 12; + inst.instruction |= HI1 (inst.operands[1].reg) << 22; - et = neon_check_type (2, NS_NULL, - N_EQK, N_S8 | N_S16 | N_U8 | N_U16 | N_32 | N_KEY); - logsize = neon_logbits (et.size); + inst.instruction |= offsetbits; - if (et.size != 32) - { - if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) - && vfp_or_neon_is_neon (NEON_CHECK_CC - | NEON_CHECK_ARCH) == FAIL) - return; - } - else - { - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1) - && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), - _(BAD_FPU)); - } + do_vfp_cond_or_thumb (); +} - if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) - { - if (inst.operands[0].reg == REG_SP) - as_tsktsk (MVE_BAD_SP); - else if (inst.operands[0].reg == REG_PC) - as_tsktsk (MVE_BAD_PC); - } +static void +do_vfp_nsyn_pop (void) +{ + nsyn_insert_sp (); + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) { + return do_vfp_nsyn_opcode ("vldm"); + } - unsigned size = inst.operands[1].isscalar == 1 ? 64 : 128; + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd), + _(BAD_FPU)); - constraint (et.type == NT_invtype, _("bad type for scalar")); - constraint (x >= size / et.size, _("scalar index out of range")); + constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16, + _("register list must contain at least 1 and at most 16 " + "registers")); - switch (et.size) - { - case 8: abcdebits = (et.type == NT_signed) ? 0x08 : 0x18; break; - case 16: abcdebits = (et.type == NT_signed) ? 0x01 : 0x11; break; - case 32: abcdebits = 0x00; break; - default: ; - } + if (inst.operands[1].issingle) + do_vfp_nsyn_opcode ("fldmias"); + else + do_vfp_nsyn_opcode ("fldmiad"); +} - abcdebits |= (x & ((1 << (3-logsize)) - 1)) << logsize; - inst.instruction = 0xe100b10; - do_vfp_cond_or_thumb (); - inst.instruction |= LOW4 (dn) << 16; - inst.instruction |= HI1 (dn) << 7; - inst.instruction |= inst.operands[0].reg << 12; - inst.instruction |= (abcdebits & 3) << 5; - inst.instruction |= (abcdebits >> 2) << 21; - inst.instruction |= (x >> (3-logsize)) << 16; - } - break; +static void +do_vfp_nsyn_push (void) +{ + nsyn_insert_sp (); + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) { + return do_vfp_nsyn_opcode ("vstmdb"); + } - case NS_RRD: /* case 7 (fmrrd). */ - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) - && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), - _(BAD_FPU)); + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd), + _(BAD_FPU)); - inst.instruction = 0xc500b10; - do_vfp_cond_or_thumb (); - inst.instruction |= inst.operands[0].reg << 12; - inst.instruction |= inst.operands[1].reg << 16; - inst.instruction |= LOW4 (inst.operands[2].reg); - inst.instruction |= HI1 (inst.operands[2].reg) << 5; - break; + constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16, + _("register list must contain at least 1 and at most 16 " + "registers")); - case NS_FF: /* case 8 (fcpys). */ - do_vfp_nsyn_opcode ("fcpys"); - break; + if (inst.operands[1].issingle) + do_vfp_nsyn_opcode ("fstmdbs"); + else + do_vfp_nsyn_opcode ("fstmdbd"); +} - case NS_HI: - case NS_FI: /* case 10 (fconsts). 
*/ - ldconst = "fconsts"; - encode_fconstd: - if (!inst.operands[1].immisfloat) - { - unsigned new_imm; - /* Immediate has to fit in 8 bits so float is enough. */ - float imm = (float) inst.operands[1].imm; - memcpy (&new_imm, &imm, sizeof (float)); - /* But the assembly may have been written to provide an integer - bit pattern that equates to a float, so check that the - conversion has worked. */ - if (is_quarter_float (new_imm)) - { - if (is_quarter_float (inst.operands[1].imm)) - as_warn (_("immediate constant is valid both as a bit-pattern and a floating point value (using the fp value)")); - inst.operands[1].imm = new_imm; - inst.operands[1].immisfloat = 1; - } - } +static void +do_neon_ldr_str (void) +{ + int is_ldr = (inst.instruction & (1 << 20)) != 0; - if (is_quarter_float (inst.operands[1].imm)) - { - inst.operands[1].imm = neon_qfloat_bits (inst.operands[1].imm); - do_vfp_nsyn_opcode (ldconst); + /* Use of PC in vstr in ARM mode is deprecated in ARMv7. + And is UNPREDICTABLE in thumb mode. */ + if (!is_ldr + && inst.operands[1].reg == REG_PC + && (ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v7) || thumb_mode)) + { + if (thumb_mode) + inst.error = _("Use of PC here is UNPREDICTABLE"); + else if (warn_on_deprecated) + as_tsktsk (_("Use of PC here is deprecated")); + } - /* ARMv8.2 fp16 vmov.f16 instruction. */ - if (rs == NS_HI) - do_scalar_fp16_v82_encode (); - } + if (inst.operands[0].issingle) + { + if (is_ldr) + do_vfp_nsyn_opcode ("flds"); else - first_error (_("immediate out of range")); - break; + do_vfp_nsyn_opcode ("fsts"); - case NS_RH: - case NS_RF: /* case 12 (fmrs). */ - do_vfp_nsyn_opcode ("fmrs"); - /* ARMv8.2 fp16 vmov.f16 instruction. */ - if (rs == NS_RH) + /* ARMv8.2 vldr.16/vstr.16 instruction. */ + if (inst.vectype.el[0].size == 16) do_scalar_fp16_v82_encode (); - break; + } + else + { + if (is_ldr) + do_vfp_nsyn_opcode ("fldd"); + else + do_vfp_nsyn_opcode ("fstd"); + } +} - case NS_HR: - case NS_FR: /* case 13 (fmsr). */ - do_vfp_nsyn_opcode ("fmsr"); - /* ARMv8.2 fp16 vmov.f16 instruction. */ - if (rs == NS_HR) - do_scalar_fp16_v82_encode (); - break; +static void +do_t_vldr_vstr_sysreg (void) +{ + int fp_vldr_bitno = 20, sysreg_vldr_bitno = 20; + bfd_boolean is_vldr = ((inst.instruction & (1 << fp_vldr_bitno)) != 0); - case NS_RRSS: - do_mve_mov (0); - break; - case NS_SSRR: - do_mve_mov (1); - break; + /* Use of PC is UNPREDICTABLE. */ + if (inst.operands[1].reg == REG_PC) + inst.error = _("Use of PC here is UNPREDICTABLE"); - /* The encoders for the fmrrs and fmsrr instructions expect three operands - (one of which is a list), but we have parsed four. Do some fiddling to - make the operands what do_vfp_reg2_from_sp2 and do_vfp_sp2_from_reg2 - expect. */ - case NS_RRFF: /* case 14 (fmrrs). */ - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) - && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), - _(BAD_FPU)); - constraint (inst.operands[3].reg != inst.operands[2].reg + 1, - _("VFP registers must be adjacent")); - inst.operands[2].imm = 2; - memset (&inst.operands[3], '\0', sizeof (inst.operands[3])); - do_vfp_nsyn_opcode ("fmrrs"); - break; + if (inst.operands[1].immisreg) + inst.error = _("instruction does not accept register index"); - case NS_FFRR: /* case 15 (fmsrr). 
*/ - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) - && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), - _(BAD_FPU)); - constraint (inst.operands[1].reg != inst.operands[0].reg + 1, - _("VFP registers must be adjacent")); - inst.operands[1] = inst.operands[2]; - inst.operands[2] = inst.operands[3]; - inst.operands[0].imm = 2; - memset (&inst.operands[3], '\0', sizeof (inst.operands[3])); - do_vfp_nsyn_opcode ("fmsrr"); - break; + if (!inst.operands[1].isreg) + inst.error = _("instruction does not accept PC-relative addressing"); - case NS_NULL: - /* neon_select_shape has determined that the instruction - shape is wrong and has already set the error message. */ - break; + if (abs (inst.operands[1].imm) >= (1 << 7)) + inst.error = _("immediate value out of range"); + + inst.instruction = 0xec000f80; + if (is_vldr) + inst.instruction |= 1 << sysreg_vldr_bitno; + encode_arm_cp_address (1, TRUE, FALSE, BFD_RELOC_ARM_T32_VLDR_VSTR_OFF_IMM); + inst.instruction |= (inst.operands[0].imm & 0x7) << 13; + inst.instruction |= (inst.operands[0].imm & 0x8) << 19; +} + +static void +do_vldr_vstr (void) +{ + bfd_boolean sysreg_op = !inst.operands[0].isreg; - default: - abort (); + /* VLDR/VSTR (System Register). */ + if (sysreg_op) + { + if (!mark_feature_used (&arm_ext_v8_1m_main)) + as_bad (_("Instruction not permitted on this architecture")); + + do_t_vldr_vstr_sysreg (); + } + /* VLDR/VSTR. */ + else + { + if (!mark_feature_used (&fpu_vfp_ext_v1xd) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + as_bad (_("Instruction not permitted on this architecture")); + do_neon_ldr_str (); } } +/* "interleave" version also handles non-interleaving register VLD1/VST1 + instructions. */ + static void -do_mve_movl (void) +do_neon_ld_st_interleave (void) { - if (!(inst.operands[0].present && inst.operands[0].isquad - && inst.operands[1].present && inst.operands[1].isquad - && !inst.operands[2].present)) + struct neon_type_el et = neon_check_type (1, NS_NULL, + N_8 | N_16 | N_32 | N_64); + unsigned alignbits = 0; + unsigned idx; + /* The bits in this table go: + 0: register stride of one (0) or two (1) + 1,2: register list length, minus one (1, 2, 3, 4). + 3,4: in instruction type, minus one (VLD / VST). + We use -1 for invalid entries. */ + const int typetable[] = { - inst.instruction = 0; - inst.cond = 0xb; - if (thumb_mode) - set_pred_insn_type (INSIDE_IT_INSN); - do_neon_mov (); - return; - } + 0x7, -1, 0xa, -1, 0x6, -1, 0x2, -1, /* VLD1 / VST1. */ + -1, -1, 0x8, 0x9, -1, -1, 0x3, -1, /* VLD2 / VST2. */ + -1, -1, -1, -1, 0x4, 0x5, -1, -1, /* VLD3 / VST3. */ + -1, -1, -1, -1, -1, -1, 0x0, 0x1 /* VLD4 / VST4. 
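+ (Worked example: a stride-1, four-register VLD1 gives 0b110 from the
+ list specifier and 0b00 for the instruction type, so idx below is 6
+ and the type field used is typetable[6], i.e. 0x2.)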
*/ + }; + int typebits; - if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + if (et.type == NT_invtype) return; - if (inst.cond != COND_ALWAYS) - inst.pred_insn_type = INSIDE_VPT_INSN; + if (inst.operands[1].immisalign) + switch (inst.operands[1].imm >> 8) + { + case 64: alignbits = 1; break; + case 128: + if (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 2 + && NEON_REGLIST_LENGTH (inst.operands[0].imm) != 4) + goto bad_alignment; + alignbits = 2; + break; + case 256: + if (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 4) + goto bad_alignment; + alignbits = 3; + break; + default: + bad_alignment: + first_error (_("bad alignment")); + return; + } - struct neon_type_el et = neon_check_type (2, NS_QQ, N_EQK, N_S8 | N_U8 - | N_S16 | N_U16 | N_KEY); + inst.instruction |= alignbits << 4; + inst.instruction |= neon_logbits (et.size) << 6; - inst.instruction |= (et.type == NT_unsigned) << 28; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= (neon_logbits (et.size) + 1) << 19; - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[1].reg) << 5; - inst.instruction |= LOW4 (inst.operands[1].reg); - inst.is_neon = 1; + /* Bits [4:6] of the immediate in a list specifier encode register stride + (minus 1) in bit 4, and list length in bits [5:6]. We put the of + VLD/VST in bits [9:8] of the initial bitmask. Suck it out here, look + up the right value for "type" in a table based on this value and the given + list style, then stick it back. */ + idx = ((inst.operands[0].imm >> 4) & 7) + | (((inst.instruction >> 8) & 3) << 3); + + typebits = typetable[idx]; + + constraint (typebits == -1, _("bad list type for instruction")); + constraint (((inst.instruction >> 8) & 3) && et.size == 64, + BAD_EL_TYPE); + + inst.instruction &= ~0xf00; + inst.instruction |= typebits << 8; } -static void -do_neon_rshift_round_imm (void) +/* Check alignment is valid for do_neon_ld_st_lane and do_neon_ld_dup. + *DO_ALIGN is set to 1 if the relevant alignment bit should be set, 0 + otherwise. The variable arguments are a list of pairs of legal (size, align) + values, terminated with -1. */ + +static int +neon_alignment_bit (int size, int align, int *do_alignment, ...) { - enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL); - struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY); - int imm = inst.operands[2].imm; + va_list ap; + int result = FAIL, thissize, thisalign; - /* imm == 0 case is encoded as VMOV for V{R}SHR. 
*/ - if (imm == 0) + if (!inst.operands[1].immisalign) { - inst.operands[2].present = 0; - do_neon_mov (); - return; + *do_alignment = 0; + return SUCCESS; } - constraint (imm < 1 || (unsigned)imm > et.size, - _("immediate out of range for shift")); - neon_imm_shift (TRUE, et.type == NT_unsigned, neon_quad (rs), et, - et.size - imm); -} - -static void -do_neon_movhf (void) -{ - enum neon_shape rs = neon_select_shape (NS_HH, NS_NULL); - constraint (rs != NS_HH, _("invalid suffix")); + va_start (ap, do_alignment); - if (inst.cond != COND_ALWAYS) + do { - if (thumb_mode) - { - as_warn (_("ARMv8.2 scalar fp16 instruction cannot be conditional," - " the behaviour is UNPREDICTABLE")); - } - else - { - inst.error = BAD_COND; - return; - } + thissize = va_arg (ap, int); + if (thissize == -1) + break; + thisalign = va_arg (ap, int); + + if (size == thissize && align == thisalign) + result = SUCCESS; } + while (result != SUCCESS); - do_vfp_sp_monadic (); + va_end (ap); - inst.is_neon = 1; - inst.instruction |= 0xf0000000; -} + if (result == SUCCESS) + *do_alignment = 1; + else + first_error (_("unsupported alignment for instruction")); -static void -do_neon_movl (void) -{ - struct neon_type_el et = neon_check_type (2, NS_QD, - N_EQK | N_DBL, N_SU_32 | N_KEY); - unsigned sizebits = et.size >> 3; - inst.instruction |= sizebits << 19; - neon_two_same (0, et.type == NT_unsigned, -1); + return result; } static void -do_neon_trn (void) +do_neon_ld_st_lane (void) { - enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); - struct neon_type_el et = neon_check_type (2, rs, - N_EQK, N_8 | N_16 | N_32 | N_KEY); - NEON_ENCODE (INTEGER, inst); - neon_two_same (neon_quad (rs), 1, et.size); -} + struct neon_type_el et = neon_check_type (1, NS_NULL, N_8 | N_16 | N_32); + int align_good, do_alignment = 0; + int logsize = neon_logbits (et.size); + int align = inst.operands[1].imm >> 8; + int n = (inst.instruction >> 8) & 3; + int max_el = 64 / et.size; -static void -do_neon_zip_uzp (void) -{ - enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); - struct neon_type_el et = neon_check_type (2, rs, - N_EQK, N_8 | N_16 | N_32 | N_KEY); - if (rs == NS_DD && et.size == 32) + if (et.type == NT_invtype) + return; + + constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != n + 1, + _("bad list length")); + constraint (NEON_LANE (inst.operands[0].imm) >= max_el, + _("scalar index out of range")); + constraint (n != 0 && NEON_REG_STRIDE (inst.operands[0].imm) == 2 + && et.size == 8, + _("stride of 2 unavailable when element size is 8")); + + switch (n) { - /* Special case: encode as VTRN.32
, . */ - inst.instruction = N_MNEM_vtrn; - do_neon_trn (); - return; + case 0: /* VLD1 / VST1. */ + align_good = neon_alignment_bit (et.size, align, &do_alignment, 16, 16, + 32, 32, -1); + if (align_good == FAIL) + return; + if (do_alignment) + { + unsigned alignbits = 0; + switch (et.size) + { + case 16: alignbits = 0x1; break; + case 32: alignbits = 0x3; break; + default: ; + } + inst.instruction |= alignbits << 4; + } + break; + + case 1: /* VLD2 / VST2. */ + align_good = neon_alignment_bit (et.size, align, &do_alignment, 8, 16, + 16, 32, 32, 64, -1); + if (align_good == FAIL) + return; + if (do_alignment) + inst.instruction |= 1 << 4; + break; + + case 2: /* VLD3 / VST3. */ + constraint (inst.operands[1].immisalign, + _("can't use alignment with this instruction")); + break; + + case 3: /* VLD4 / VST4. */ + align_good = neon_alignment_bit (et.size, align, &do_alignment, 8, 32, + 16, 64, 32, 64, 32, 128, -1); + if (align_good == FAIL) + return; + if (do_alignment) + { + unsigned alignbits = 0; + switch (et.size) + { + case 8: alignbits = 0x1; break; + case 16: alignbits = 0x1; break; + case 32: alignbits = (align == 64) ? 0x1 : 0x2; break; + default: ; + } + inst.instruction |= alignbits << 4; + } + break; + + default: ; } - neon_two_same (neon_quad (rs), 1, et.size); -} -static void -do_neon_sat_abs_neg (void) -{ - enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); - struct neon_type_el et = neon_check_type (2, rs, - N_EQK, N_S8 | N_S16 | N_S32 | N_KEY); - neon_two_same (neon_quad (rs), 1, et.size); -} + /* Reg stride of 2 is encoded in bit 5 when size==16, bit 6 when size==32. */ + if (n != 0 && NEON_REG_STRIDE (inst.operands[0].imm) == 2) + inst.instruction |= 1 << (4 + logsize); -static void -do_neon_pair_long (void) -{ - enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); - struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_32 | N_KEY); - /* Unsigned is encoded in OP field (bit 7) for these instruction. */ - inst.instruction |= (et.type == NT_unsigned) << 7; - neon_two_same (neon_quad (rs), 1, et.size); + inst.instruction |= NEON_LANE (inst.operands[0].imm) << (logsize + 5); + inst.instruction |= logsize << 10; } -static void -do_neon_recip_est (void) -{ - enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); - struct neon_type_el et = neon_check_type (2, rs, - N_EQK | N_FLT, N_F_16_32 | N_U32 | N_KEY); - inst.instruction |= (et.type == NT_float) << 8; - neon_two_same (neon_quad (rs), 1, et.size); -} +/* Encode single n-element structure to all lanes VLD instructions. */ static void -do_neon_cls (void) +do_neon_ld_dup (void) { - enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); - struct neon_type_el et = neon_check_type (2, rs, - N_EQK, N_S8 | N_S16 | N_S32 | N_KEY); - neon_two_same (neon_quad (rs), 1, et.size); -} + struct neon_type_el et = neon_check_type (1, NS_NULL, N_8 | N_16 | N_32); + int align_good, do_alignment = 0; -static void -do_neon_clz (void) -{ - enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); - struct neon_type_el et = neon_check_type (2, rs, - N_EQK, N_I8 | N_I16 | N_I32 | N_KEY); - neon_two_same (neon_quad (rs), 1, et.size); -} + if (et.type == NT_invtype) + return; -static void -do_neon_cnt (void) -{ - enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); - struct neon_type_el et = neon_check_type (2, rs, - N_EQK | N_INT, N_8 | N_KEY); - neon_two_same (neon_quad (rs), 1, et.size); -} + switch ((inst.instruction >> 8) & 3) + { + case 0: /* VLD1. 
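+ For the to-all-lanes form the register-list length selects the T bit:
+ a one-register list leaves bit 5 clear, a two-register list sets it,
+ and the element size goes into bits [7:6] below.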
*/ + gas_assert (NEON_REG_STRIDE (inst.operands[0].imm) != 2); + align_good = neon_alignment_bit (et.size, inst.operands[1].imm >> 8, + &do_alignment, 16, 16, 32, 32, -1); + if (align_good == FAIL) + return; + switch (NEON_REGLIST_LENGTH (inst.operands[0].imm)) + { + case 1: break; + case 2: inst.instruction |= 1 << 5; break; + default: first_error (_("bad list length")); return; + } + inst.instruction |= neon_logbits (et.size) << 6; + break; -static void -do_neon_swp (void) -{ - enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); - neon_two_same (neon_quad (rs), 1, -1); -} + case 1: /* VLD2. */ + align_good = neon_alignment_bit (et.size, inst.operands[1].imm >> 8, + &do_alignment, 8, 16, 16, 32, 32, 64, + -1); + if (align_good == FAIL) + return; + constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 2, + _("bad list length")); + if (NEON_REG_STRIDE (inst.operands[0].imm) == 2) + inst.instruction |= 1 << 5; + inst.instruction |= neon_logbits (et.size) << 6; + break; -static void -do_neon_tbl_tbx (void) -{ - unsigned listlenbits; - neon_check_type (3, NS_DLD, N_EQK, N_EQK, N_8 | N_KEY); + case 2: /* VLD3. */ + constraint (inst.operands[1].immisalign, + _("can't use alignment with this instruction")); + constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 3, + _("bad list length")); + if (NEON_REG_STRIDE (inst.operands[0].imm) == 2) + inst.instruction |= 1 << 5; + inst.instruction |= neon_logbits (et.size) << 6; + break; - if (inst.operands[1].imm < 1 || inst.operands[1].imm > 4) - { - first_error (_("bad list length for table lookup")); - return; - } + case 3: /* VLD4. */ + { + int align = inst.operands[1].imm >> 8; + align_good = neon_alignment_bit (et.size, align, &do_alignment, 8, 32, + 16, 64, 32, 64, 32, 128, -1); + if (align_good == FAIL) + return; + constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 4, + _("bad list length")); + if (NEON_REG_STRIDE (inst.operands[0].imm) == 2) + inst.instruction |= 1 << 5; + if (et.size == 32 && align == 128) + inst.instruction |= 0x3 << 6; + else + inst.instruction |= neon_logbits (et.size) << 6; + } + break; - listlenbits = inst.operands[1].imm - 1; - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (inst.operands[1].reg) << 16; - inst.instruction |= HI1 (inst.operands[1].reg) << 7; - inst.instruction |= LOW4 (inst.operands[2].reg); - inst.instruction |= HI1 (inst.operands[2].reg) << 5; - inst.instruction |= listlenbits << 8; + default: ; + } - neon_dp_fixup (&inst); + inst.instruction |= do_alignment << 4; } +/* Disambiguate VLD and VST instructions, and fill in common bits (those + apart from bits [11:4]. */ + static void -do_neon_ldm_stm (void) +do_neon_ldx_stx (void) { - /* P, U and L bits are part of bitmask. */ - int is_dbmode = (inst.instruction & (1 << 24)) != 0; - unsigned offsetbits = inst.operands[1].imm * 2; + if (inst.operands[1].isreg) + constraint (inst.operands[1].reg == REG_PC, BAD_PC); - if (inst.operands[1].issingle) + switch (NEON_LANE (inst.operands[0].imm)) { - do_vfp_nsyn_ldm_stm (is_dbmode); - return; - } + case NEON_INTERLEAVE_LANES: + NEON_ENCODE (INTERLV, inst); + do_neon_ld_st_interleave (); + break; - constraint (is_dbmode && !inst.operands[0].writeback, - _("writeback (!) 
must be used for VLDMDB and VSTMDB")); + case NEON_ALL_LANES: + NEON_ENCODE (DUP, inst); + if (inst.instruction == N_INV) + { + first_error ("only loads support such operands"); + break; + } + do_neon_ld_dup (); + break; - constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16, - _("register list must contain at least 1 and at most 16 " - "registers")); + default: + NEON_ENCODE (LANE, inst); + do_neon_ld_st_lane (); + } - inst.instruction |= inst.operands[0].reg << 16; - inst.instruction |= inst.operands[0].writeback << 21; - inst.instruction |= LOW4 (inst.operands[1].reg) << 12; - inst.instruction |= HI1 (inst.operands[1].reg) << 22; + /* L bit comes from bit mask. */ + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= inst.operands[1].reg << 16; - inst.instruction |= offsetbits; + if (inst.operands[1].postind) + { + int postreg = inst.operands[1].imm & 0xf; + constraint (!inst.operands[1].immisreg, + _("post-index must be a register")); + constraint (postreg == 0xd || postreg == 0xf, + _("bad register for post-index")); + inst.instruction |= postreg; + } + else + { + constraint (inst.operands[1].immisreg, BAD_ADDR_MODE); + constraint (inst.relocs[0].exp.X_op != O_constant + || inst.relocs[0].exp.X_add_number != 0, + BAD_ADDR_MODE); - do_vfp_cond_or_thumb (); + if (inst.operands[1].writeback) + { + inst.instruction |= 0xd; + } + else + inst.instruction |= 0xf; + } + + if (thumb_mode) + inst.instruction |= 0xf9000000; + else + inst.instruction |= 0xf4000000; } +/* FP v8. */ static void -do_neon_ldr_str (void) +do_vfp_nsyn_fpv8 (enum neon_shape rs) { - int is_ldr = (inst.instruction & (1 << 20)) != 0; + /* Targets like FPv5-SP-D16 don't support FP v8 instructions with + D register operands. */ + if (neon_shape_class[rs] == SC_DOUBLE) + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), + _(BAD_FPU)); - /* Use of PC in vstr in ARM mode is deprecated in ARMv7. - And is UNPREDICTABLE in thumb mode. */ - if (!is_ldr - && inst.operands[1].reg == REG_PC - && (ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v7) || thumb_mode)) - { - if (thumb_mode) - inst.error = _("Use of PC here is UNPREDICTABLE"); - else if (warn_on_deprecated) - as_tsktsk (_("Use of PC here is deprecated")); - } + NEON_ENCODE (FPV8, inst); - if (inst.operands[0].issingle) + if (rs == NS_FFF || rs == NS_HHH) { - if (is_ldr) - do_vfp_nsyn_opcode ("flds"); - else - do_vfp_nsyn_opcode ("fsts"); + do_vfp_sp_dyadic (); - /* ARMv8.2 vldr.16/vstr.16 instruction. */ - if (inst.vectype.el[0].size == 16) - do_scalar_fp16_v82_encode (); - } - else - { - if (is_ldr) - do_vfp_nsyn_opcode ("fldd"); - else - do_vfp_nsyn_opcode ("fstd"); + /* ARMv8.2 fp16 instruction. */ + if (rs == NS_HHH) + do_scalar_fp16_v82_encode (); } + else + do_vfp_dp_rd_rn_rm (); + + if (rs == NS_DDD) + inst.instruction |= 0x100; + + inst.instruction |= 0xf0000000; } static void -do_t_vldr_vstr_sysreg (void) +do_vsel (void) { - int fp_vldr_bitno = 20, sysreg_vldr_bitno = 20; - bfd_boolean is_vldr = ((inst.instruction & (1 << fp_vldr_bitno)) != 0); + set_pred_insn_type (OUTSIDE_PRED_INSN); - /* Use of PC is UNPREDICTABLE. 
*/ - if (inst.operands[1].reg == REG_PC) - inst.error = _("Use of PC here is UNPREDICTABLE"); + if (try_vfp_nsyn (3, do_vfp_nsyn_fpv8) != SUCCESS) + first_error (_("invalid instruction shape")); +} - if (inst.operands[1].immisreg) - inst.error = _("instruction does not accept register index"); +static void +do_vmaxnm (void) +{ + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + set_pred_insn_type (OUTSIDE_PRED_INSN); - if (!inst.operands[1].isreg) - inst.error = _("instruction does not accept PC-relative addressing"); + if (try_vfp_nsyn (3, do_vfp_nsyn_fpv8) == SUCCESS) + return; - if (abs (inst.operands[1].imm) >= (1 << 7)) - inst.error = _("immediate value out of range"); + if (!check_simd_pred_availability (TRUE, NEON_CHECK_CC | NEON_CHECK_ARCH8)) + return; - inst.instruction = 0xec000f80; - if (is_vldr) - inst.instruction |= 1 << sysreg_vldr_bitno; - encode_arm_cp_address (1, TRUE, FALSE, BFD_RELOC_ARM_T32_VLDR_VSTR_OFF_IMM); - inst.instruction |= (inst.operands[0].imm & 0x7) << 13; - inst.instruction |= (inst.operands[0].imm & 0x8) << 19; + neon_dyadic_misc (NT_untyped, N_F_16_32, 0); } static void -do_vldr_vstr (void) +do_vrint_1 (enum neon_cvt_mode mode) { - bfd_boolean sysreg_op = !inst.operands[0].isreg; + enum neon_shape rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_QQ, NS_NULL); + struct neon_type_el et; - /* VLDR/VSTR (System Register). */ - if (sysreg_op) - { - if (!mark_feature_used (&arm_ext_v8_1m_main)) - as_bad (_("Instruction not permitted on this architecture")); + if (rs == NS_NULL) + return; - do_t_vldr_vstr_sysreg (); - } - /* VLDR/VSTR. */ - else + /* Targets like FPv5-SP-D16 don't support FP v8 instructions with + D register operands. */ + if (neon_shape_class[rs] == SC_DOUBLE) + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), + _(BAD_FPU)); + + et = neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY + | N_VFP); + if (et.type != NT_invtype) { - if (!mark_feature_used (&fpu_vfp_ext_v1xd)) - as_bad (_("Instruction not permitted on this architecture")); - do_neon_ldr_str (); - } -} + /* VFP encodings. */ + if (mode == neon_cvt_mode_a || mode == neon_cvt_mode_n + || mode == neon_cvt_mode_p || mode == neon_cvt_mode_m) + set_pred_insn_type (OUTSIDE_PRED_INSN); -/* "interleave" version also handles non-interleaving register VLD1/VST1 - instructions. */ + NEON_ENCODE (FPV8, inst); + if (rs == NS_FF || rs == NS_HH) + do_vfp_sp_monadic (); + else + do_vfp_dp_rd_rm (); -static void -do_neon_ld_st_interleave (void) -{ - struct neon_type_el et = neon_check_type (1, NS_NULL, - N_8 | N_16 | N_32 | N_64); - unsigned alignbits = 0; - unsigned idx; - /* The bits in this table go: - 0: register stride of one (0) or two (1) - 1,2: register list length, minus one (1, 2, 3, 4). - 3,4: in instruction type, minus one (VLD / VST). - We use -1 for invalid entries. */ - const int typetable[] = + switch (mode) + { + case neon_cvt_mode_r: inst.instruction |= 0x00000000; break; + case neon_cvt_mode_z: inst.instruction |= 0x00000080; break; + case neon_cvt_mode_x: inst.instruction |= 0x00010000; break; + case neon_cvt_mode_a: inst.instruction |= 0xf0000000; break; + case neon_cvt_mode_n: inst.instruction |= 0xf0010000; break; + case neon_cvt_mode_p: inst.instruction |= 0xf0020000; break; + case neon_cvt_mode_m: inst.instruction |= 0xf0030000; break; + default: abort (); + } + + inst.instruction |= (rs == NS_DD) << 8; + do_vfp_cond_or_thumb (); + + /* ARMv8.2 fp16 vrint instruction. 
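+ The half-precision form reuses the single-precision path above; the
+ call below adjusts the encoding for the fp16 variant.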
*/ + if (rs == NS_HH) + do_scalar_fp16_v82_encode (); + } + else { - 0x7, -1, 0xa, -1, 0x6, -1, 0x2, -1, /* VLD1 / VST1. */ - -1, -1, 0x8, 0x9, -1, -1, 0x3, -1, /* VLD2 / VST2. */ - -1, -1, -1, -1, 0x4, 0x5, -1, -1, /* VLD3 / VST3. */ - -1, -1, -1, -1, -1, -1, 0x0, 0x1 /* VLD4 / VST4. */ - }; - int typebits; + /* Neon encodings (or something broken...). */ + inst.error = NULL; + et = neon_check_type (2, rs, N_EQK, N_F_16_32 | N_KEY); - if (et.type == NT_invtype) - return; + if (et.type == NT_invtype) + return; - if (inst.operands[1].immisalign) - switch (inst.operands[1].imm >> 8) - { - case 64: alignbits = 1; break; - case 128: - if (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 2 - && NEON_REGLIST_LENGTH (inst.operands[0].imm) != 4) - goto bad_alignment; - alignbits = 2; - break; - case 256: - if (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 4) - goto bad_alignment; - alignbits = 3; - break; - default: - bad_alignment: - first_error (_("bad alignment")); + if (!check_simd_pred_availability (TRUE, + NEON_CHECK_CC | NEON_CHECK_ARCH8)) return; - } - inst.instruction |= alignbits << 4; - inst.instruction |= neon_logbits (et.size) << 6; + NEON_ENCODE (FLOAT, inst); - /* Bits [4:6] of the immediate in a list specifier encode register stride - (minus 1) in bit 4, and list length in bits [5:6]. We put the <n> of - VLD<n>/VST<n> in bits [9:8] of the initial bitmask. Suck it out here, look - up the right value for "type" in a table based on this value and the given - list style, then stick it back. */ - idx = ((inst.operands[0].imm >> 4) & 7) - | (((inst.instruction >> 8) & 3) << 3); + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= neon_quad (rs) << 6; + /* Mask off the original size bits and reencode them. */ + inst.instruction = ((inst.instruction & 0xfff3ffff) + | neon_logbits (et.size) << 18); - typebits = typetable[idx]; + switch (mode) + { + case neon_cvt_mode_z: inst.instruction |= 3 << 7; break; + case neon_cvt_mode_x: inst.instruction |= 1 << 7; break; + case neon_cvt_mode_a: inst.instruction |= 2 << 7; break; + case neon_cvt_mode_n: inst.instruction |= 0 << 7; break; + case neon_cvt_mode_p: inst.instruction |= 7 << 7; break; + case neon_cvt_mode_m: inst.instruction |= 5 << 7; break; + case neon_cvt_mode_r: inst.error = _("invalid rounding mode"); break; + default: abort (); + } - constraint (typebits == -1, _("bad list type for instruction")); - constraint (((inst.instruction >> 8) & 3) && et.size == 64, - BAD_EL_TYPE); + if (thumb_mode) + inst.instruction |= 0xfc000000; + else + inst.instruction |= 0xf0000000; + } +} - inst.instruction &= ~0xf00; - inst.instruction |= typebits << 8; +static void +do_vrintx (void) +{ + do_vrint_1 (neon_cvt_mode_x); } -/* Check alignment is valid for do_neon_ld_st_lane and do_neon_ld_dup. - *DO_ALIGN is set to 1 if the relevant alignment bit should be set, 0 - otherwise. The variable arguments are a list of pairs of legal (size, align) - values, terminated with -1. */ +static void +do_vrintz (void) +{ + do_vrint_1 (neon_cvt_mode_z); +} -static int -neon_alignment_bit (int size, int align, int *do_alignment, ...) 
+static void +do_vrintr (void) { - va_list ap; - int result = FAIL, thissize, thisalign; + do_vrint_1 (neon_cvt_mode_r); +} - if (!inst.operands[1].immisalign) - { - *do_alignment = 0; - return SUCCESS; - } +static void +do_vrinta (void) +{ + do_vrint_1 (neon_cvt_mode_a); +} - va_start (ap, do_alignment); +static void +do_vrintn (void) +{ + do_vrint_1 (neon_cvt_mode_n); +} - do - { - thissize = va_arg (ap, int); - if (thissize == -1) - break; - thisalign = va_arg (ap, int); +static void +do_vrintp (void) +{ + do_vrint_1 (neon_cvt_mode_p); +} - if (size == thissize && align == thisalign) - result = SUCCESS; - } - while (result != SUCCESS); +static void +do_vrintm (void) +{ + do_vrint_1 (neon_cvt_mode_m); +} - va_end (ap); +static unsigned +neon_scalar_for_vcmla (unsigned opnd, unsigned elsize) +{ + unsigned regno = NEON_SCALAR_REG (opnd); + unsigned elno = NEON_SCALAR_INDEX (opnd); - if (result == SUCCESS) - *do_alignment = 1; - else - first_error (_("unsupported alignment for instruction")); + if (elsize == 16 && elno < 2 && regno < 16) + return regno | (elno << 4); + else if (elsize == 32 && elno == 0) + return regno; - return result; + first_error (_("scalar out of range")); + return 0; } static void -do_neon_ld_st_lane (void) +do_vcmla (void) { - struct neon_type_el et = neon_check_type (1, NS_NULL, N_8 | N_16 | N_32); - int align_good, do_alignment = 0; - int logsize = neon_logbits (et.size); - int align = inst.operands[1].imm >> 8; - int n = (inst.instruction >> 8) & 3; - int max_el = 64 / et.size; + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext) + && (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8) + || !mark_feature_used (&arm_ext_v8_3)), (BAD_FPU)); + constraint (inst.relocs[0].exp.X_op != O_constant, + _("expression too complex")); + unsigned rot = inst.relocs[0].exp.X_add_number; + constraint (rot != 0 && rot != 90 && rot != 180 && rot != 270, + _("immediate out of range")); + rot /= 90; - if (et.type == NT_invtype) + if (!check_simd_pred_availability (TRUE, + NEON_CHECK_ARCH8 | NEON_CHECK_CC)) return; - constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != n + 1, - _("bad list length")); - constraint (NEON_LANE (inst.operands[0].imm) >= max_el, - _("scalar index out of range")); - constraint (n != 0 && NEON_REG_STRIDE (inst.operands[0].imm) == 2 - && et.size == 8, - _("stride of 2 unavailable when element size is 8")); - - switch (n) + if (inst.operands[2].isscalar) { - case 0: /* VLD1 / VST1. 
*/ - align_good = neon_alignment_bit (et.size, align, &do_alignment, 16, 16, - 32, 32, -1); - if (align_good == FAIL) - return; - if (do_alignment) - { - unsigned alignbits = 0; - switch (et.size) - { - case 16: alignbits = 0x1; break; - case 32: alignbits = 0x3; break; - default: ; - } - inst.instruction |= alignbits << 4; - } - break; + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext)) + first_error (_("invalid instruction shape")); + enum neon_shape rs = neon_select_shape (NS_DDSI, NS_QQSI, NS_NULL); + unsigned size = neon_check_type (3, rs, N_EQK, N_EQK, + N_KEY | N_F16 | N_F32).size; + unsigned m = neon_scalar_for_vcmla (inst.operands[2].reg, size); + inst.is_neon = 1; + inst.instruction = 0xfe000800; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= LOW4 (m); + inst.instruction |= HI1 (m) << 5; + inst.instruction |= neon_quad (rs) << 6; + inst.instruction |= rot << 20; + inst.instruction |= (size == 32) << 23; + } + else + { + enum neon_shape rs; + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext)) + rs = neon_select_shape (NS_QQQI, NS_NULL); + else + rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL); - case 1: /* VLD2 / VST2. */ - align_good = neon_alignment_bit (et.size, align, &do_alignment, 8, 16, - 16, 32, 32, 64, -1); - if (align_good == FAIL) - return; - if (do_alignment) - inst.instruction |= 1 << 4; - break; + unsigned size = neon_check_type (3, rs, N_EQK, N_EQK, + N_KEY | N_F16 | N_F32).size; + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext) && size == 32 + && (inst.operands[0].reg == inst.operands[1].reg + || inst.operands[0].reg == inst.operands[2].reg)) + as_tsktsk (BAD_MVE_SRCDEST); - case 2: /* VLD3 / VST3. */ - constraint (inst.operands[1].immisalign, - _("can't use alignment with this instruction")); - break; + neon_three_same (neon_quad (rs), 0, -1); + inst.instruction &= 0x00ffffff; /* Undo neon_dp_fixup. */ + inst.instruction |= 0xfc200800; + inst.instruction |= rot << 23; + inst.instruction |= (size == 32) << 20; + } +} - case 3: /* VLD4 / VST4. */ - align_good = neon_alignment_bit (et.size, align, &do_alignment, 8, 32, - 16, 64, 32, 64, 32, 128, -1); - if (align_good == FAIL) - return; - if (do_alignment) - { - unsigned alignbits = 0; - switch (et.size) - { - case 8: alignbits = 0x1; break; - case 16: alignbits = 0x1; break; - case 32: alignbits = (align == 64) ? 
0x1 : 0x2; break; - default: ; - } - inst.instruction |= alignbits << 4; - } - break; +static void +do_vcadd (void) +{ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) + && (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8) + || !mark_feature_used (&arm_ext_v8_3)), (BAD_FPU)); + constraint (inst.relocs[0].exp.X_op != O_constant, + _("expression too complex")); - default: ; + unsigned rot = inst.relocs[0].exp.X_add_number; + constraint (rot != 90 && rot != 270, _("immediate out of range")); + enum neon_shape rs; + struct neon_type_el et; + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + { + rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL); + et = neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_F16 | N_F32); + } + else + { + rs = neon_select_shape (NS_QQQI, NS_NULL); + et = neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_F16 | N_F32 | N_I8 + | N_I16 | N_I32); + if (et.size == 32 && inst.operands[0].reg == inst.operands[2].reg) + as_tsktsk (_("Warning: 32-bit element size and same first and third " + "operand makes instruction UNPREDICTABLE")); } - /* Reg stride of 2 is encoded in bit 5 when size==16, bit 6 when size==32. */ - if (n != 0 && NEON_REG_STRIDE (inst.operands[0].imm) == 2) - inst.instruction |= 1 << (4 + logsize); + if (et.type == NT_invtype) + return; - inst.instruction |= NEON_LANE (inst.operands[0].imm) << (logsize + 5); - inst.instruction |= logsize << 10; + if (!check_simd_pred_availability (et.type == NT_float, + NEON_CHECK_ARCH8 | NEON_CHECK_CC)) + return; + + if (et.type == NT_float) + { + neon_three_same (neon_quad (rs), 0, -1); + inst.instruction &= 0x00ffffff; /* Undo neon_dp_fixup. */ + inst.instruction |= 0xfc800800; + inst.instruction |= (rot == 270) << 24; + inst.instruction |= (et.size == 32) << 20; + } + else + { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU); + inst.instruction = 0xfe000f00; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= neon_logbits (et.size) << 20; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= (rot == 270) << 12; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= HI1 (inst.operands[2].reg) << 5; + inst.instruction |= LOW4 (inst.operands[2].reg); + inst.is_neon = 1; + } } -/* Encode single n-element structure to all lanes VLD instructions. */ +/* Dot Product instructions encoding support. */ static void -do_neon_ld_dup (void) +do_neon_dotproduct (int unsigned_p) { - struct neon_type_el et = neon_check_type (1, NS_NULL, N_8 | N_16 | N_32); - int align_good, do_alignment = 0; + enum neon_shape rs; + unsigned scalar_oprd2 = 0; + int high8; - if (et.type == NT_invtype) - return; + if (inst.cond != COND_ALWAYS) + as_warn (_("Dot Product instructions cannot be conditional, the behaviour " + "is UNPREDICTABLE")); - switch ((inst.instruction >> 8) & 3) - { - case 0: /* VLD1. */ - gas_assert (NEON_REG_STRIDE (inst.operands[0].imm) != 2); - align_good = neon_alignment_bit (et.size, inst.operands[1].imm >> 8, - &do_alignment, 16, 16, 32, 32, -1); - if (align_good == FAIL) - return; - switch (NEON_REGLIST_LENGTH (inst.operands[0].imm)) - { - case 1: break; - case 2: inst.instruction |= 1 << 5; break; - default: first_error (_("bad list length")); return; - } - inst.instruction |= neon_logbits (et.size) << 6; - break; + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8), + _(BAD_FPU)); - case 1: /* VLD2. 
*/ - align_good = neon_alignment_bit (et.size, inst.operands[1].imm >> 8, - &do_alignment, 8, 16, 16, 32, 32, 64, - -1); - if (align_good == FAIL) - return; - constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 2, - _("bad list length")); - if (NEON_REG_STRIDE (inst.operands[0].imm) == 2) - inst.instruction |= 1 << 5; - inst.instruction |= neon_logbits (et.size) << 6; - break; + /* Dot Product instructions are in three-same D/Q register format or the third + operand can be a scalar index register. */ + if (inst.operands[2].isscalar) + { + scalar_oprd2 = neon_scalar_for_mul (inst.operands[2].reg, 32); + high8 = 0xfe000000; + rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL); + } + else + { + high8 = 0xfc000000; + rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); + } - case 2: /* VLD3. */ - constraint (inst.operands[1].immisalign, - _("can't use alignment with this instruction")); - constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 3, - _("bad list length")); - if (NEON_REG_STRIDE (inst.operands[0].imm) == 2) - inst.instruction |= 1 << 5; - inst.instruction |= neon_logbits (et.size) << 6; - break; + if (unsigned_p) + neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_U8); + else + neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_S8); - case 3: /* VLD4. */ - { - int align = inst.operands[1].imm >> 8; - align_good = neon_alignment_bit (et.size, align, &do_alignment, 8, 32, - 16, 64, 32, 64, 32, 128, -1); - if (align_good == FAIL) - return; - constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 4, - _("bad list length")); - if (NEON_REG_STRIDE (inst.operands[0].imm) == 2) - inst.instruction |= 1 << 5; - if (et.size == 32 && align == 128) - inst.instruction |= 0x3 << 6; - else - inst.instruction |= neon_logbits (et.size) << 6; - } - break; + /* The "U" bit in traditional Three Same encoding is fixed to 0 for Dot + Product instruction, so we pass 0 as the "ubit" parameter. And the + "Size" field are fixed to 0x2, so we pass 32 as the "size" parameter. */ + neon_three_same (neon_quad (rs), 0, 32); - default: ; + /* Undo neon_dp_fixup. Dot Product instructions are using a slightly + different NEON three-same encoding. */ + inst.instruction &= 0x00ffffff; + inst.instruction |= high8; + /* Encode 'U' bit which indicates signedness. */ + inst.instruction |= (unsigned_p ? 1 : 0) << 4; + /* Re-encode operand2 if it's indexed scalar operand. What has been encoded + from inst.operand[2].reg in neon_three_same is GAS's internal encoding, not + the instruction encoding. */ + if (inst.operands[2].isscalar) + { + inst.instruction &= 0xffffffd0; + inst.instruction |= LOW4 (scalar_oprd2); + inst.instruction |= HI1 (scalar_oprd2) << 5; } +} - inst.instruction |= do_alignment << 4; +/* Dot Product instructions for signed integer. */ + +static void +do_neon_dotproduct_s (void) +{ + return do_neon_dotproduct (0); } -/* Disambiguate VLD and VST instructions, and fill in common bits (those - apart from bits [11:4]. */ +/* Dot Product instructions for unsigned integer. 
*/ static void -do_neon_ldx_stx (void) +do_neon_dotproduct_u (void) { - if (inst.operands[1].isreg) - constraint (inst.operands[1].reg == REG_PC, BAD_PC); + return do_neon_dotproduct (1); +} - switch (NEON_LANE (inst.operands[0].imm)) +static void +do_vusdot (void) +{ + enum neon_shape rs; + set_pred_insn_type (OUTSIDE_PRED_INSN); + if (inst.operands[2].isscalar) { - case NEON_INTERLEAVE_LANES: - NEON_ENCODE (INTERLV, inst); - do_neon_ld_st_interleave (); - break; - - case NEON_ALL_LANES: - NEON_ENCODE (DUP, inst); - if (inst.instruction == N_INV) - { - first_error ("only loads support such operands"); - break; - } - do_neon_ld_dup (); - break; + rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY); - default: - NEON_ENCODE (LANE, inst); - do_neon_ld_st_lane (); + inst.instruction |= (1 << 25); + int index = inst.operands[2].reg & 0xf; + constraint ((index != 1 && index != 0), _("index must be 0 or 1")); + inst.operands[2].reg >>= 4; + constraint (!(inst.operands[2].reg < 16), + _("indexed register must be less than 16")); + neon_three_args (rs == NS_QQS); + inst.instruction |= (index << 5); } - - /* L bit comes from bit mask. */ - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= inst.operands[1].reg << 16; - - if (inst.operands[1].postind) + else { - int postreg = inst.operands[1].imm & 0xf; - constraint (!inst.operands[1].immisreg, - _("post-index must be a register")); - constraint (postreg == 0xd || postreg == 0xf, - _("bad register for post-index")); - inst.instruction |= postreg; + inst.instruction |= (1 << 21); + rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY); + neon_three_args (rs == NS_QQQ); } - else +} + +static void +do_vsudot (void) +{ + enum neon_shape rs; + set_pred_insn_type (OUTSIDE_PRED_INSN); + if (inst.operands[2].isscalar) { - constraint (inst.operands[1].immisreg, BAD_ADDR_MODE); - constraint (inst.relocs[0].exp.X_op != O_constant - || inst.relocs[0].exp.X_add_number != 0, - BAD_ADDR_MODE); + rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_U8 | N_KEY); - if (inst.operands[1].writeback) - { - inst.instruction |= 0xd; - } - else - inst.instruction |= 0xf; + inst.instruction |= (1 << 25); + int index = inst.operands[2].reg & 0xf; + constraint ((index != 1 && index != 0), _("index must be 0 or 1")); + inst.operands[2].reg >>= 4; + constraint (!(inst.operands[2].reg < 16), + _("indexed register must be less than 16")); + neon_three_args (rs == NS_QQS); + inst.instruction |= (index << 5); } +} + +static void +do_vsmmla (void) +{ + enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY); + + set_pred_insn_type (OUTSIDE_PRED_INSN); + + neon_three_args (1); - if (thumb_mode) - inst.instruction |= 0xf9000000; - else - inst.instruction |= 0xf4000000; } -/* FP v8. */ static void -do_vfp_nsyn_fpv8 (enum neon_shape rs) +do_vummla (void) { - /* Targets like FPv5-SP-D16 don't support FP v8 instructions with - D register operands. 
*/ - if (neon_shape_class[rs] == SC_DOUBLE) - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), - _(BAD_FPU)); + enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_U8 | N_KEY); - NEON_ENCODE (FPV8, inst); + set_pred_insn_type (OUTSIDE_PRED_INSN); - if (rs == NS_FFF || rs == NS_HHH) - { - do_vfp_sp_dyadic (); + neon_three_args (1); - /* ARMv8.2 fp16 instruction. */ - if (rs == NS_HHH) - do_scalar_fp16_v82_encode (); - } - else - do_vfp_dp_rd_rn_rm (); +} - if (rs == NS_DDD) - inst.instruction |= 0x100; +static void +check_cde_operand (size_t index, int is_dual) +{ + unsigned Rx = inst.operands[index].reg; + bfd_boolean isvec = inst.operands[index].isvec; + if (is_dual == 0 && thumb_mode) + constraint ( + !((Rx <= 14 && Rx != 13) || (Rx == REG_PC && isvec)), + _("Register must be r0-r14 except r13, or APSR_nzcv.")); + else + constraint ( !((Rx <= 10 && Rx % 2 == 0 )), + _("Register must be an even register between r0-r10.")); +} - inst.instruction |= 0xf0000000; +static bfd_boolean +cde_coproc_enabled (unsigned coproc) +{ + switch (coproc) + { + case 0: return mark_feature_used (&arm_ext_cde0); + case 1: return mark_feature_used (&arm_ext_cde1); + case 2: return mark_feature_used (&arm_ext_cde2); + case 3: return mark_feature_used (&arm_ext_cde3); + case 4: return mark_feature_used (&arm_ext_cde4); + case 5: return mark_feature_used (&arm_ext_cde5); + case 6: return mark_feature_used (&arm_ext_cde6); + case 7: return mark_feature_used (&arm_ext_cde7); + default: return FALSE; + } } +#define cde_coproc_pos 8 static void -do_vsel (void) +cde_handle_coproc (void) { - set_pred_insn_type (OUTSIDE_PRED_INSN); + unsigned coproc = inst.operands[0].reg; + constraint (coproc > 7, _("CDE Coprocessor must be in range 0-7")); + constraint (!(cde_coproc_enabled (coproc)), BAD_CDE_COPROC); + inst.instruction |= coproc << cde_coproc_pos; +} +#undef cde_coproc_pos - if (try_vfp_nsyn (3, do_vfp_nsyn_fpv8) != SUCCESS) - first_error (_("invalid instruction shape")); +static void +cxn_handle_predication (bfd_boolean is_accum) +{ + if (is_accum && conditional_insn ()) + set_pred_insn_type (INSIDE_IT_INSN); + else if (conditional_insn ()) + /* conditional_insn essentially checks for a suffix, not whether the + instruction is inside an IT block or not. + The non-accumulator versions should not have suffixes. 
*/ + inst.error = BAD_SYNTAX; + else + set_pred_insn_type (OUTSIDE_PRED_INSN); } static void -do_vmaxnm (void) +do_custom_instruction_1 (int is_dual, bfd_boolean is_accum) { - set_pred_insn_type (OUTSIDE_PRED_INSN); - if (try_vfp_nsyn (3, do_vfp_nsyn_fpv8) == SUCCESS) - return; + constraint (!mark_feature_used (&arm_ext_cde), _(BAD_CDE)); - if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL) - return; + unsigned imm, Rd; - neon_dyadic_misc (NT_untyped, N_F_16_32, 0); + Rd = inst.operands[1].reg; + check_cde_operand (1, is_dual); + + if (is_dual == 1) + { + constraint (inst.operands[2].reg != Rd + 1, + _("cx1d requires consecutive destination registers.")); + imm = inst.operands[3].imm; + } + else if (is_dual == 0) + imm = inst.operands[2].imm; + else + abort (); + + inst.instruction |= Rd << 12; + inst.instruction |= (imm & 0x1F80) << 9; + inst.instruction |= (imm & 0x0040) << 1; + inst.instruction |= (imm & 0x003f); + + cde_handle_coproc (); + cxn_handle_predication (is_accum); } static void -do_vrint_1 (enum neon_cvt_mode mode) +do_custom_instruction_2 (int is_dual, bfd_boolean is_accum) { - enum neon_shape rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_QQ, NS_NULL); - struct neon_type_el et; - if (rs == NS_NULL) - return; + constraint (!mark_feature_used (&arm_ext_cde), _(BAD_CDE)); - /* Targets like FPv5-SP-D16 don't support FP v8 instructions with - D register operands. */ - if (neon_shape_class[rs] == SC_DOUBLE) - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), - _(BAD_FPU)); + unsigned imm, Rd, Rn; - et = neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY - | N_VFP); - if (et.type != NT_invtype) + Rd = inst.operands[1].reg; + + if (is_dual == 1) { - /* VFP encodings. */ - if (mode == neon_cvt_mode_a || mode == neon_cvt_mode_n - || mode == neon_cvt_mode_p || mode == neon_cvt_mode_m) - set_pred_insn_type (OUTSIDE_PRED_INSN); + constraint (inst.operands[2].reg != Rd + 1, + _("cx2d requires consecutive destination registers.")); + imm = inst.operands[4].imm; + Rn = inst.operands[3].reg; + } + else if (is_dual == 0) + { + imm = inst.operands[3].imm; + Rn = inst.operands[2].reg; + } + else + abort (); - NEON_ENCODE (FPV8, inst); - if (rs == NS_FF || rs == NS_HH) - do_vfp_sp_monadic (); - else - do_vfp_dp_rd_rm (); + check_cde_operand (2 + is_dual, /* is_dual = */0); + check_cde_operand (1, is_dual); - switch (mode) - { - case neon_cvt_mode_r: inst.instruction |= 0x00000000; break; - case neon_cvt_mode_z: inst.instruction |= 0x00000080; break; - case neon_cvt_mode_x: inst.instruction |= 0x00010000; break; - case neon_cvt_mode_a: inst.instruction |= 0xf0000000; break; - case neon_cvt_mode_n: inst.instruction |= 0xf0010000; break; - case neon_cvt_mode_p: inst.instruction |= 0xf0020000; break; - case neon_cvt_mode_m: inst.instruction |= 0xf0030000; break; - default: abort (); - } + inst.instruction |= Rd << 12; + inst.instruction |= Rn << 16; - inst.instruction |= (rs == NS_DD) << 8; - do_vfp_cond_or_thumb (); + inst.instruction |= (imm & 0x0380) << 13; + inst.instruction |= (imm & 0x0040) << 1; + inst.instruction |= (imm & 0x003f); - /* ARMv8.2 fp16 vrint instruction. 
*/ - if (rs == NS_HH) - do_scalar_fp16_v82_encode (); + cde_handle_coproc (); + cxn_handle_predication (is_accum); +} + +static void +do_custom_instruction_3 (int is_dual, bfd_boolean is_accum) +{ + + constraint (!mark_feature_used (&arm_ext_cde), _(BAD_CDE)); + + unsigned imm, Rd, Rn, Rm; + + Rd = inst.operands[1].reg; + + if (is_dual == 1) + { + constraint (inst.operands[2].reg != Rd + 1, + _("cx3d requires consecutive destination registers.")); + imm = inst.operands[5].imm; + Rn = inst.operands[3].reg; + Rm = inst.operands[4].reg; } + else if (is_dual == 0) + { + imm = inst.operands[4].imm; + Rn = inst.operands[2].reg; + Rm = inst.operands[3].reg; + } else - { - /* Neon encodings (or something broken...). */ - inst.error = NULL; - et = neon_check_type (2, rs, N_EQK, N_F_16_32 | N_KEY); + abort (); - if (et.type == NT_invtype) - return; + check_cde_operand (1, is_dual); + check_cde_operand (2 + is_dual, /* is_dual = */0); + check_cde_operand (3 + is_dual, /* is_dual = */0); - set_pred_insn_type (OUTSIDE_PRED_INSN); - NEON_ENCODE (FLOAT, inst); + inst.instruction |= Rd; + inst.instruction |= Rn << 16; + inst.instruction |= Rm << 12; - if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL) - return; + inst.instruction |= (imm & 0x0038) << 17; + inst.instruction |= (imm & 0x0004) << 5; + inst.instruction |= (imm & 0x0003) << 4; - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (inst.operands[1].reg); - inst.instruction |= HI1 (inst.operands[1].reg) << 5; - inst.instruction |= neon_quad (rs) << 6; - /* Mask off the original size bits and reencode them. */ - inst.instruction = ((inst.instruction & 0xfff3ffff) - | neon_logbits (et.size) << 18); + cde_handle_coproc (); + cxn_handle_predication (is_accum); +} - switch (mode) - { - case neon_cvt_mode_z: inst.instruction |= 3 << 7; break; - case neon_cvt_mode_x: inst.instruction |= 1 << 7; break; - case neon_cvt_mode_a: inst.instruction |= 2 << 7; break; - case neon_cvt_mode_n: inst.instruction |= 0 << 7; break; - case neon_cvt_mode_p: inst.instruction |= 7 << 7; break; - case neon_cvt_mode_m: inst.instruction |= 5 << 7; break; - case neon_cvt_mode_r: inst.error = _("invalid rounding mode"); break; - default: abort (); - } +static void +do_cx1 (void) +{ + return do_custom_instruction_1 (0, 0); +} - if (thumb_mode) - inst.instruction |= 0xfc000000; - else - inst.instruction |= 0xf0000000; - } +static void +do_cx1a (void) +{ + return do_custom_instruction_1 (0, 1); +} + +static void +do_cx1d (void) +{ + return do_custom_instruction_1 (1, 0); +} + +static void +do_cx1da (void) +{ + return do_custom_instruction_1 (1, 1); +} + +static void +do_cx2 (void) +{ + return do_custom_instruction_2 (0, 0); +} + +static void +do_cx2a (void) +{ + return do_custom_instruction_2 (0, 1); +} + +static void +do_cx2d (void) +{ + return do_custom_instruction_2 (1, 0); +} + +static void +do_cx2da (void) +{ + return do_custom_instruction_2 (1, 1); } static void -do_vrintx (void) +do_cx3 (void) { - do_vrint_1 (neon_cvt_mode_x); + return do_custom_instruction_3 (0, 0); } static void -do_vrintz (void) +do_cx3a (void) { - do_vrint_1 (neon_cvt_mode_z); + return do_custom_instruction_3 (0, 1); } static void -do_vrintr (void) +do_cx3d (void) { - do_vrint_1 (neon_cvt_mode_r); + return do_custom_instruction_3 (1, 0); } static void -do_vrinta (void) +do_cx3da (void) { - do_vrint_1 (neon_cvt_mode_a); + return do_custom_instruction_3 (1, 1); } static void -do_vrintn (void) 
+vcx_assign_vec_d (unsigned regnum) { - do_vrint_1 (neon_cvt_mode_n); + inst.instruction |= HI4 (regnum) << 12; + inst.instruction |= LOW1 (regnum) << 22; } static void -do_vrintp (void) +vcx_assign_vec_m (unsigned regnum) { - do_vrint_1 (neon_cvt_mode_p); + inst.instruction |= HI4 (regnum); + inst.instruction |= LOW1 (regnum) << 5; } static void -do_vrintm (void) +vcx_assign_vec_n (unsigned regnum) { - do_vrint_1 (neon_cvt_mode_m); + inst.instruction |= HI4 (regnum) << 16; + inst.instruction |= LOW1 (regnum) << 7; } +enum vcx_reg_type { + q_reg, + d_reg, + s_reg +}; + +static enum vcx_reg_type +vcx_get_reg_type (enum neon_shape ns) +{ + gas_assert (ns == NS_PQI + || ns == NS_PDI + || ns == NS_PFI + || ns == NS_PQQI + || ns == NS_PDDI + || ns == NS_PFFI + || ns == NS_PQQQI + || ns == NS_PDDDI + || ns == NS_PFFFI); + if (ns == NS_PQI || ns == NS_PQQI || ns == NS_PQQQI) + return q_reg; + if (ns == NS_PDI || ns == NS_PDDI || ns == NS_PDDDI) + return d_reg; + return s_reg; +} + +#define vcx_size_pos 24 +#define vcx_vec_pos 6 static unsigned -neon_scalar_for_vcmla (unsigned opnd, unsigned elsize) +vcx_handle_shape (enum vcx_reg_type reg_type) { - unsigned regno = NEON_SCALAR_REG (opnd); - unsigned elno = NEON_SCALAR_INDEX (opnd); - - if (elsize == 16 && elno < 2 && regno < 16) - return regno | (elno << 4); - else if (elsize == 32 && elno == 0) - return regno; - - first_error (_("scalar out of range")); - return 0; + unsigned mult = 2; + if (reg_type == q_reg) + inst.instruction |= 1 << vcx_vec_pos; + else if (reg_type == d_reg) + inst.instruction |= 1 << vcx_size_pos; + else + mult = 1; + /* NOTE: + The documentation says that the Q registers are encoded as 2*N in the D:Vd + bits (or equivalent for N and M registers). + Similarly the D registers are encoded as N in D:Vd bits. + While the S registers are encoded as N in the Vd:D bits. + + Taking into account the maximum values of these registers we can see a + nicer pattern for calculation: + Q -> 7, D -> 15, S -> 31 + + If we say that everything is encoded in the Vd:D bits, then we can say + that Q is encoded as 4*N, and D is encoded as 2*N. + This way the bits will end up the same, and calculation is simpler. + (calculation is now: + 1. Multiply by a number determined by the register letter. + 2. Encode resulting number in Vd:D bits.) + + This is made a little more complicated by automatic handling of 'Q' + registers elsewhere, which means the register number is already 2*N where + N is the number the user wrote after the register letter. 
+ */ + return mult; } +#undef vcx_vec_pos +#undef vcx_size_pos static void -do_vcmla (void) +vcx_ensure_register_in_range (unsigned R, enum vcx_reg_type reg_type) { - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8), - _(BAD_FPU)); - constraint (inst.relocs[0].exp.X_op != O_constant, - _("expression too complex")); - unsigned rot = inst.relocs[0].exp.X_add_number; - constraint (rot != 0 && rot != 90 && rot != 180 && rot != 270, - _("immediate out of range")); - rot /= 90; - if (inst.operands[2].isscalar) + if (reg_type == q_reg) { - enum neon_shape rs = neon_select_shape (NS_DDSI, NS_QQSI, NS_NULL); - unsigned size = neon_check_type (3, rs, N_EQK, N_EQK, - N_KEY | N_F16 | N_F32).size; - unsigned m = neon_scalar_for_vcmla (inst.operands[2].reg, size); - inst.is_neon = 1; - inst.instruction = 0xfe000800; - inst.instruction |= LOW4 (inst.operands[0].reg) << 12; - inst.instruction |= HI1 (inst.operands[0].reg) << 22; - inst.instruction |= LOW4 (inst.operands[1].reg) << 16; - inst.instruction |= HI1 (inst.operands[1].reg) << 7; - inst.instruction |= LOW4 (m); - inst.instruction |= HI1 (m) << 5; - inst.instruction |= neon_quad (rs) << 6; - inst.instruction |= rot << 20; - inst.instruction |= (size == 32) << 23; + gas_assert (R % 2 == 0); + constraint (R >= 16, _("'q' register must be in range 0-7")); } + else if (reg_type == d_reg) + constraint (R >= 16, _("'d' register must be in range 0-15")); else - { - enum neon_shape rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL); - unsigned size = neon_check_type (3, rs, N_EQK, N_EQK, - N_KEY | N_F16 | N_F32).size; - neon_three_same (neon_quad (rs), 0, -1); - inst.instruction &= 0x00ffffff; /* Undo neon_dp_fixup. */ - inst.instruction |= 0xfc200800; - inst.instruction |= rot << 23; - inst.instruction |= (size == 32) << 20; - } + constraint (R >= 32, _("'s' register must be in range 0-31")); } +static void (*vcx_assign_vec[3]) (unsigned) = { + vcx_assign_vec_d, + vcx_assign_vec_m, + vcx_assign_vec_n +}; + static void -do_vcadd (void) +vcx_handle_register_arguments (unsigned num_registers, + enum vcx_reg_type reg_type) { - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8), - _(BAD_FPU)); - constraint (inst.relocs[0].exp.X_op != O_constant, - _("expression too complex")); - unsigned rot = inst.relocs[0].exp.X_add_number; - constraint (rot != 90 && rot != 270, _("immediate out of range")); - enum neon_shape rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL); - unsigned size = neon_check_type (3, rs, N_EQK, N_EQK, - N_KEY | N_F16 | N_F32).size; - neon_three_same (neon_quad (rs), 0, -1); - inst.instruction &= 0x00ffffff; /* Undo neon_dp_fixup. */ - inst.instruction |= 0xfc800800; - inst.instruction |= (rot == 270) << 24; - inst.instruction |= (size == 32) << 20; + unsigned R, i; + unsigned reg_mult = vcx_handle_shape (reg_type); + for (i = 0; i < num_registers; i++) + { + R = inst.operands[i+1].reg; + vcx_ensure_register_in_range (R, reg_type); + if (num_registers == 3 && i > 0) + { + if (i == 2) + vcx_assign_vec[1] (R * reg_mult); + else + vcx_assign_vec[2] (R * reg_mult); + continue; + } + vcx_assign_vec[i](R * reg_mult); + } } -/* Dot Product instructions encoding support. 
*/ - static void -do_neon_dotproduct (int unsigned_p) +vcx_handle_insn_block (enum vcx_reg_type reg_type) { - enum neon_shape rs; - unsigned scalar_oprd2 = 0; - int high8; - - if (inst.cond != COND_ALWAYS) - as_warn (_("Dot Product instructions cannot be conditional, the behaviour " - "is UNPREDICTABLE")); - - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8), - _(BAD_FPU)); - - /* Dot Product instructions are in three-same D/Q register format or the third - operand can be a scalar index register. */ - if (inst.operands[2].isscalar) - { - scalar_oprd2 = neon_scalar_for_mul (inst.operands[2].reg, 32); - high8 = 0xfe000000; - rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL); - } + if (reg_type == q_reg) + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; + else if (inst.cond == COND_ALWAYS) + inst.pred_insn_type = OUTSIDE_PRED_INSN; else - { - high8 = 0xfc000000; - rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); - } + inst.error = BAD_NOT_IT; +} - if (unsigned_p) - neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_U8); +static void +vcx_handle_common_checks (unsigned num_args, enum neon_shape rs) +{ + constraint (!mark_feature_used (&arm_ext_cde), _(BAD_CDE)); + cde_handle_coproc (); + enum vcx_reg_type reg_type = vcx_get_reg_type (rs); + vcx_handle_register_arguments (num_args, reg_type); + vcx_handle_insn_block (reg_type); + if (reg_type == q_reg) + constraint (!mark_feature_used (&mve_ext), + _("vcx instructions with Q registers require MVE")); else - neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_S8); + constraint (!(ARM_FSET_CPU_SUBSET (armv8m_fp, cpu_variant) + && mark_feature_used (&armv8m_fp)) + && !mark_feature_used (&mve_ext), + _("vcx instructions with S or D registers require either MVE" + " or Armv8-M floating point extension.")); +} - /* The "U" bit in traditional Three Same encoding is fixed to 0 for Dot - Product instruction, so we pass 0 as the "ubit" parameter. And the - "Size" field are fixed to 0x2, so we pass 32 as the "size" parameter. */ - neon_three_same (neon_quad (rs), 0, 32); +static void +do_vcx1 (void) +{ + enum neon_shape rs = neon_select_shape (NS_PQI, NS_PDI, NS_PFI, NS_NULL); + vcx_handle_common_checks (1, rs); - /* Undo neon_dp_fixup. Dot Product instructions are using a slightly - different NEON three-same encoding. */ - inst.instruction &= 0x00ffffff; - inst.instruction |= high8; - /* Encode 'U' bit which indicates signedness. */ - inst.instruction |= (unsigned_p ? 1 : 0) << 4; - /* Re-encode operand2 if it's indexed scalar operand. What has been encoded - from inst.operand[2].reg in neon_three_same is GAS's internal encoding, not - the instruction encoding. */ - if (inst.operands[2].isscalar) - { - inst.instruction &= 0xffffffd0; - inst.instruction |= LOW4 (scalar_oprd2); - inst.instruction |= HI1 (scalar_oprd2) << 5; - } + unsigned imm = inst.operands[2].imm; + inst.instruction |= (imm & 0x03f); + inst.instruction |= (imm & 0x040) << 1; + inst.instruction |= (imm & 0x780) << 9; + if (rs != NS_PQI) + constraint (imm >= 2048, + _("vcx1 with S or D registers takes immediate within 0-2047")); + inst.instruction |= (imm & 0x800) << 13; } -/* Dot Product instructions for signed integer. */ - static void -do_neon_dotproduct_s (void) +do_vcx2 (void) { - return do_neon_dotproduct (0); -} + enum neon_shape rs = neon_select_shape (NS_PQQI, NS_PDDI, NS_PFFI, NS_NULL); + vcx_handle_common_checks (2, rs); -/* Dot Product instructions for unsigned integer. 
*/ + unsigned imm = inst.operands[3].imm; + inst.instruction |= (imm & 0x01) << 4; + inst.instruction |= (imm & 0x02) << 6; + inst.instruction |= (imm & 0x3c) << 14; + if (rs != NS_PQQI) + constraint (imm >= 64, + _("vcx2 with S or D registers takes immediate within 0-63")); + inst.instruction |= (imm & 0x40) << 18; +} static void -do_neon_dotproduct_u (void) +do_vcx3 (void) { - return do_neon_dotproduct (1); + enum neon_shape rs = neon_select_shape (NS_PQQQI, NS_PDDDI, NS_PFFFI, NS_NULL); + vcx_handle_common_checks (3, rs); + + unsigned imm = inst.operands[4].imm; + inst.instruction |= (imm & 0x1) << 4; + inst.instruction |= (imm & 0x6) << 19; + if (rs != NS_PQQQI) + constraint (imm >= 8, + _("vcx3 with S or D registers takes immediate within 0-7")); + inst.instruction |= (imm & 0x8) << 21; } /* Crypto v1 instructions. */ @@ -19968,6 +22264,46 @@ do_vjcvt (void) do_vfp_cond_or_thumb (); } +static void +do_vdot (void) +{ + enum neon_shape rs; + constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU)); + set_pred_insn_type (OUTSIDE_PRED_INSN); + if (inst.operands[2].isscalar) + { + rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY); + + inst.instruction |= (1 << 25); + int index = inst.operands[2].reg & 0xf; + constraint ((index != 1 && index != 0), _("index must be 0 or 1")); + inst.operands[2].reg >>= 4; + constraint (!(inst.operands[2].reg < 16), + _("indexed register must be less than 16")); + neon_three_args (rs == NS_QQS); + inst.instruction |= (index << 5); + } + else + { + rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY); + neon_three_args (rs == NS_QQQ); + } +} + +static void +do_vmmla (void) +{ + enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY); + + constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU)); + set_pred_insn_type (OUTSIDE_PRED_INSN); + + neon_three_args (1); +} + /* Overall per-instruction processing. */ @@ -20704,6 +23040,7 @@ handle_pred_state (void) close_automatic_it_block (); break; + /* Fallthrough. */ case NEUTRAL_IT_INSN: now_pred.block_length++; now_pred.insn_cond = TRUE; @@ -20956,9 +23293,11 @@ it_fsm_post_encode (void) handle_pred_state (); if (now_pred.insn_cond + && warn_on_restrict_it && !now_pred.warn_deprecated && warn_on_deprecated - && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8) + && (ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8) + || ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8r)) && !ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_m)) { if (inst.instruction >= 0x10000) @@ -21356,7 +23695,7 @@ arm_frob_label (symbolS * sym) out of the jump table, and chaos would ensue. */ if (label_is_thumb_function_name && (S_GET_NAME (sym)[0] != '.' || S_GET_NAME (sym)[1] != 'L') - && (bfd_get_section_flags (stdoutput, now_seg) & SEC_CODE) != 0) + && (bfd_section_flags (now_seg) & SEC_CODE) != 0) { /* When the address of a Thumb function is taken the bottom bit of that address should be set. This will allow @@ -21516,6 +23855,10 @@ static const struct reg_entry reg_names[] = REGDEF(mvfr0,7,VFC), REGDEF(mvfr1,6,VFC), REGDEF(MVFR0,7,VFC), REGDEF(MVFR1,6,VFC), REGDEF(mvfr2,5,VFC), REGDEF(MVFR2,5,VFC), + REGDEF(fpscr_nzcvqc,2,VFC), REGDEF(FPSCR_nzcvqc,2,VFC), + REGDEF(vpr,12,VFC), REGDEF(VPR,12,VFC), + REGDEF(fpcxt_ns,14,VFC), REGDEF(FPCXT_NS,14,VFC), + REGDEF(fpcxt_s,15,VFC), REGDEF(FPCXT_S,15,VFC), /* Maverick DSP coprocessor registers. 
*/ REGSET(mvf,MVF), REGSET(mvd,MVD), REGSET(mvfx,MVFX), REGSET(mvdx,MVDX), @@ -22636,15 +24979,13 @@ static const struct asm_opcode insns[] = nUF(vselvs, _vselvs, 3, (RVSD, RVSD, RVSD), vsel), nUF(vselge, _vselge, 3, (RVSD, RVSD, RVSD), vsel), nUF(vselgt, _vselgt, 3, (RVSD, RVSD, RVSD), vsel), - nUF(vmaxnm, _vmaxnm, 3, (RNSDQ, oRNSDQ, RNSDQ), vmaxnm), - nUF(vminnm, _vminnm, 3, (RNSDQ, oRNSDQ, RNSDQ), vmaxnm), nCE(vrintr, _vrintr, 2, (RNSDQ, oRNSDQ), vrintr), - nCE(vrintz, _vrintr, 2, (RNSDQ, oRNSDQ), vrintz), - nCE(vrintx, _vrintr, 2, (RNSDQ, oRNSDQ), vrintx), - nUF(vrinta, _vrinta, 2, (RNSDQ, oRNSDQ), vrinta), - nUF(vrintn, _vrinta, 2, (RNSDQ, oRNSDQ), vrintn), - nUF(vrintp, _vrinta, 2, (RNSDQ, oRNSDQ), vrintp), - nUF(vrintm, _vrinta, 2, (RNSDQ, oRNSDQ), vrintm), + mnCE(vrintz, _vrintr, 2, (RNSDQMQ, oRNSDQMQ), vrintz), + mnCE(vrintx, _vrintr, 2, (RNSDQMQ, oRNSDQMQ), vrintx), + mnUF(vrinta, _vrinta, 2, (RNSDQMQ, oRNSDQMQ), vrinta), + mnUF(vrintn, _vrinta, 2, (RNSDQMQ, oRNSDQMQ), vrintn), + mnUF(vrintp, _vrinta, 2, (RNSDQMQ, oRNSDQMQ), vrintp), + mnUF(vrintm, _vrinta, 2, (RNSDQMQ, oRNSDQMQ), vrintm), /* Crypto v1 extensions. */ #undef ARM_VARIANT @@ -22668,9 +25009,9 @@ static const struct asm_opcode insns[] = nUF(sha256su0, _sha2op, 2, (RNQ, RNQ), sha256su0), #undef ARM_VARIANT -#define ARM_VARIANT & crc_ext_armv8 +#define ARM_VARIANT & arm_ext_crc #undef THUMB_VARIANT -#define THUMB_VARIANT & crc_ext_armv8 +#define THUMB_VARIANT & arm_ext_crc TUEc("crc32b", 1000040, fac0f080, 3, (RR, oRR, RR), crc32b), TUEc("crc32h", 1200040, fac0f090, 3, (RR, oRR, RR), crc32h), TUEc("crc32w", 1400040, fac0f0a0, 3, (RR, oRR, RR), crc32w), @@ -22690,8 +25031,6 @@ static const struct asm_opcode insns[] = #undef THUMB_VARIANT #define THUMB_VARIANT & arm_ext_v8_3 NCE (vjcvt, eb90bc0, 2, (RVS, RVD), vjcvt), - NUF (vcmla, 0, 4, (RNDQ, RNDQ, RNDQ_RNSC, EXPi), vcmla), - NUF (vcadd, 0, 4, (RNDQ, RNDQ, RNDQ, EXPi), vcadd), #undef ARM_VARIANT #define ARM_VARIANT & fpu_neon_ext_dotprod @@ -23147,11 +25486,24 @@ static const struct asm_opcode insns[] = #undef ARM_VARIANT #define ARM_VARIANT & fpu_vfp_ext_v1xd /* VFP V1xD (single precision). */ +#undef THUMB_VARIANT +#define THUMB_VARIANT & arm_ext_v6t2 + mcCE(vmrs, ef00a10, 2, (APSR_RR, RVC), vmrs), + mcCE(vmsr, ee00a10, 2, (RVC, RR), vmsr), + mcCE(fldd, d100b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst), + mcCE(fstd, d000b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst), + mcCE(flds, d100a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst), + mcCE(fsts, d000a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst), + + /* Memory operations. */ + mcCE(fldmias, c900a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia), + mcCE(fldmdbs, d300a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb), + mcCE(fstmias, c800a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia), + mcCE(fstmdbs, d200a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb), +#undef THUMB_VARIANT /* Moves and type conversions. */ cCE("fmstat", ef1fa10, 0, (), noargs), - cCE("vmrs", ef00a10, 2, (APSR_RR, RVC), vmrs), - cCE("vmsr", ee00a10, 2, (RVC, RR), vmsr), cCE("fsitos", eb80ac0, 2, (RVS, RVS), vfp_sp_monadic), cCE("fuitos", eb80a40, 2, (RVS, RVS), vfp_sp_monadic), cCE("ftosis", ebd0a40, 2, (RVS, RVS), vfp_sp_monadic), @@ -23162,19 +25514,13 @@ static const struct asm_opcode insns[] = cCE("fmxr", ee00a10, 2, (RVC, RR), rn_rd), /* Memory operations. 
*/ - cCE("flds", d100a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst), - cCE("fsts", d000a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst), - cCE("fldmias", c900a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia), cCE("fldmfds", c900a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia), - cCE("fldmdbs", d300a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb), cCE("fldmeas", d300a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb), cCE("fldmiax", c900b00, 2, (RRnpctw, VRDLST), vfp_xp_ldstmia), cCE("fldmfdx", c900b00, 2, (RRnpctw, VRDLST), vfp_xp_ldstmia), cCE("fldmdbx", d300b00, 2, (RRnpctw, VRDLST), vfp_xp_ldstmdb), cCE("fldmeax", d300b00, 2, (RRnpctw, VRDLST), vfp_xp_ldstmdb), - cCE("fstmias", c800a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia), cCE("fstmeas", c800a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia), - cCE("fstmdbs", d200a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb), cCE("fstmfds", d200a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb), cCE("fstmiax", c800b00, 2, (RRnpctw, VRDLST), vfp_xp_ldstmia), cCE("fstmeax", c800b00, 2, (RRnpctw, VRDLST), vfp_xp_ldstmia), @@ -23205,8 +25551,6 @@ static const struct asm_opcode insns[] = /* Double precision load/store are still present on single precision implementations. */ - cCE("fldd", d100b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst), - cCE("fstd", d000b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst), cCE("fldmiad", c900b00, 2, (RRnpctw, VRDLST), vfp_dp_ldstmia), cCE("fldmfdd", c900b00, 2, (RRnpctw, VRDLST), vfp_dp_ldstmia), cCE("fldmdbd", d300b00, 2, (RRnpctw, VRDLST), vfp_dp_ldstmdb), @@ -23259,6 +25603,19 @@ static const struct asm_opcode insns[] = Individual encoder functions perform additional architecture checks. */ #undef ARM_VARIANT #define ARM_VARIANT & fpu_vfp_ext_v1xd +#undef THUMB_VARIANT +#define THUMB_VARIANT & arm_ext_v6t2 + + NCE(vldm, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), + NCE(vldmia, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), + NCE(vldmdb, d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), + NCE(vstm, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), + NCE(vstmia, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), + NCE(vstmdb, d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), + + NCE(vpop, 0, 1, (VRSDLST), vfp_nsyn_pop), + NCE(vpush, 0, 1, (VRSDLST), vfp_nsyn_push), + #undef THUMB_VARIANT #define THUMB_VARIANT & fpu_vfp_ext_v1xd @@ -23268,22 +25625,11 @@ static const struct asm_opcode insns[] = nCE(vnmul, _vnmul, 3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul), nCE(vnmla, _vnmla, 3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul), nCE(vnmls, _vnmls, 3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul), - NCE(vpush, 0, 1, (VRSDLST), vfp_nsyn_push), - NCE(vpop, 0, 1, (VRSDLST), vfp_nsyn_pop), NCE(vcvtz, 0, 2, (RVSD, RVSD), vfp_nsyn_cvtz), /* Mnemonics shared by Neon and VFP. 
*/ - nCEF(vmul, _vmul, 3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mul), - nCEF(vmla, _vmla, 3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar), nCEF(vmls, _vmls, 3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar), - NCE(vldm, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), - NCE(vldmia, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), - NCE(vldmdb, d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), - NCE(vstm, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), - NCE(vstmia, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), - NCE(vstmdb, d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), - mnCEF(vcvt, _vcvt, 3, (RNSDQMQ, RNSDQMQ, oI32z), neon_cvt), nCEF(vcvtr, _vcvt, 2, (RNSDQ, RNSDQ), neon_cvtr), MNCEF(vcvtb, eb20a40, 3, (RVSDMQ, RVSDMQ, oI32b), neon_cvtb), @@ -23312,8 +25658,8 @@ static const struct asm_opcode insns[] = NCE (vins, eb00ac0, 2, (RVS, RVS), neon_movhf), /* New backported fma/fms instructions optional in v8.2. */ - NCE (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal), - NCE (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl), + NUF (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl), + NUF (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal), #undef THUMB_VARIANT #define THUMB_VARIANT & fpu_neon_ext_v1 @@ -23324,38 +25670,24 @@ static const struct asm_opcode insns[] = /* integer ops, valid types S8 S16 S32 U8 U16 U32. */ NUF(vaba, 0000710, 3, (RNDQ, RNDQ, RNDQ), neon_dyadic_i_su), NUF(vabaq, 0000710, 3, (RNQ, RNQ, RNQ), neon_dyadic_i_su), - NUF(vhadd, 0000000, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su), NUF(vhaddq, 0000000, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i_su), - NUF(vrhadd, 0000100, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su), NUF(vrhaddq, 0000100, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i_su), - NUF(vhsub, 0000200, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su), NUF(vhsubq, 0000200, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i_su), /* integer ops, valid types S8 S16 S32 S64 U8 U16 U32 U64. */ - NUF(vqadd, 0000010, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su), NUF(vqaddq, 0000010, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i64_su), - NUF(vqsub, 0000210, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su), NUF(vqsubq, 0000210, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i64_su), - NUF(vrshl, 0000500, 3, (RNDQ, oRNDQ, RNDQ), neon_rshl), NUF(vrshlq, 0000500, 3, (RNQ, oRNQ, RNQ), neon_rshl), - NUF(vqrshl, 0000510, 3, (RNDQ, oRNDQ, RNDQ), neon_rshl), NUF(vqrshlq, 0000510, 3, (RNQ, oRNQ, RNQ), neon_rshl), /* If not immediate, fall back to neon_dyadic_i64_su. - shl_imm should accept I8 I16 I32 I64, - qshl_imm should accept S8 S16 S32 S64 U8 U16 U32 U64. */ - nUF(vshl, _vshl, 3, (RNDQ, oRNDQ, RNDQ_I63b), neon_shl_imm), - nUF(vshlq, _vshl, 3, (RNQ, oRNQ, RNDQ_I63b), neon_shl_imm), - nUF(vqshl, _vqshl, 3, (RNDQ, oRNDQ, RNDQ_I63b), neon_qshl_imm), - nUF(vqshlq, _vqshl, 3, (RNQ, oRNQ, RNDQ_I63b), neon_qshl_imm), + shl should accept I8 I16 I32 I64, + qshl should accept S8 S16 S32 S64 U8 U16 U32 U64. */ + nUF(vshlq, _vshl, 3, (RNQ, oRNQ, RNDQ_I63b), neon_shl), + nUF(vqshlq, _vqshl, 3, (RNQ, oRNQ, RNDQ_I63b), neon_qshl), /* Logic ops, types optional & ignored. 
*/ - nUF(vand, _vand, 3, (RNDQ, oRNDQ, RNDQ_Ibig), neon_logic), nUF(vandq, _vand, 3, (RNQ, oRNQ, RNDQ_Ibig), neon_logic), - nUF(vbic, _vbic, 3, (RNDQ, oRNDQ, RNDQ_Ibig), neon_logic), nUF(vbicq, _vbic, 3, (RNQ, oRNQ, RNDQ_Ibig), neon_logic), - nUF(vorr, _vorr, 3, (RNDQ, oRNDQ, RNDQ_Ibig), neon_logic), nUF(vorrq, _vorr, 3, (RNQ, oRNQ, RNDQ_Ibig), neon_logic), - nUF(vorn, _vorn, 3, (RNDQ, oRNDQ, RNDQ_Ibig), neon_logic), nUF(vornq, _vorn, 3, (RNQ, oRNQ, RNDQ_Ibig), neon_logic), - nUF(veor, _veor, 3, (RNDQ, oRNDQ, RNDQ), neon_logic), nUF(veorq, _veor, 3, (RNQ, oRNQ, RNQ), neon_logic), /* Bitfield ops, untyped. */ NUF(vbsl, 1100110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield), @@ -23366,9 +25698,7 @@ static const struct asm_opcode insns[] = NUF(vbifq, 1300110, 3, (RNQ, RNQ, RNQ), neon_bitfield), /* Int and float variants, types S8 S16 S32 U8 U16 U32 F16 F32. */ nUF(vabdq, _vabd, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su), - nUF(vmax, _vmax, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su), nUF(vmaxq, _vmax, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su), - nUF(vmin, _vmin, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su), nUF(vminq, _vmin, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su), /* Comparisons. Types S8 S16 S32 U8 U16 U32 F32. Non-immediate versions fall back to neon_dyadic_if_su. */ @@ -23399,9 +25729,7 @@ static const struct asm_opcode insns[] = /* VMUL takes I8 I16 I32 F32 P8. */ nUF(vmulq, _vmul, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_mul), /* VQD{R}MULH takes S16 S32. */ - nUF(vqdmulh, _vqdmulh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh), nUF(vqdmulhq, _vqdmulh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qdmulh), - nUF(vqrdmulh, _vqrdmulh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh), nUF(vqrdmulhq, _vqrdmulh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qdmulh), NUF(vacge, 0000e10, 3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute), NUF(vacgeq, 0000e10, 3, (RNQ, oRNQ, RNQ), neon_fcmp_absolute), @@ -23416,7 +25744,6 @@ static const struct asm_opcode insns[] = NUF(vrsqrts, 0200f10, 3, (RNDQ, oRNDQ, RNDQ), neon_step), NUF(vrsqrtsq, 0200f10, 3, (RNQ, oRNQ, RNQ), neon_step), /* ARM v8.1 extension. */ - nUF (vqrdmlah, _vqrdmlah, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah), nUF (vqrdmlahq, _vqrdmlah, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qrdmlah), nUF (vqrdmlsh, _vqrdmlsh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah), nUF (vqrdmlshq, _vqrdmlsh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qrdmlah), @@ -23428,21 +25755,16 @@ static const struct asm_opcode insns[] = /* Data processing with two registers and a shift amount. */ /* Right shifts, and variants with rounding. Types accepted S8 S16 S32 S64 U8 U16 U32 U64. */ - NUF(vshr, 0800010, 3, (RNDQ, oRNDQ, I64z), neon_rshift_round_imm), NUF(vshrq, 0800010, 3, (RNQ, oRNQ, I64z), neon_rshift_round_imm), - NUF(vrshr, 0800210, 3, (RNDQ, oRNDQ, I64z), neon_rshift_round_imm), NUF(vrshrq, 0800210, 3, (RNQ, oRNQ, I64z), neon_rshift_round_imm), NUF(vsra, 0800110, 3, (RNDQ, oRNDQ, I64), neon_rshift_round_imm), NUF(vsraq, 0800110, 3, (RNQ, oRNQ, I64), neon_rshift_round_imm), NUF(vrsra, 0800310, 3, (RNDQ, oRNDQ, I64), neon_rshift_round_imm), NUF(vrsraq, 0800310, 3, (RNQ, oRNQ, I64), neon_rshift_round_imm), /* Shift and insert. Sizes accepted 8 16 32 64. */ - NUF(vsli, 1800510, 3, (RNDQ, oRNDQ, I63), neon_sli), NUF(vsliq, 1800510, 3, (RNQ, oRNQ, I63), neon_sli), - NUF(vsri, 1800410, 3, (RNDQ, oRNDQ, I64), neon_sri), NUF(vsriq, 1800410, 3, (RNQ, oRNQ, I64), neon_sri), /* QSHL{U} immediate accepts S8 S16 S32 S64 U8 U16 U32 U64. 
*/ - NUF(vqshlu, 1800610, 3, (RNDQ, oRNDQ, I63), neon_qshlu_imm), NUF(vqshluq, 1800610, 3, (RNQ, oRNQ, I63), neon_qshlu_imm), /* Right shift immediate, saturating & narrowing, with rounding variants. Types accepted S16 S32 S64 U16 U32 U64. */ @@ -23459,7 +25781,6 @@ static const struct asm_opcode insns[] = /* CVT with optional immediate for fixed-point variant. */ nUF(vcvtq, _vcvt, 3, (RNQ, RNQ, oI32b), neon_cvt), - nUF(vmvn, _vmvn, 2, (RNDQ, RNDQ_Ibig), neon_mvn), nUF(vmvnq, _vmvn, 2, (RNQ, RNDQ_Ibig), neon_mvn), /* Data processing, three registers of different lengths. */ @@ -23491,14 +25812,10 @@ static const struct asm_opcode insns[] = /* Two registers, miscellaneous. */ /* Reverse. Sizes 8 16 32 (must be < size in opcode). */ - NUF(vrev64, 1b00000, 2, (RNDQ, RNDQ), neon_rev), NUF(vrev64q, 1b00000, 2, (RNQ, RNQ), neon_rev), - NUF(vrev32, 1b00080, 2, (RNDQ, RNDQ), neon_rev), NUF(vrev32q, 1b00080, 2, (RNQ, RNQ), neon_rev), - NUF(vrev16, 1b00100, 2, (RNDQ, RNDQ), neon_rev), NUF(vrev16q, 1b00100, 2, (RNQ, RNQ), neon_rev), /* Vector replicate. Sizes 8 16 32. */ - nCE(vdup, _vdup, 2, (RNDQ, RR_RNSC), neon_dup), nCE(vdupq, _vdup, 2, (RNQ, RR_RNSC), neon_dup), /* VMOVL. Types S8 S16 S32 U8 U16 U32. */ NUF(vmovl, 0800a10, 2, (RNQ, RND), neon_movl), @@ -23514,9 +25831,7 @@ static const struct asm_opcode insns[] = NUF(vuzp, 1b20100, 2, (RNDQ, RNDQ), neon_zip_uzp), NUF(vuzpq, 1b20100, 2, (RNQ, RNQ), neon_zip_uzp), /* VQABS / VQNEG. Types S8 S16 S32. */ - NUF(vqabs, 1b00700, 2, (RNDQ, RNDQ), neon_sat_abs_neg), NUF(vqabsq, 1b00700, 2, (RNQ, RNQ), neon_sat_abs_neg), - NUF(vqneg, 1b00780, 2, (RNDQ, RNDQ), neon_sat_abs_neg), NUF(vqnegq, 1b00780, 2, (RNQ, RNQ), neon_sat_abs_neg), /* Pairwise, lengthening. Types S8 S16 S32 U8 U16 U32. */ NUF(vpadal, 1b00600, 2, (RNDQ, RNDQ), neon_pair_long), @@ -23529,10 +25844,8 @@ static const struct asm_opcode insns[] = NUF(vrsqrte, 1b30480, 2, (RNDQ, RNDQ), neon_recip_est), NUF(vrsqrteq, 1b30480, 2, (RNQ, RNQ), neon_recip_est), /* VCLS. Types S8 S16 S32. */ - NUF(vcls, 1b00400, 2, (RNDQ, RNDQ), neon_cls), NUF(vclsq, 1b00400, 2, (RNQ, RNQ), neon_cls), /* VCLZ. Types I8 I16 I32. */ - NUF(vclz, 1b00480, 2, (RNDQ, RNDQ), neon_clz), NUF(vclzq, 1b00480, 2, (RNQ, RNQ), neon_clz), /* VCNT. Size 8. */ NUF(vcnt, 1b00500, 2, (RNDQ, RNDQ), neon_cnt), @@ -23596,11 +25909,13 @@ static const struct asm_opcode insns[] = #define ARM_VARIANT & fpu_vfp_ext_fma #undef THUMB_VARIANT #define THUMB_VARIANT & fpu_vfp_ext_fma - /* Mnemonics shared by Neon and VFP. These are included in the + /* Mnemonics shared by Neon, VFP, MVE and BF16. These are included in the VFP FMA variant; NEON and VFP FMA always includes the NEON FMA instructions. */ - nCEF(vfma, _vfma, 3, (RNSDQ, oRNSDQ, RNSDQ), neon_fmac), - nCEF(vfms, _vfms, 3, (RNSDQ, oRNSDQ, RNSDQ), neon_fmac), + mnCEF(vfma, _vfma, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQR), neon_fmac), + TUF ("vfmat", c300850, fc300850, 3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ_RR), mve_vfma, mve_vfma), + mnCEF(vfms, _vfms, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQ), neon_fmac), + /* ffmas/ffmad/ffmss/ffmsd are dummy mnemonics to satisfy gas; the v form should always be used. */ cCE("ffmas", ea00a00, 3, (RVS, RVS, RVS), vfp_sp_dyadic), @@ -23968,6 +26283,16 @@ static const struct asm_opcode insns[] = /* Armv8.1-M Mainline instructions. 
*/ #undef THUMB_VARIANT #define THUMB_VARIANT & arm_ext_v8_1m_main + toU("cinc", _cinc, 3, (RRnpcsp, RR_ZR, COND), t_cond), + toU("cinv", _cinv, 3, (RRnpcsp, RR_ZR, COND), t_cond), + toU("cneg", _cneg, 3, (RRnpcsp, RR_ZR, COND), t_cond), + toU("csel", _csel, 4, (RRnpcsp, RR_ZR, RR_ZR, COND), t_cond), + toU("csetm", _csetm, 2, (RRnpcsp, COND), t_cond), + toU("cset", _cset, 2, (RRnpcsp, COND), t_cond), + toU("csinc", _csinc, 4, (RRnpcsp, RR_ZR, RR_ZR, COND), t_cond), + toU("csinv", _csinv, 4, (RRnpcsp, RR_ZR, RR_ZR, COND), t_cond), + toU("csneg", _csneg, 4, (RRnpcsp, RR_ZR, RR_ZR, COND), t_cond), + toC("bf", _bf, 2, (EXPs, EXPs), t_branch_future), toU("bfcsel", _bfcsel, 4, (EXPs, EXPs, EXPs, COND), t_branch_future), toC("bfx", _bfx, 2, (EXPs, RRnpcsp), t_branch_future), @@ -23983,6 +26308,21 @@ static const struct asm_opcode insns[] = #undef THUMB_VARIANT #define THUMB_VARIANT & mve_ext + ToC("lsll", ea50010d, 3, (RRe, RRo, RRnpcsp_I32), mve_scalar_shift), + ToC("lsrl", ea50011f, 3, (RRe, RRo, I32), mve_scalar_shift), + ToC("asrl", ea50012d, 3, (RRe, RRo, RRnpcsp_I32), mve_scalar_shift), + ToC("uqrshll", ea51010d, 4, (RRe, RRo, I48_I64, RRnpcsp), mve_scalar_shift1), + ToC("sqrshrl", ea51012d, 4, (RRe, RRo, I48_I64, RRnpcsp), mve_scalar_shift1), + ToC("uqshll", ea51010f, 3, (RRe, RRo, I32), mve_scalar_shift), + ToC("urshrl", ea51011f, 3, (RRe, RRo, I32), mve_scalar_shift), + ToC("srshrl", ea51012f, 3, (RRe, RRo, I32), mve_scalar_shift), + ToC("sqshll", ea51013f, 3, (RRe, RRo, I32), mve_scalar_shift), + ToC("uqrshl", ea500f0d, 2, (RRnpcsp, RRnpcsp), mve_scalar_shift), + ToC("sqrshr", ea500f2d, 2, (RRnpcsp, RRnpcsp), mve_scalar_shift), + ToC("uqshl", ea500f0f, 2, (RRnpcsp, I32), mve_scalar_shift), + ToC("urshr", ea500f1f, 2, (RRnpcsp, I32), mve_scalar_shift), + ToC("srshr", ea500f2f, 2, (RRnpcsp, I32), mve_scalar_shift), + ToC("sqshl", ea500f3f, 2, (RRnpcsp, I32), mve_scalar_shift), ToC("vpt", ee410f00, 3, (COND, RMQ, RMQRZ), mve_vpt), ToC("vptt", ee018f00, 3, (COND, RMQ, RMQRZ), mve_vpt), @@ -24017,6 +26357,8 @@ static const struct asm_opcode insns[] = ToC("vpsteee", fe712f4d, 0, (), mve_vpt), /* MVE and MVE FP only. 
*/ + mToC("vhcadd", ee000f00, 4, (RMQ, RMQ, RMQ, EXPi), mve_vhcadd), + mCEF(vctp, _vctp, 1, (RRnpc), mve_vctp), mCEF(vadc, _vadc, 3, (RMQ, RMQ, RMQ), mve_vadc), mCEF(vadci, _vadci, 3, (RMQ, RMQ, RMQ), mve_vadc), mToC("vsbc", fe300f00, 3, (RMQ, RMQ, RMQ), mve_vsbc), @@ -24062,11 +26404,101 @@ static const struct asm_opcode insns[] = mCEF(vaddlva, _vaddlva, 3, (RRe, RRo, RMQ), mve_vaddlv), mCEF(vaddv, _vaddv, 2, (RRe, RMQ), mve_vaddv), mCEF(vaddva, _vaddva, 2, (RRe, RMQ), mve_vaddv), + mCEF(vddup, _vddup, 3, (RMQ, RRe, EXPi), mve_viddup), + mCEF(vdwdup, _vdwdup, 4, (RMQ, RRe, RR, EXPi), mve_viddup), + mCEF(vidup, _vidup, 3, (RMQ, RRe, EXPi), mve_viddup), + mCEF(viwdup, _viwdup, 4, (RMQ, RRe, RR, EXPi), mve_viddup), + mToC("vmaxa", ee330e81, 2, (RMQ, RMQ), mve_vmaxa_vmina), + mToC("vmina", ee331e81, 2, (RMQ, RMQ), mve_vmaxa_vmina), + mCEF(vmaxv, _vmaxv, 2, (RR, RMQ), mve_vmaxv), + mCEF(vmaxav, _vmaxav, 2, (RR, RMQ), mve_vmaxv), + mCEF(vminv, _vminv, 2, (RR, RMQ), mve_vmaxv), + mCEF(vminav, _vminav, 2, (RR, RMQ), mve_vmaxv), + + mCEF(vmlaldav, _vmlaldav, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav), + mCEF(vmlaldava, _vmlaldava, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav), + mCEF(vmlaldavx, _vmlaldavx, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav), + mCEF(vmlaldavax, _vmlaldavax, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav), + mCEF(vmlalv, _vmlaldav, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav), + mCEF(vmlalva, _vmlaldava, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav), + mCEF(vmlsldav, _vmlsldav, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav), + mCEF(vmlsldava, _vmlsldava, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav), + mCEF(vmlsldavx, _vmlsldavx, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav), + mCEF(vmlsldavax, _vmlsldavax, 4, (RRe, RRo, RMQ, RMQ), mve_vmlaldav), + mToC("vrmlaldavh", ee800f00, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh), + mToC("vrmlaldavha",ee800f20, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh), + mCEF(vrmlaldavhx, _vrmlaldavhx, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh), + mCEF(vrmlaldavhax, _vrmlaldavhax, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh), + mToC("vrmlalvh", ee800f00, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh), + mToC("vrmlalvha", ee800f20, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh), + mCEF(vrmlsldavh, _vrmlsldavh, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh), + mCEF(vrmlsldavha, _vrmlsldavha, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh), + mCEF(vrmlsldavhx, _vrmlsldavhx, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh), + mCEF(vrmlsldavhax, _vrmlsldavhax, 4, (RRe, RR, RMQ, RMQ), mve_vrmlaldavh), + + mToC("vmlas", ee011e40, 3, (RMQ, RMQ, RR), mve_vmlas), + mToC("vmulh", ee010e01, 3, (RMQ, RMQ, RMQ), mve_vmulh), + mToC("vrmulh", ee011e01, 3, (RMQ, RMQ, RMQ), mve_vmulh), + mToC("vpnot", fe310f4d, 0, (), mve_vpnot), + mToC("vpsel", fe310f01, 3, (RMQ, RMQ, RMQ), mve_vpsel), + + mToC("vqdmladh", ee000e00, 3, (RMQ, RMQ, RMQ), mve_vqdmladh), + mToC("vqdmladhx", ee001e00, 3, (RMQ, RMQ, RMQ), mve_vqdmladh), + mToC("vqrdmladh", ee000e01, 3, (RMQ, RMQ, RMQ), mve_vqdmladh), + mToC("vqrdmladhx",ee001e01, 3, (RMQ, RMQ, RMQ), mve_vqdmladh), + mToC("vqdmlsdh", fe000e00, 3, (RMQ, RMQ, RMQ), mve_vqdmladh), + mToC("vqdmlsdhx", fe001e00, 3, (RMQ, RMQ, RMQ), mve_vqdmladh), + mToC("vqrdmlsdh", fe000e01, 3, (RMQ, RMQ, RMQ), mve_vqdmladh), + mToC("vqrdmlsdhx",fe001e01, 3, (RMQ, RMQ, RMQ), mve_vqdmladh), + mToC("vqdmlah", ee000e60, 3, (RMQ, RMQ, RR), mve_vqdmlah), + mToC("vqdmlash", ee001e60, 3, (RMQ, RMQ, RR), mve_vqdmlah), + mToC("vqrdmlash", ee001e40, 3, (RMQ, RMQ, RR), mve_vqdmlah), + mToC("vqdmullt", ee301f00, 3, (RMQ, RMQ, RMQRR), mve_vqdmull), + mToC("vqdmullb", ee300f00, 3, 
+ mCEF(vqmovnt, _vqmovnt, 2, (RMQ, RMQ), mve_vqmovn),
+ mCEF(vqmovnb, _vqmovnb, 2, (RMQ, RMQ), mve_vqmovn),
+ mCEF(vqmovunt, _vqmovunt, 2, (RMQ, RMQ), mve_vqmovn),
+ mCEF(vqmovunb, _vqmovunb, 2, (RMQ, RMQ), mve_vqmovn),
+
+ mCEF(vshrnt, _vshrnt, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vshrnb, _vshrnb, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vrshrnt, _vrshrnt, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vrshrnb, _vrshrnb, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqshrnt, _vqrshrnt, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqshrnb, _vqrshrnb, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqshrunt, _vqrshrunt, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqshrunb, _vqrshrunb, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqrshrnt, _vqrshrnt, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqrshrnb, _vqrshrnb, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqrshrunt, _vqrshrunt, 3, (RMQ, RMQ, I32z), mve_vshrn),
+ mCEF(vqrshrunb, _vqrshrunb, 3, (RMQ, RMQ, I32z), mve_vshrn),
+
+ mToC("vshlc", eea00fc0, 3, (RMQ, RR, I32z), mve_vshlc),
+ mToC("vshllt", ee201e00, 3, (RMQ, RMQ, I32), mve_vshll),
+ mToC("vshllb", ee200e00, 3, (RMQ, RMQ, I32), mve_vshll),
+
+ toU("dlstp", _dlstp, 2, (LR, RR), t_loloop),
+ toU("wlstp", _wlstp, 3, (LR, RR, EXP), t_loloop),
+ toU("letp", _letp, 2, (LR, EXP), t_loloop),
+ toU("lctp", _lctp, 0, (), t_loloop),
+
+#undef THUMB_VARIANT
+#define THUMB_VARIANT & mve_fp_ext
+ mToC("vcmul", ee300e00, 4, (RMQ, RMQ, RMQ, EXPi), mve_vcmul),
+ mToC("vfmas", ee311e40, 3, (RMQ, RMQ, RR), mve_vfmas),
+ mToC("vmaxnma", ee3f0e81, 2, (RMQ, RMQ), mve_vmaxnma_vminnma),
+ mToC("vminnma", ee3f1e81, 2, (RMQ, RMQ), mve_vmaxnma_vminnma),
+ mToC("vmaxnmv", eeee0f00, 2, (RR, RMQ), mve_vmaxnmv),
+ mToC("vmaxnmav",eeec0f00, 2, (RR, RMQ), mve_vmaxnmv),
+ mToC("vminnmv", eeee0f80, 2, (RR, RMQ), mve_vmaxnmv),
+ mToC("vminnmav",eeec0f80, 2, (RR, RMQ), mve_vmaxnmv),
 #undef ARM_VARIANT
 #define ARM_VARIANT & fpu_vfp_ext_v1
 #undef THUMB_VARIANT
 #define THUMB_VARIANT & arm_ext_v6t2
+ mnCEF(vmla, _vmla, 3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ_RR), neon_mac_maybe_scalar),
+ mnCEF(vmul, _vmul, 3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ_RR), neon_mul),
 mcCE(fcpyd, eb00b40, 2, (RVD, RVD), vfp_dp_rd_rm),
@@ -24105,14 +26537,102 @@ static const struct asm_opcode insns[] =
 mnUF(vcvtp, _vcvta, 2, (RNSDQMQ, oRNSDQMQ), neon_cvtp),
 mnUF(vcvtn, _vcvta, 3, (RNSDQMQ, oRNSDQMQ, oI32z), neon_cvtn),
 mnUF(vcvtm, _vcvta, 2, (RNSDQMQ, oRNSDQMQ), neon_cvtm),
+ mnUF(vmaxnm, _vmaxnm, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQ), vmaxnm),
+ mnUF(vminnm, _vminnm, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQ), vmaxnm),
 #undef ARM_VARIANT
 #define ARM_VARIANT & fpu_neon_ext_v1
- mnUF(vabd, _vabd, 3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_if_su),
+ mnUF(vabd, _vabd, 3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_if_su),
 mnUF(vabdl, _vabdl, 3, (RNQMQ, RNDMQ, RNDMQ), neon_dyadic_long),
- mnUF(vaddl, _vaddl, 3, (RNQMQ, RNDMQ, RNDMQR), neon_dyadic_long),
- mnUF(vsubl, _vsubl, 3, (RNQMQ, RNDMQ, RNDMQR), neon_dyadic_long),
+ mnUF(vaddl, _vaddl, 3, (RNSDQMQ, oRNSDMQ, RNSDMQR), neon_dyadic_long),
+ mnUF(vsubl, _vsubl, 3, (RNSDQMQ, oRNSDMQ, RNSDMQR), neon_dyadic_long),
+ mnUF(vand, _vand, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_Ibig), neon_logic),
+ mnUF(vbic, _vbic, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_Ibig), neon_logic),
+ mnUF(vorr, _vorr, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_Ibig), neon_logic),
+ mnUF(vorn, _vorn, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_Ibig), neon_logic),
+ mnUF(veor, _veor, 3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_logic),
+ MNUF(vcls, 1b00400, 2, (RNDQMQ, RNDQMQ), neon_cls),
+ MNUF(vclz, 1b00480, 2, (RNDQMQ, RNDQMQ), neon_clz),
+ mnCE(vdup, _vdup, 2, (RNDQMQ, RR_RNSC), neon_dup),
+ MNUF(vhadd, 00000000, 3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_dyadic_i_su),
+ MNUF(vrhadd, 00000100, 3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_i_su),
+ MNUF(vhsub, 00000200, 3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_dyadic_i_su),
+ mnUF(vmin, _vmin, 3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_if_su),
+ mnUF(vmax, _vmax, 3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_if_su),
+ MNUF(vqadd, 0000010, 3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_dyadic_i64_su),
+ MNUF(vqsub, 0000210, 3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_dyadic_i64_su),
+ mnUF(vmvn, _vmvn, 2, (RNDQMQ, RNDQMQ_Ibig), neon_mvn),
+ MNUF(vqabs, 1b00700, 2, (RNDQMQ, RNDQMQ), neon_sat_abs_neg),
+ MNUF(vqneg, 1b00780, 2, (RNDQMQ, RNDQMQ), neon_sat_abs_neg),
+ mnUF(vqrdmlah, _vqrdmlah,3, (RNDQMQ, oRNDQMQ, RNDQ_RNSC_RR), neon_qrdmlah),
+ mnUF(vqdmulh, _vqdmulh, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_RNSC_RR), neon_qdmulh),
+ mnUF(vqrdmulh, _vqrdmulh,3, (RNDQMQ, oRNDQMQ, RNDQMQ_RNSC_RR), neon_qdmulh),
+ MNUF(vqrshl, 0000510, 3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_rshl),
+ MNUF(vrshl, 0000500, 3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_rshl),
+ MNUF(vshr, 0800010, 3, (RNDQMQ, oRNDQMQ, I64z), neon_rshift_round_imm),
+ MNUF(vrshr, 0800210, 3, (RNDQMQ, oRNDQMQ, I64z), neon_rshift_round_imm),
+ MNUF(vsli, 1800510, 3, (RNDQMQ, oRNDQMQ, I63), neon_sli),
+ MNUF(vsri, 1800410, 3, (RNDQMQ, oRNDQMQ, I64z), neon_sri),
+ MNUF(vrev64, 1b00000, 2, (RNDQMQ, RNDQMQ), neon_rev),
+ MNUF(vrev32, 1b00080, 2, (RNDQMQ, RNDQMQ), neon_rev),
+ MNUF(vrev16, 1b00100, 2, (RNDQMQ, RNDQMQ), neon_rev),
+ mnUF(vshl, _vshl, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_I63b_RR), neon_shl),
+ mnUF(vqshl, _vqshl, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_I63b_RR), neon_qshl),
+ MNUF(vqshlu, 1800610, 3, (RNDQMQ, oRNDQMQ, I63), neon_qshlu_imm),
+
+#undef ARM_VARIANT
+#define ARM_VARIANT & arm_ext_v8_3
+#undef THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_v6t2_v8m
+ MNUF (vcadd, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ, EXPi), vcadd),
+ MNUF (vcmla, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ_RNSC, EXPi), vcmla),
+
+#undef ARM_VARIANT
+#define ARM_VARIANT &arm_ext_bf16
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &arm_ext_bf16
+ TUF ("vdot", c000d00, fc000d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vdot, vdot),
+ TUF ("vmmla", c000c40, fc000c40, 3, (RNQ, RNQ, RNQ), vmmla, vmmla),
+ TUF ("vfmab", c300810, fc300810, 3, (RNDQ, RNDQ, RNDQ_RNSC), bfloat_vfma, bfloat_vfma),
+
+#undef ARM_VARIANT
+#define ARM_VARIANT &arm_ext_i8mm
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &arm_ext_i8mm
+ TUF ("vsmmla", c200c40, fc200c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
+ TUF ("vummla", c200c50, fc200c50, 3, (RNQ, RNQ, RNQ), vummla, vummla),
+ TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
+ TUF ("vusdot", c800d00, fc800d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vusdot, vusdot),
+ TUF ("vsudot", c800d10, fc800d10, 3, (RNDQ, RNDQ, RNSC), vsudot, vsudot),
+
+#undef ARM_VARIANT
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &arm_ext_cde
+ ToC ("cx1", ee000000, 3, (RCP, APSR_RR, I8191), cx1),
+ ToC ("cx1a", fe000000, 3, (RCP, APSR_RR, I8191), cx1a),
+ ToC ("cx1d", ee000040, 4, (RCP, RR, APSR_RR, I8191), cx1d),
+ ToC ("cx1da", fe000040, 4, (RCP, RR, APSR_RR, I8191), cx1da),
+
+ ToC ("cx2", ee400000, 4, (RCP, APSR_RR, APSR_RR, I511), cx2),
+ ToC ("cx2a", fe400000, 4, (RCP, APSR_RR, APSR_RR, I511), cx2a),
+ ToC ("cx2d", ee400040, 5, (RCP, RR, APSR_RR, APSR_RR, I511), cx2d),
+ ToC ("cx2da", fe400040, 5, (RCP, RR, APSR_RR, APSR_RR, I511), cx2da),
+
+ ToC ("cx3", ee800000, 5, (RCP, APSR_RR, APSR_RR, APSR_RR, I63), cx3),
+ ToC ("cx3a", fe800000, 5, (RCP, APSR_RR, APSR_RR, APSR_RR, I63), cx3a),
+ ToC ("cx3d", ee800040, 6, (RCP, RR, APSR_RR, APSR_RR, APSR_RR, I63), cx3d),
+ ToC ("cx3da", fe800040, 6, (RCP, RR, APSR_RR, APSR_RR, APSR_RR, I63), cx3da),
+
+ mToC ("vcx1", ec200000, 3, (RCP, RNSDMQ, I4095), vcx1),
+ mToC ("vcx1a", fc200000, 3, (RCP, RNSDMQ, I4095), vcx1),
+
+ mToC ("vcx2", ec300000, 4, (RCP, RNSDMQ, RNSDMQ, I127), vcx2),
+ mToC ("vcx2a", fc300000, 4, (RCP, RNSDMQ, RNSDMQ, I127), vcx2),
+
+ mToC ("vcx3", ec800000, 5, (RCP, RNSDMQ, RNSDMQ, RNSDMQ, I15), vcx3),
+ mToC ("vcx3a", fc800000, 5, (RCP, RNSDMQ, RNSDMQ, RNSDMQ, I15), vcx3),
 };
+
 #undef ARM_VARIANT
 #undef THUMB_VARIANT
 #undef TCE
@@ -24841,7 +27361,7 @@ arm_init_frag (fragS * fragP, int max_chars)
   /* PR 21809: Do not set a mapping state for debug sections
      - it just confuses other tools.  */
-  if (bfd_get_section_flags (NULL, now_seg) & SEC_DEBUGGING)
+  if (bfd_section_flags (now_seg) & SEC_DEBUGGING)
     return;
   frag_thumb_mode = fragP->tc_frag_data.thumb_mode ^ MODE_RECORDED;
@@ -25024,7 +27544,7 @@ start_unwind_section (const segT text_seg, int idx)
   const char * text_name;
   const char * prefix;
   const char * prefix_once;
-  const char * group_name;
+  struct elf_section_match match;
   char * sec_name;
   int type;
   int flags;
@@ -25058,13 +27578,13 @@ start_unwind_section (const segT text_seg, int idx)
   flags = SHF_ALLOC;
   linkonce = 0;
-  group_name = 0;
+  memset (&match, 0, sizeof (match));
   /* Handle COMDAT group.  */
   if (prefix != prefix_once && (text_seg->flags & SEC_LINK_ONCE) != 0)
     {
-      group_name = elf_group_name (text_seg);
-      if (group_name == NULL)
+      match.group_name = elf_group_name (text_seg);
+      if (match.group_name == NULL)
        {
          as_bad (_("Group section `%s' has no group signature"),
                  segment_name (text_seg));
@@ -25075,7 +27595,7 @@ start_unwind_section (const segT text_seg, int idx)
       linkonce = 1;
     }
-  obj_elf_change_section (sec_name, type, 0, flags, 0, group_name,
+  obj_elf_change_section (sec_name, type, flags, 0, &match,
                          linkonce, 0);
   /* Set the section link for index tables.  */
@@ -26264,11 +28784,12 @@ md_apply_fix (fixS * fixP,
       break;
     case BFD_RELOC_ARM_SMC:
-      if (((unsigned long) value) > 0xffff)
+      if (((unsigned long) value) > 0xf)
        as_bad_where (fixP->fx_file, fixP->fx_line,
                      _("invalid smc expression"));
+
       newval = md_chars_to_number (buf, INSN_SIZE);
-      newval |= (value & 0xf) | ((value & 0xfff0) << 4);
+      newval |= (value & 0xf);
       md_number_to_chars (buf, newval, INSN_SIZE);
       break;
@@ -26437,7 +28958,7 @@ md_apply_fix (fixS * fixP,
       break;
     case BFD_RELOC_THUMB_PCREL_BRANCH9: /* Conditional branch.  */
-      if ((value & ~0xff) && ((value & ~0xff) != ~0xff))
+      if (out_of_range_p (value, 8))
        as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
       if (fixP->fx_done || !seg->use_rela_p)
@@ -26449,7 +28970,7 @@ md_apply_fix (fixS * fixP,
       break;
    case BFD_RELOC_THUMB_PCREL_BRANCH12: /* Unconditional branch.  */
-      if ((value & ~0x7ff) && ((value & ~0x7ff) != ~0x7ff))
+      if (out_of_range_p (value, 11))
        as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
       if (fixP->fx_done || !seg->use_rela_p)
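
[Editor's illustration, not part of the patch: the hunks above and below replace the open-coded overflow tests on branch offsets with calls to the new out_of_range_p helper. The standalone C sketch below (the fits_in_branch_range name is hypothetical, not gas code) shows the property both forms test: a value fits an N-bit branch field when every bit above the field is all zeros or all ones.]

#include <stdio.h>

/* Mask off the low BITS bits; what remains must be all zeros (a small
   positive value) or all ones (a small negative value).  */
static int
fits_in_branch_range (long long value, int bits)
{
  long long high = value & ~((1LL << bits) - 1);
  return high == 0 || high == ~((1LL << bits) - 1);
}

int
main (void)
{
  /* The 8-bit check used for BFD_RELOC_THUMB_PCREL_BRANCH9 accepts
     255 and -256 but rejects 256: prints "1 1 0".  */
  printf ("%d %d %d\n",
          fits_in_branch_range (255, 8),
          fits_in_branch_range (-256, 8),
          fits_in_branch_range (256, 8));
  /* The 11-bit check used for BFD_RELOC_THUMB_PCREL_BRANCH12 accepts
     2047 but rejects -2049: prints "1 0".  */
  printf ("%d %d\n",
          fits_in_branch_range (2047, 11),
          fits_in_branch_range (-2049, 11));
  return 0;
}
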
*/ fixP->fx_done = 0; } - if ((value & ~0x1fffff) && ((value & ~0x0fffff) != ~0x0fffff)) + if (out_of_range_p (value, 20)) as_bad_where (fixP->fx_file, fixP->fx_line, _("conditional branch out of range")); @@ -26549,12 +29071,11 @@ md_apply_fix (fixS * fixP, fixP->fx_r_type = BFD_RELOC_THUMB_PCREL_BRANCH23; #endif - if ((value & ~0x3fffff) && ((value & ~0x3fffff) != ~0x3fffff)) + if (out_of_range_p (value, 22)) { if (!(ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2))) as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE); - else if ((value & ~0x1ffffff) - && ((value & ~0x1ffffff) != ~0x1ffffff)) + else if (out_of_range_p (value, 24)) as_bad_where (fixP->fx_file, fixP->fx_line, _("Thumb2 branch out of range")); } @@ -26565,7 +29086,7 @@ md_apply_fix (fixS * fixP, break; case BFD_RELOC_THUMB_PCREL_BRANCH25: - if ((value & ~0x0ffffff) && ((value & ~0x0ffffff) != ~0x0ffffff)) + if (out_of_range_p (value, 24)) as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE); if (fixP->fx_done || !seg->use_rela_p) @@ -26771,6 +29292,9 @@ md_apply_fix (fixS * fixP, (((unsigned long) fixP->fx_frag->fr_address + (unsigned long) fixP->fx_where) & ~3) + (unsigned long) value); + else if (get_recorded_alignment (seg) < 2) + as_warn_where (fixP->fx_file, fixP->fx_line, + _("section does not have enough alignment to ensure safe PC-relative loads")); if (value & ~0x3fc) as_bad_where (fixP->fx_file, fixP->fx_line, @@ -27409,9 +29933,10 @@ md_apply_fix (fixS * fixP, } bfd_vma insn = get_thumb32_insn (buf); - /* le lr,