From ef8f595f73a6b42f745bc76a716f45079eae1075 Mon Sep 17 00:00:00 2001 From: Mihail Ionescu Date: Tue, 12 Nov 2019 13:57:20 +0000 Subject: [PATCH] [gas][arm] Enable VLDM, VSTM, VPUSH, VPOP for MVE This patch enables a few instructions for Armv8.1-M MVE. Currently VLDM, VSTM, VSTR, VLDR, VPUSH and VPOP are enabled only when the Armv8-M Floating-point Extension is enabled. According to the ARMv8.1-M ARM, section A.1.4.2[1], they can be enabled by having "Armv8-M Floating-point Extension and/or Armv8.1-M MVE". [1]https://developer.arm.com/docs/ddi0553/bh/armv81-m-architecture-reference-manual 2019-11-12 Mihail Ionescu * config/tc-arm.c (do_vfp_nsyn_push): Move in order to enable it for both fpu_vfp_ext_v1xd and mve_ext and add call to the aliased vstm instruction for mve_ext. (do_vfp_nsyn_pop): Move in order to enable it for both fpu_vfp_ext_v1xd and mve_ext and add call to the aliased vldm instruction for mve_ext. (do_neon_ldm_stm): Add fpu_vfp_ext_v1xd and mve_ext checks. (insns): Enable vldm, vldmia, vldmdb, vstm, vstmia, vstmdb, vpop, vpush, and fldd, fstd, flds, fsts for arm_ext_v6t2 instead of fpu_vfp_ext_v1xd. * testsuite/gas/arm/v8_1m-mve.s: New. * testsuite/gas/arm/v8_1m-mve.d: New. --- gas/ChangeLog | 15 ++++ gas/config/tc-arm.c | 109 ++++++++++++++++++------------ gas/testsuite/gas/arm/v8_1m-mve.d | 27 ++++++++ gas/testsuite/gas/arm/v8_1m-mve.s | 24 +++++++ 4 files changed, 131 insertions(+), 44 deletions(-) create mode 100644 gas/testsuite/gas/arm/v8_1m-mve.d create mode 100644 gas/testsuite/gas/arm/v8_1m-mve.s diff --git a/gas/ChangeLog b/gas/ChangeLog index f84de0f25c..d0ce992223 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,18 @@ +2019-11-12 Mihail Ionescu + + * config/tc-arm.c (do_vfp_nsyn_push): Move in order to enable it for + both fpu_vfp_ext_v1xd and mve_ext and add call to the aliased vstm + instruction for mve_ext. 
+ (do_vfp_nsyn_pop): Move in order to enable it for both + fpu_vfp_ext_v1xd and mve_ext and add call to the aliased vldm + instruction for mve_ext. + (do_neon_ldm_stm): Add fpu_vfp_ext_v1xd and mve_ext checks. + (insns): Enable vldm, vldmia, vldmdb, vstm, vstmia, vstmdb, vpop, + vpush, and fldd, fstd, flds, fsts for arm_ext_v6t2 instead + of fpu_vfp_ext_v1xd. + * testsuite/gas/arm/v8_1m-mve.s: New. + * testsuite/gas/arm/v8_1m-mve.d: New. + 2019-11-12 Mihail Ionescu * gas/config/tc-arm.c (do_neon_mvn): Allow mve_ext cmode=0xd. diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c index a76aaf3621..641ce7e3a6 100644 --- a/gas/config/tc-arm.c +++ b/gas/config/tc-arm.c @@ -16527,36 +16527,6 @@ nsyn_insert_sp (void) inst.operands[0].present = 1; } -static void -do_vfp_nsyn_push (void) -{ - nsyn_insert_sp (); - - constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16, - _("register list must contain at least 1 and at most 16 " - "registers")); - - if (inst.operands[1].issingle) - do_vfp_nsyn_opcode ("fstmdbs"); - else - do_vfp_nsyn_opcode ("fstmdbd"); -} - -static void -do_vfp_nsyn_pop (void) -{ - nsyn_insert_sp (); - - constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16, - _("register list must contain at least 1 and at most 16 " - "registers")); - - if (inst.operands[1].issingle) - do_vfp_nsyn_opcode ("fldmias"); - else - do_vfp_nsyn_opcode ("fldmiad"); -} - /* Fix up Neon data-processing instructions, ORing in the correct bits for ARM mode or Thumb mode and moving the encoded bit 24 to bit 28. */ @@ -20638,6 +20608,9 @@ do_neon_tbl_tbx (void) static void do_neon_ldm_stm (void) { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); /* P, U and L bits are part of bitmask. 
*/ int is_dbmode = (inst.instruction & (1 << 24)) != 0; unsigned offsetbits = inst.operands[1].imm * 2; @@ -20665,6 +20638,49 @@ do_neon_ldm_stm (void) do_vfp_cond_or_thumb (); } +static void +do_vfp_nsyn_pop (void) +{ + nsyn_insert_sp (); + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) { + return do_vfp_nsyn_opcode ("vldm"); + } + + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd), + _(BAD_FPU)); + + constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16, + _("register list must contain at least 1 and at most 16 " + "registers")); + + if (inst.operands[1].issingle) + do_vfp_nsyn_opcode ("fldmias"); + else + do_vfp_nsyn_opcode ("fldmiad"); +} + +static void +do_vfp_nsyn_push (void) +{ + nsyn_insert_sp (); + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) { + return do_vfp_nsyn_opcode ("vstmdb"); + } + + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd), + _(BAD_FPU)); + + constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16, + _("register list must contain at least 1 and at most 16 " + "registers")); + + if (inst.operands[1].issingle) + do_vfp_nsyn_opcode ("fstmdbs"); + else + do_vfp_nsyn_opcode ("fstmdbd"); +} + + static void do_neon_ldr_str (void) { @@ -20745,7 +20761,8 @@ do_vldr_vstr (void) /* VLDR/VSTR. */ else { - if (!mark_feature_used (&fpu_vfp_ext_v1xd)) + if (!mark_feature_used (&fpu_vfp_ext_v1xd) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) as_bad (_("Instruction not permitted on this architecture")); do_neon_ldr_str (); } @@ -24966,6 +24983,10 @@ static const struct asm_opcode insns[] = #define THUMB_VARIANT & arm_ext_v6t2 mcCE(vmrs, ef00a10, 2, (APSR_RR, RVC), vmrs), mcCE(vmsr, ee00a10, 2, (RVC, RR), vmsr), + mcCE(fldd, d100b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst), + mcCE(fstd, d000b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst), + mcCE(flds, d100a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst), + mcCE(fsts, d000a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst), #undef THUMB_VARIANT /* Moves and type conversions. 
*/ @@ -24980,8 +25001,6 @@ static const struct asm_opcode insns[] = cCE("fmxr", ee00a10, 2, (RVC, RR), rn_rd), /* Memory operations. */ - cCE("flds", d100a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst), - cCE("fsts", d000a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst), cCE("fldmias", c900a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia), cCE("fldmfds", c900a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia), cCE("fldmdbs", d300a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb), @@ -25023,8 +25042,6 @@ static const struct asm_opcode insns[] = /* Double precision load/store are still present on single precision implementations. */ - cCE("fldd", d100b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst), - cCE("fstd", d000b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst), cCE("fldmiad", c900b00, 2, (RRnpctw, VRDLST), vfp_dp_ldstmia), cCE("fldmfdd", c900b00, 2, (RRnpctw, VRDLST), vfp_dp_ldstmia), cCE("fldmdbd", d300b00, 2, (RRnpctw, VRDLST), vfp_dp_ldstmdb), @@ -25077,6 +25094,19 @@ static const struct asm_opcode insns[] = Individual encoder functions perform additional architecture checks. 
*/ #undef ARM_VARIANT #define ARM_VARIANT & fpu_vfp_ext_v1xd +#undef THUMB_VARIANT +#define THUMB_VARIANT & arm_ext_v6t2 + + NCE(vldm, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), + NCE(vldmia, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), + NCE(vldmdb, d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), + NCE(vstm, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), + NCE(vstmia, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), + NCE(vstmdb, d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), + + NCE(vpop, 0, 1, (VRSDLST), vfp_nsyn_pop), + NCE(vpush, 0, 1, (VRSDLST), vfp_nsyn_push), + #undef THUMB_VARIANT #define THUMB_VARIANT & fpu_vfp_ext_v1xd @@ -25086,20 +25116,11 @@ static const struct asm_opcode insns[] = nCE(vnmul, _vnmul, 3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul), nCE(vnmla, _vnmla, 3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul), nCE(vnmls, _vnmls, 3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul), - NCE(vpush, 0, 1, (VRSDLST), vfp_nsyn_push), - NCE(vpop, 0, 1, (VRSDLST), vfp_nsyn_pop), NCE(vcvtz, 0, 2, (RVSD, RVSD), vfp_nsyn_cvtz), /* Mnemonics shared by Neon and VFP. 
*/ nCEF(vmls, _vmls, 3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar), - NCE(vldm, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), - NCE(vldmia, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), - NCE(vldmdb, d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), - NCE(vstm, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), - NCE(vstmia, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), - NCE(vstmdb, d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm), - mnCEF(vcvt, _vcvt, 3, (RNSDQMQ, RNSDQMQ, oI32z), neon_cvt), nCEF(vcvtr, _vcvt, 2, (RNSDQ, RNSDQ), neon_cvtr), MNCEF(vcvtb, eb20a40, 3, (RVSDMQ, RVSDMQ, oI32b), neon_cvtb), diff --git a/gas/testsuite/gas/arm/v8_1m-mve.d b/gas/testsuite/gas/arm/v8_1m-mve.d new file mode 100644 index 0000000000..4c528de073 --- /dev/null +++ b/gas/testsuite/gas/arm/v8_1m-mve.d @@ -0,0 +1,27 @@ +# name: V8.1-m FP register instructions enabled by +mve +# as: -march=armv8.1-m.main+mve +# objdump: -dr --show-raw-insn -marmv8.1-m.main + +.*: +file format .*arm.* + + +Disassembly of section .text: + +00000000 <\.text>: + *[0-9a-f]+: ec80 0b08 vstmia r0, {d0-d3} + *[0-9a-f]+: ecb7 3b04 vldmia r7!, {d3-d4} + *[0-9a-f]+: ecbd 0b06 vpop {d0-d2} + *[0-9a-f]+: ed2d 0b06 vpush {d0-d2} + *[0-9a-f]+: ecbd 2b08 vpop {d2-d5} + *[0-9a-f]+: ed2d 1b0c vpush {d1-d6} + *[0-9a-f]+: fe71 0f4d vpst + *[0-9a-f]+: fd00 3e01 vstrwt\.32 q1, \[q0, #-4\] + *[0-9a-f]+: ed82 2f80 vstr FPSCR, \[r2\] + *[0-9a-f]+: ed80 0b00 vstr d0, \[r0\] + *[0-9a-f]+: ed90 0b00 vldr d0, \[r0\] + *[0-9a-f]+: ed80 0a00 vstr s0, \[r0\] + *[0-9a-f]+: ed90 0a00 vldr s0, \[r0\] + *[0-9a-f]+: ed81 fb00 vstr d15, \[r1\] + *[0-9a-f]+: ed91 fb00 vldr d15, \[r1\] + *[0-9a-f]+: edc1 fa00 vstr s31, \[r1\] + *[0-9a-f]+: edd1 fa00 vldr s31, \[r1\] diff --git a/gas/testsuite/gas/arm/v8_1m-mve.s b/gas/testsuite/gas/arm/v8_1m-mve.s new file mode 100644 index 0000000000..cae1f93c15 --- /dev/null +++ b/gas/testsuite/gas/arm/v8_1m-mve.s @@ -0,0 +1,24 @@ +.syntax unified + +vstmia r0,{d0-d3} +vldmia r7!, {d3-d4} + +vpop 
{d0-d2} +vpush {d0-d2} +vpop {d2-d5} +vpush {d1-d6} + +vpst +vstrwt.u32 q1, [q0, #-4] + +vstr FPSCR, [r2] @ Accepts offset variant without immediate + +vstr d0,[r0] +vldr d0,[r0] +vstr s0,[r0] +vldr s0,[r0] + +vstr d15,[r1] +vldr d15,[r1] +vstr s31,[r1] +vldr s31,[r1] -- 2.34.1