From ef8f595f73a6b42f745bc76a716f45079eae1075 Mon Sep 17 00:00:00 2001
From: Mihail Ionescu <mihail.ionescu@arm.com>
Date: Tue, 12 Nov 2019 13:57:20 +0000
Subject: [PATCH] [gas][arm] Enable VLDM, VSTM, VPUSH, VPOP for MVE

This patch enables a few instructions for Armv8.1-M MVE. Currently VLDM,
VSTM, VSTR, VLDR, VPUSH and VPOP are enabled only when the Armv8-M
Floating-point Extension is enabled.  According to the Armv8.1-M
Architecture Reference Manual, section A.1.4.2[1], they can be enabled by
having "Armv8-M Floating-point Extension and/or Armv8.1-M MVE".

[1]https://developer.arm.com/docs/ddi0553/bh/armv81-m-architecture-reference-manual

2019-11-12  Mihail Ionescu  <mihail.ionescu@arm.com>

	* config/tc-arm.c (do_vfp_nsyn_push): Move in order to enable it for
	both fpu_vfp_ext_v1xd and mve_ext and add call to the aliased vstm
	instruction for mve_ext.
	(do_vfp_nsyn_pop): Move in order to enable it for both
	fpu_vfp_ext_v1xd and mve_ext and add call to the aliased vldm
	instruction for mve_ext.
	(do_neon_ldm_stm): Add fpu_vfp_ext_v1xd and mve_ext checks.
	(insns): Enable vldm, vldmia, vldmdb, vstm, vstmia, vstmdb, vpop,
	vpush, and fldd, fstd, flds, fsts for arm_ext_v6t2 instead
	of fpu_vfp_ext_v1xd.
	* testsuite/gas/arm/v8_1m-mve.s: New.
	* testsuite/gas/arm/v8_1m-mve.d: New.
---
 gas/ChangeLog                     |  15 ++++
 gas/config/tc-arm.c               | 109 ++++++++++++++++++------------
 gas/testsuite/gas/arm/v8_1m-mve.d |  27 ++++++++
 gas/testsuite/gas/arm/v8_1m-mve.s |  24 +++++++
 4 files changed, 131 insertions(+), 44 deletions(-)
 create mode 100644 gas/testsuite/gas/arm/v8_1m-mve.d
 create mode 100644 gas/testsuite/gas/arm/v8_1m-mve.s

diff --git a/gas/ChangeLog b/gas/ChangeLog
index f84de0f25c..d0ce992223 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,3 +1,18 @@
+2019-11-12  Mihail Ionescu  <mihail.ionescu@arm.com>
+
+	* config/tc-arm.c (do_vfp_nsyn_push): Move in order to enable it for
+	both fpu_vfp_ext_v1xd and mve_ext and add call to the aliased vstm
+	instruction for mve_ext.
+	(do_vfp_nsyn_pop): Move in order to enable it for both
+	fpu_vfp_ext_v1xd and mve_ext and add call to the aliased vldm
+	instruction for mve_ext.
+	(do_neon_ldm_stm): Add fpu_vfp_ext_v1xd and mve_ext checks.
+	(insns): Enable vldm, vldmia, vldmdb, vstm, vstmia, vstmdb, vpop,
+	vpush, and fldd, fstd, flds, fsts for arm_ext_v6t2 instead
+	of fpu_vfp_ext_v1xd.
+	* testsuite/gas/arm/v8_1m-mve.s: New.
+	* testsuite/gas/arm/v8_1m-mve.d: New.
+
 2019-11-12  Mihail Ionescu  <mihail.ionescu@arm.com>
 
 	* gas/config/tc-arm.c (do_neon_mvn): Allow mve_ext cmode=0xd.
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c
index a76aaf3621..641ce7e3a6 100644
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -16527,36 +16527,6 @@ nsyn_insert_sp (void)
   inst.operands[0].present = 1;
 }
 
-static void
-do_vfp_nsyn_push (void)
-{
-  nsyn_insert_sp ();
-
-  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
-	      _("register list must contain at least 1 and at most 16 "
-		"registers"));
-
-  if (inst.operands[1].issingle)
-    do_vfp_nsyn_opcode ("fstmdbs");
-  else
-    do_vfp_nsyn_opcode ("fstmdbd");
-}
-
-static void
-do_vfp_nsyn_pop (void)
-{
-  nsyn_insert_sp ();
-
-  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
-	      _("register list must contain at least 1 and at most 16 "
-		"registers"));
-
-  if (inst.operands[1].issingle)
-    do_vfp_nsyn_opcode ("fldmias");
-  else
-    do_vfp_nsyn_opcode ("fldmiad");
-}
-
 /* Fix up Neon data-processing instructions, ORing in the correct bits for
    ARM mode or Thumb mode and moving the encoded bit 24 to bit 28.  */
 
@@ -20638,6 +20608,9 @@ do_neon_tbl_tbx (void)
 static void
 do_neon_ldm_stm (void)
 {
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)
+	      && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+	      _(BAD_FPU));
   /* P, U and L bits are part of bitmask.  */
   int is_dbmode = (inst.instruction & (1 << 24)) != 0;
   unsigned offsetbits = inst.operands[1].imm * 2;
@@ -20665,6 +20638,49 @@ do_neon_ldm_stm (void)
   do_vfp_cond_or_thumb ();
 }
 
+/* Encode VPOP.  With mve_ext, defer to the aliased "vldm" instruction;
+   otherwise require fpu_vfp_ext_v1xd and use "fldmias"/"fldmiad".  */
+static void
+do_vfp_nsyn_pop (void)
+{
+  nsyn_insert_sp ();
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      do_vfp_nsyn_opcode ("vldm");
+      return;
+    }
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd),
+	      _(BAD_FPU));
+  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
+	      _("register list must contain at least 1 and at most 16 "
+		"registers"));
+
+  do_vfp_nsyn_opcode (inst.operands[1].issingle ? "fldmias" : "fldmiad");
+}
+
+/* Encode VPUSH.  With mve_ext, defer to the aliased "vstmdb" instruction;
+   otherwise require fpu_vfp_ext_v1xd and use "fstmdbs"/"fstmdbd".  */
+static void
+do_vfp_nsyn_push (void)
+{
+  nsyn_insert_sp ();
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      do_vfp_nsyn_opcode ("vstmdb");
+      return;
+    }
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd),
+	      _(BAD_FPU));
+  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
+	      _("register list must contain at least 1 and at most 16 "
+		"registers"));
+
+  do_vfp_nsyn_opcode (inst.operands[1].issingle ? "fstmdbs" : "fstmdbd");
+}
+
+
 static void
 do_neon_ldr_str (void)
 {
@@ -20745,7 +20761,8 @@ do_vldr_vstr (void)
   /* VLDR/VSTR.  */
   else
     {
-      if (!mark_feature_used (&fpu_vfp_ext_v1xd))
+      if (!mark_feature_used (&fpu_vfp_ext_v1xd)
+	  && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
 	as_bad (_("Instruction not permitted on this architecture"));
       do_neon_ldr_str ();
     }
@@ -24966,6 +24983,10 @@ static const struct asm_opcode insns[] =
 #define THUMB_VARIANT  & arm_ext_v6t2
  mcCE(vmrs,	ef00a10, 2, (APSR_RR, RVC),   vmrs),
  mcCE(vmsr,	ee00a10, 2, (RVC, RR),        vmsr),
+ mcCE(fldd,	d100b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
+ mcCE(fstd,	d000b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
+ mcCE(flds,	d100a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
+ mcCE(fsts,	d000a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
 #undef THUMB_VARIANT
 
   /* Moves and type conversions.  */
@@ -24980,8 +25001,6 @@ static const struct asm_opcode insns[] =
  cCE("fmxr",	ee00a10, 2, (RVC, RR),	      rn_rd),
 
   /* Memory operations.	 */
- cCE("flds",	d100a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
- cCE("fsts",	d000a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
  cCE("fldmias",	c900a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
  cCE("fldmfds",	c900a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
  cCE("fldmdbs",	d300a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
@@ -25023,8 +25042,6 @@ static const struct asm_opcode insns[] =
 
  /* Double precision load/store are still present on single precision
     implementations.  */
- cCE("fldd",	d100b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
- cCE("fstd",	d000b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
  cCE("fldmiad",	c900b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmia),
  cCE("fldmfdd",	c900b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmia),
  cCE("fldmdbd",	d300b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmdb),
@@ -25077,6 +25094,19 @@ static const struct asm_opcode insns[] =
    Individual encoder functions perform additional architecture checks.  */
 #undef  ARM_VARIANT
 #define ARM_VARIANT    & fpu_vfp_ext_v1xd
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2
+
+ NCE(vldm,      c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vldmia,    c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vldmdb,    d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstm,      c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstmia,    c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstmdb,    d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+
+ NCE(vpop,      0,       1, (VRSDLST),          vfp_nsyn_pop),
+ NCE(vpush,     0,       1, (VRSDLST),          vfp_nsyn_push),
+
 #undef  THUMB_VARIANT
 #define THUMB_VARIANT  & fpu_vfp_ext_v1xd
 
@@ -25086,20 +25116,11 @@ static const struct asm_opcode insns[] =
  nCE(vnmul,     _vnmul,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
  nCE(vnmla,     _vnmla,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
  nCE(vnmls,     _vnmls,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
- NCE(vpush,     0,       1, (VRSDLST),          vfp_nsyn_push),
- NCE(vpop,      0,       1, (VRSDLST),          vfp_nsyn_pop),
  NCE(vcvtz,     0,       2, (RVSD, RVSD),       vfp_nsyn_cvtz),
 
   /* Mnemonics shared by Neon and VFP.  */
  nCEF(vmls,     _vmls,    3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar),
 
- NCE(vldm,      c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vldmia,    c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vldmdb,    d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vstm,      c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vstmia,    c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vstmdb,    d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
-
  mnCEF(vcvt,     _vcvt,   3, (RNSDQMQ, RNSDQMQ, oI32z), neon_cvt),
  nCEF(vcvtr,    _vcvt,   2, (RNSDQ, RNSDQ), neon_cvtr),
  MNCEF(vcvtb,	eb20a40, 3, (RVSDMQ, RVSDMQ, oI32b), neon_cvtb),
diff --git a/gas/testsuite/gas/arm/v8_1m-mve.d b/gas/testsuite/gas/arm/v8_1m-mve.d
new file mode 100644
index 0000000000..4c528de073
--- /dev/null
+++ b/gas/testsuite/gas/arm/v8_1m-mve.d
@@ -0,0 +1,27 @@
+# name: V8.1-m FP register instructions enabled by +mve
+# as: -march=armv8.1-m.main+mve
+# objdump: -dr --show-raw-insn -marmv8.1-m.main
+
+.*: +file format .*arm.*
+
+
+Disassembly of section .text:
+
+00000000 <\.text>:
+ *[0-9a-f]+:	ec80 0b08 	vstmia	r0, {d0-d3}
+ *[0-9a-f]+:	ecb7 3b04 	vldmia	r7!, {d3-d4}
+ *[0-9a-f]+:	ecbd 0b06 	vpop	{d0-d2}
+ *[0-9a-f]+:	ed2d 0b06 	vpush	{d0-d2}
+ *[0-9a-f]+:	ecbd 2b08 	vpop	{d2-d5}
+ *[0-9a-f]+:	ed2d 1b0c 	vpush	{d1-d6}
+ *[0-9a-f]+:	fe71 0f4d 	vpst
+ *[0-9a-f]+:	fd00 3e01 	vstrwt\.32	q1, \[q0, #-4\]
+ *[0-9a-f]+:	ed82 2f80 	vstr	FPSCR, \[r2\]
+ *[0-9a-f]+:	ed80 0b00 	vstr	d0, \[r0\]
+ *[0-9a-f]+:	ed90 0b00 	vldr	d0, \[r0\]
+ *[0-9a-f]+:	ed80 0a00 	vstr	s0, \[r0\]
+ *[0-9a-f]+:	ed90 0a00 	vldr	s0, \[r0\]
+ *[0-9a-f]+:	ed81 fb00 	vstr	d15, \[r1\]
+ *[0-9a-f]+:	ed91 fb00 	vldr	d15, \[r1\]
+ *[0-9a-f]+:	edc1 fa00 	vstr	s31, \[r1\]
+ *[0-9a-f]+:	edd1 fa00 	vldr	s31, \[r1\]
diff --git a/gas/testsuite/gas/arm/v8_1m-mve.s b/gas/testsuite/gas/arm/v8_1m-mve.s
new file mode 100644
index 0000000000..cae1f93c15
--- /dev/null
+++ b/gas/testsuite/gas/arm/v8_1m-mve.s
@@ -0,0 +1,24 @@
+.syntax unified
+
+vstmia r0,{d0-d3}
+vldmia    r7!, {d3-d4}
+
+vpop {d0-d2}
+vpush {d0-d2}
+vpop {d2-d5}
+vpush {d1-d6}
+
+vpst
+vstrwt.u32 q1, [q0, #-4]
+
+vstr FPSCR, [r2] @ Accepts offset variant without immediate
+
+vstr d0,[r0]
+vldr d0,[r0]
+vstr s0,[r0]
+vldr s0,[r0]
+
+vstr d15,[r1]
+vldr d15,[r1]
+vstr s31,[r1]
+vldr s31,[r1]
-- 
2.34.1