/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2021 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* This must come before any other includes.  */
#include "defs.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#include "sim-signal.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC                                                    \
  do                                                                    \
    {                                                                   \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));                         \
      TRACE_INSN (cpu,                                                  \
                  "Unallocated instruction detected at sim line %d,"    \
                  " exe addr %" PRIx64,                                 \
                  __LINE__, aarch64_get_PC (cpu));                      \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
                       sim_stopped, SIM_SIGILL);                        \
    }                                                                   \
  while (0)

#define HALT_NYI                                                        \
  do                                                                    \
    {                                                                   \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));                         \
      TRACE_INSN (cpu,                                                  \
                  "Unimplemented instruction detected at sim line %d,"  \
                  " exe addr %" PRIx64,                                 \
                  __LINE__, aarch64_get_PC (cpu));                      \
      if (! TRACE_ANY_P (cpu))                                          \
        sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
                        aarch64_get_instr (cpu));                       \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
                       sim_stopped, SIM_SIGABRT);                       \
    }                                                                   \
  while (0)

#define NYI_assert(HI, LO, EXPECTED)            \
  do                                            \
    {                                           \
      if (INSTR ((HI), (LO)) != (EXPECTED))     \
        HALT_NYI;                               \
    }                                           \
  while (0)

/* Helper functions used by expand_logical_immediate.  */

/* For i = 1 .. N, set result<i-1> to 1; all other bits are zero.  */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t) -1 : ((1ULL << N) - 1));
}
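
/* For example, ones (4) yields 0xf, and ones (64) yields all 64 bits
   set via the explicit N == 64 check above.  */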

/* Set result<0> to val<N>.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
        {
        case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
        case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
        case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
        case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
        case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
        default: return 0;
        }
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm; /* Fall through.  */
    case  4: imm = (imm <<  4) | imm; /* Fall through.  */
    case  8: imm = (imm <<  8) | imm; /* Fall through.  */
    case 16: imm = (imm << 16) | imm; /* Fall through.  */
    case 32: imm = (imm << 32) | imm; /* Fall through.  */
    case 64: break;
    default: return 0;
    }

  return imm;
}
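
/* Two examples of the encoding above, worked by hand from the rules
   the function implements:

     N = 0, immr = 0, imms = 0x03: 0xxxxx so simd_size = 32; the
     element is (1 << 4) - 1 = 0xf, unrotated, replicated to give
     0x0000000f0000000f.

     N = 0, immr = 1, imms = 0x03: the 0xf element is rotated right
     by one to give 0x80000007, replicated to give
     0x8000000780000007.  */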

/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of int entries.  */
#define LI_TABLE_SIZE (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11,  6);
      uint32_t imms = uimm (index,  5,  0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
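
/* Decode routines are then expected to look up instr[22,10] in this
   table; a zero entry marks an unallocated encoding, since zero is
   not a representable logical immediate.  */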

static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
         aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
         aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 2:
      /* aarch64_notifyMethodExit ();  */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
         aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    }
}

/* Secondary decode within top level groups.  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit.  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit.  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit.  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit.  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit.  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP.  */

/* 32 bit pc-relative load.  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load.  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* sign extended 32 bit pc-relative load.  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_s32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* float pc-relative load.  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* double pc-relative load.  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is either 16, 32, 64 or 128.
   The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Scaled the offset is shifted by the
   element type's scale; when it is Unscaled the shift count
   is zero.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
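
/* For example, assuming ScaleShift32 is 2 (a 32 bit element is 4
   bytes), SCALE (offset, 32) yields offset << 2, while
   OPT_SCALE (offset, 32, scaling) yields offset << 2 when scaling is
   Scaled and offset unchanged when it is Unscaled.  */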

/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}
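
/* For example, extend (0xffffffff, SXTW) yields -1 while
   extend (0xffffffff, UXTW) yields 0xffffffff: the union above
   reinterprets the bits rather than converting the value.  */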

/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}
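
/* All of the _wb loads and stores below follow the same pattern: the
   offset is added before the access for Pre (and for NoWriteBack,
   which behaves like a plain base-plus-offset access), added after
   the access for Post, and the updated address is written back to
   the base register unless wb is NoWriteBack.  */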

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
                       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   scaled or unscaled 64-bit register offset.
   scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  With register or extended register offset
   versions the same applies, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode.  */

/* 32 bit load 32 bit scaled unsigned 12 bit.  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset.  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit.  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u64 (cpu, address + displacement));
}

/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be;
     there is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
                       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16
                       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The data register (rt) may not be SP but the base (rn) may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32)),
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address + displacement,
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 64),
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                             extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The data register (rt) may not be SP but the base (rn) may be.
     There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The data register (rt) may not be SP but the base (rn) may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 16),
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address.  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this.  */
}

/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Exclusive access is not modeled, so the store always succeeds.  */
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0);
}

static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
                            010 ==> LDRX, 011 ==> FLDRD
                            100 ==> LDRSW, 101 ==> FLDRQ
                            110 ==> PRFM, 111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}
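
/* For example, an LDR Xt, <label> has instr[31,30] = 01 and V = 0,
   giving dispatch = (1 << 1) | 0 = 2 and hence ldr_pcrel above.  */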

/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */

/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  if (uresult != (uint32_t)uresult)
    flags |= C;

  if (sresult != (int32_t)sresult)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
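
/* For example, 0x7fffffff + 1 overflows as a signed addition but not
   as an unsigned one, so the code above sets N and V but leaves C and
   Z clear; 0xffffffff + 1 wraps to zero and sets Z and C instead.  */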

/* N.B. the NEG and POS predicates below rely on a local variable,
   signbit, holding the sign bit of the operand width.  */
#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if ( (NEG (value1) && NEG (value2))
      || (NEG (value1) && POS (result))
      || (NEG (value2) && POS (result)))
    flags |= C;

  if ( (NEG (value1) && NEG (value2) && POS (result))
      || (POS (value1) && POS (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if ( (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if ( (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
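
/* The carry predicate above follows the AArch64 convention that C is
   set when a subtraction does not borrow, i.e. when value1 >= value2
   as unsigned integers; for example 3 - 5 leaves C clear while 5 - 3
   (and 0 - 0) set it.  */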

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if ( (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if ( (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1 << 31))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1ULL << 63))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}
1779
1780 /* 32 bit add immediate set flags. */
1781 static void
1782 adds32 (sim_cpu *cpu, uint32_t aimm)
1783 {
1784 unsigned rn = INSTR (9, 5);
1785 unsigned rd = INSTR (4, 0);
1786 /* TODO : do we need to worry about signs here? */
1787 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1788
1789 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1790 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1791 set_flags_for_add32 (cpu, value1, aimm);
1792 }
1793
1794 /* 64 bit add immediate set flags. */
1795 static void
1796 adds64 (sim_cpu *cpu, uint32_t aimm)
1797 {
1798 unsigned rn = INSTR (9, 5);
1799 unsigned rd = INSTR (4, 0);
1800 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1801 uint64_t value2 = aimm;
1802
1803 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1804 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1805 set_flags_for_add64 (cpu, value1, value2);
1806 }
1807
1808 /* 32 bit sub immediate. */
1809 static void
1810 sub32 (sim_cpu *cpu, uint32_t aimm)
1811 {
1812 unsigned rn = INSTR (9, 5);
1813 unsigned rd = INSTR (4, 0);
1814
1815 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1816 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1817 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1818 }
1819
1820 /* 64 bit sub immediate. */
1821 static void
1822 sub64 (sim_cpu *cpu, uint32_t aimm)
1823 {
1824 unsigned rn = INSTR (9, 5);
1825 unsigned rd = INSTR (4, 0);
1826
1827 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1828 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1829 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1830 }
1831
1832 /* 32 bit sub immediate set flags. */
1833 static void
1834 subs32 (sim_cpu *cpu, uint32_t aimm)
1835 {
1836 unsigned rn = INSTR (9, 5);
1837 unsigned rd = INSTR (4, 0);
1838 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1839 uint32_t value2 = aimm;
1840
1841 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1842 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1843 set_flags_for_sub32 (cpu, value1, value2);
1844 }
1845
1846 /* 64 bit sub immediate set flags. */
1847 static void
1848 subs64 (sim_cpu *cpu, uint32_t aimm)
1849 {
1850 unsigned rn = INSTR (9, 5);
1851 unsigned rd = INSTR (4, 0);
1852 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1853 uint64_t value2 = aimm;
1854
1855 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1856 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1857 set_flags_for_sub64 (cpu, value1, value2);
1858 }
1859
1860 /* Data Processing Register. */
1861
1862 /* First two helpers to perform the shift operations. */
1863
1864 static inline uint32_t
1865 shifted32 (uint32_t value, Shift shift, uint32_t count)
1866 {
1867 switch (shift)
1868 {
1869 default:
1870 case LSL:
1871 return (value << count);
1872 case LSR:
1873 return (value >> count);
1874 case ASR:
1875 {
1876 int32_t svalue = value;
1877 return (svalue >> count);
1878 }
1879 case ROR:
1880 {
1881 uint32_t top = value >> count;
1882 uint32_t bottom = value << (32 - count);
1883 return (bottom | top);
1884 }
1885 }
1886 }
1887
1888 static inline uint64_t
1889 shifted64 (uint64_t value, Shift shift, uint32_t count)
1890 {
1891 switch (shift)
1892 {
1893 default:
1894 case LSL:
1895 return (value << count);
1896 case LSR:
1897 return (value >> count);
1898 case ASR:
1899 {
1900 int64_t svalue = value;
1901 return (svalue >> count);
1902 }
1903 case ROR:
1904 {
1905 uint64_t top = value >> count;
1906 uint64_t bottom = value << (64 - count);
1907 return (bottom | top);
1908 }
1909 }
1910 }
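
/* N.B. the ROR arms above build the rotate from two shifts:
   ror (x, n) == (x >> n) | (x << (width - n)).  For n == 0 the second
   shift would be by the full register width, which is undefined in C,
   so a zero rotate count needs care in the callers.  Illustrative
   checks, kept out of the build (assumes <assert.h>; values
   hand-computed):  */
#if 0
static void
check_shifted32 (void)
{
  /* Rotating 0x80000001 right by 1 moves bit 0 up to bit 31.  */
  assert (shifted32 (0x80000001, ROR, 1) == 0xC0000000);
  /* An arithmetic shift replicates the sign bit.  */
  assert (shifted32 (0x80000000, ASR, 31) == 0xFFFFFFFF);
}
#endif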
1911
1912 /* Arithmetic shifted register.
1913 These allow an optional LSL, ASR or LSR to the second source
1914 register with a count up to the register bit count.
1915
1916 N.B. register args may not be SP. */
1917
1918 /* 32 bit ADD shifted register. */
1919 static void
1920 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1921 {
1922 unsigned rm = INSTR (20, 16);
1923 unsigned rn = INSTR (9, 5);
1924 unsigned rd = INSTR (4, 0);
1925
1926 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1927 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1928 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1929 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1930 shift, count));
1931 }
1932
1933 /* 64 bit ADD shifted register. */
1934 static void
1935 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1936 {
1937 unsigned rm = INSTR (20, 16);
1938 unsigned rn = INSTR (9, 5);
1939 unsigned rd = INSTR (4, 0);
1940
1941 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1942 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1943 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1944 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1945 shift, count));
1946 }
1947
1948 /* 32 bit ADD shifted register setting flags. */
1949 static void
1950 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1951 {
1952 unsigned rm = INSTR (20, 16);
1953 unsigned rn = INSTR (9, 5);
1954 unsigned rd = INSTR (4, 0);
1955
1956 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1957 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1958 shift, count);
1959
1960 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1961 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1962 set_flags_for_add32 (cpu, value1, value2);
1963 }
1964
1965 /* 64 bit ADD shifted register setting flags. */
1966 static void
1967 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1968 {
1969 unsigned rm = INSTR (20, 16);
1970 unsigned rn = INSTR (9, 5);
1971 unsigned rd = INSTR (4, 0);
1972
1973 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1974 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1975 shift, count);
1976
1977 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1978 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1979 set_flags_for_add64 (cpu, value1, value2);
1980 }
1981
1982 /* 32 bit SUB shifted register. */
1983 static void
1984 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1985 {
1986 unsigned rm = INSTR (20, 16);
1987 unsigned rn = INSTR (9, 5);
1988 unsigned rd = INSTR (4, 0);
1989
1990 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1991 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1992 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1993 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1994 shift, count));
1995 }
1996
1997 /* 64 bit SUB shifted register. */
1998 static void
1999 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2000 {
2001 unsigned rm = INSTR (20, 16);
2002 unsigned rn = INSTR (9, 5);
2003 unsigned rd = INSTR (4, 0);
2004
2005 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2006 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2007 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2008 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2009 shift, count));
2010 }
2011
2012 /* 32 bit SUB shifted register setting flags. */
2013 static void
2014 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2015 {
2016 unsigned rm = INSTR (20, 16);
2017 unsigned rn = INSTR (9, 5);
2018 unsigned rd = INSTR (4, 0);
2019
2020 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2021 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2022 shift, count);
2023
2024 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2025 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2026 set_flags_for_sub32 (cpu, value1, value2);
2027 }
2028
2029 /* 64 bit SUB shifted register setting flags. */
2030 static void
2031 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2032 {
2033 unsigned rm = INSTR (20, 16);
2034 unsigned rn = INSTR (9, 5);
2035 unsigned rd = INSTR (4, 0);
2036
2037 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2038 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2039 shift, count);
2040
2041 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2042 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2043 set_flags_for_sub64 (cpu, value1, value2);
2044 }
2045
2046 /* First a couple more helpers to fetch the
2047 relevant source register element either
2048 sign or zero extended as required by the
2049 extension value. */
2050
2051 static uint32_t
2052 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2053 {
2054 switch (extension)
2055 {
2056 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2057 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2058 case UXTW: /* Fall through. */
2059 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2060 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2061 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2062 case SXTW: /* Fall through. */
2063 case SXTX: /* Fall through. */
2064 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2065 }
2066 }
2067
2068 static uint64_t
2069 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2070 {
2071 switch (extension)
2072 {
2073 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2074 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2075 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2076 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2077 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2078 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2079 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2080 case SXTX:
2081 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2082 }
2083 }
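
/* Combining extreg32/extreg64 with the decode-time shift yields the
   second operand of the extended-register forms below.  A hand-worked
   sketch, kept out of the build ("extended_operand2" is a hypothetical
   name):  */
#if 0
static uint64_t
extended_operand2 (sim_cpu *cpu)
{
  /* For an operand written as W1, UXTB #2: if W1 holds 0x1234, UXTB
     keeps 0x34 and the shift gives 0x34 << 2 == 0xd0.  */
  return extreg64 (cpu, 1, UXTB) << 2;
}
#endif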
2084
2085 /* Arithmetic extending register.
2086 These allow an optional sign or zero extension of some portion of
2087 the second source register followed by an optional left shift of
2088 between 0 and 4 bits.
2089
2090 N.B. output (dest) and first input arg (source) may normally be Xn
2091 or SP. However, for flag setting operations dest can only be
2092 Xn. Second input registers are always Xn. */
2093
2094 /* 32 bit ADD extending register. */
2095 static void
2096 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2097 {
2098 unsigned rm = INSTR (20, 16);
2099 unsigned rn = INSTR (9, 5);
2100 unsigned rd = INSTR (4, 0);
2101
2102 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2103 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2104 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2105 + (extreg32 (cpu, rm, extension) << shift));
2106 }
2107
2108 /* 64 bit ADD extending register.
2109 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2110 static void
2111 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2112 {
2113 unsigned rm = INSTR (20, 16);
2114 unsigned rn = INSTR (9, 5);
2115 unsigned rd = INSTR (4, 0);
2116
2117 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2118 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2119 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2120 + (extreg64 (cpu, rm, extension) << shift));
2121 }
2122
2123 /* 32 bit ADD extending register setting flags. */
2124 static void
2125 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2126 {
2127 unsigned rm = INSTR (20, 16);
2128 unsigned rn = INSTR (9, 5);
2129 unsigned rd = INSTR (4, 0);
2130
2131 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2132 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2133
2134 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2135 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2136 set_flags_for_add32 (cpu, value1, value2);
2137 }
2138
2139 /* 64 bit ADD extending register setting flags */
2140 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2141 static void
2142 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2143 {
2144 unsigned rm = INSTR (20, 16);
2145 unsigned rn = INSTR (9, 5);
2146 unsigned rd = INSTR (4, 0);
2147
2148 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2149 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2150
2151 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2152 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2153 set_flags_for_add64 (cpu, value1, value2);
2154 }
2155
2156 /* 32 bit SUB extending register. */
2157 static void
2158 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2159 {
2160 unsigned rm = INSTR (20, 16);
2161 unsigned rn = INSTR (9, 5);
2162 unsigned rd = INSTR (4, 0);
2163
2164 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2165 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2166 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2167 - (extreg32 (cpu, rm, extension) << shift));
2168 }
2169
2170 /* 64 bit SUB extending register. */
2171 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2172 static void
2173 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2174 {
2175 unsigned rm = INSTR (20, 16);
2176 unsigned rn = INSTR (9, 5);
2177 unsigned rd = INSTR (4, 0);
2178
2179 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2180 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2181 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2182 - (extreg64 (cpu, rm, extension) << shift));
2183 }
2184
2185 /* 32 bit SUB extending register setting flags. */
2186 static void
2187 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2188 {
2189 unsigned rm = INSTR (20, 16);
2190 unsigned rn = INSTR (9, 5);
2191 unsigned rd = INSTR (4, 0);
2192
2193 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2194 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2195
2196 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2197 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2198 set_flags_for_sub32 (cpu, value1, value2);
2199 }
2200
2201 /* 64 bit SUB extending register setting flags */
2202 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2203 static void
2204 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2205 {
2206 unsigned rm = INSTR (20, 16);
2207 unsigned rn = INSTR (9, 5);
2208 unsigned rd = INSTR (4, 0);
2209
2210 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2211 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2212
2213 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2214 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2215 set_flags_for_sub64 (cpu, value1, value2);
2216 }
2217
2218 static void
2219 dexAddSubtractImmediate (sim_cpu *cpu)
2220 {
2221 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2222 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2223 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2224 instr[28,24] = 10001
2225 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2226 instr[21,10] = uimm12
2227 instr[9,5] = Rn
2228 instr[4,0] = Rd */
2229
2230 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2231 uint32_t shift = INSTR (23, 22);
2232 uint32_t imm = INSTR (21, 10);
2233 uint32_t dispatch = INSTR (31, 29);
2234
2235 NYI_assert (28, 24, 0x11);
2236
2237 if (shift > 1)
2238 HALT_UNALLOC;
2239
2240 if (shift)
2241 imm <<= 12;
2242
2243 switch (dispatch)
2244 {
2245 case 0: add32 (cpu, imm); break;
2246 case 1: adds32 (cpu, imm); break;
2247 case 2: sub32 (cpu, imm); break;
2248 case 3: subs32 (cpu, imm); break;
2249 case 4: add64 (cpu, imm); break;
2250 case 5: adds64 (cpu, imm); break;
2251 case 6: sub64 (cpu, imm); break;
2252 case 7: subs64 (cpu, imm); break;
2253 }
2254 }
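
/* A hand-worked decode of the layout above, kept out of the build
   ("show_decode" is a hypothetical name and the word was assembled by
   hand from the field description, so verify before relying on it):
   0x313ffc41 is ADDS W1, W2, #4095 -- size:op:set = 001, so the
   dispatch picks adds32.  Assumes <assert.h>.  */
#if 0
static void
show_decode (void)
{
  uint32_t insn = 0x313ffc41;

  assert (uimm (insn, 31, 29) == 1);      /* size:op:set ==> adds32.  */
  assert (uimm (insn, 23, 22) == 0);      /* LSL #0.  */
  assert (uimm (insn, 21, 10) == 0xfff);  /* uimm12.  */
  assert (uimm (insn, 9, 5) == 2);        /* Rn = W2.  */
  assert (uimm (insn, 4, 0) == 1);        /* Rd = W1.  */
}
#endif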
2255
2256 static void
2257 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2258 {
2259 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2260 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2261 instr[28,24] = 01011
2262 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2263 instr[21] = 0
2264 instr[20,16] = Rm
2265 instr[15,10] = count : must be 0xxxxx for 32 bit
2266 instr[9,5] = Rn
2267 instr[4,0] = Rd */
2268
2269 uint32_t size = INSTR (31, 31);
2270 uint32_t count = INSTR (15, 10);
2271 Shift shiftType = INSTR (23, 22);
2272
2273 NYI_assert (28, 24, 0x0B);
2274 NYI_assert (21, 21, 0);
2275
2276 /* Shift encoded as ROR is unallocated. */
2277 if (shiftType == ROR)
2278 HALT_UNALLOC;
2279
2280 /* 32 bit operations must have count[5] = 0
2281 or else we have an UNALLOC. */
2282 if (size == 0 && uimm (count, 5, 5))
2283 HALT_UNALLOC;
2284
2285 /* Dispatch on size:op i.e instr [31,29]. */
2286 switch (INSTR (31, 29))
2287 {
2288 case 0: add32_shift (cpu, shiftType, count); break;
2289 case 1: adds32_shift (cpu, shiftType, count); break;
2290 case 2: sub32_shift (cpu, shiftType, count); break;
2291 case 3: subs32_shift (cpu, shiftType, count); break;
2292 case 4: add64_shift (cpu, shiftType, count); break;
2293 case 5: adds64_shift (cpu, shiftType, count); break;
2294 case 6: sub64_shift (cpu, shiftType, count); break;
2295 case 7: subs64_shift (cpu, shiftType, count); break;
2296 }
2297 }
2298
2299 static void
2300 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2301 {
2302 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2303 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2304 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2305 instr[28,24] = 01011
2306 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2307 instr[21] = 1
2308 instr[20,16] = Rm
2309 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2310 010 ==> UXTW/LSL, 011 ==> UXTX,
2311 100 ==> SXTB, 101 ==> SXTH,
2312 110 ==> SXTW, 111 ==> SXTX,
2313 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2314 instr[9,5] = Rn
2315 instr[4,0] = Rd */
2316
2317 Extension extensionType = INSTR (15, 13);
2318 uint32_t shift = INSTR (12, 10);
2319
2320 NYI_assert (28, 24, 0x0B);
2321 NYI_assert (21, 21, 1);
2322
2323 /* Shift may not exceed 4. */
2324 if (shift > 4)
2325 HALT_UNALLOC;
2326
2327 /* Dispatch on size:op:set?. */
2328 switch (INSTR (31, 29))
2329 {
2330 case 0: add32_ext (cpu, extensionType, shift); break;
2331 case 1: adds32_ext (cpu, extensionType, shift); break;
2332 case 2: sub32_ext (cpu, extensionType, shift); break;
2333 case 3: subs32_ext (cpu, extensionType, shift); break;
2334 case 4: add64_ext (cpu, extensionType, shift); break;
2335 case 5: adds64_ext (cpu, extensionType, shift); break;
2336 case 6: sub64_ext (cpu, extensionType, shift); break;
2337 case 7: subs64_ext (cpu, extensionType, shift); break;
2338 }
2339 }
2340
2341 /* Conditional data processing
2342 Condition register is implicit 3rd source. */
2343
2344 /* 32 bit add with carry. */
2345 /* N.B register args may not be SP. */
2346
2347 static void
2348 adc32 (sim_cpu *cpu)
2349 {
2350 unsigned rm = INSTR (20, 16);
2351 unsigned rn = INSTR (9, 5);
2352 unsigned rd = INSTR (4, 0);
2353
2354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2355 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2356 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2357 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2358 + IS_SET (C));
2359 }
2360
2361 /* 64 bit add with carry */
2362 static void
2363 adc64 (sim_cpu *cpu)
2364 {
2365 unsigned rm = INSTR (20, 16);
2366 unsigned rn = INSTR (9, 5);
2367 unsigned rd = INSTR (4, 0);
2368
2369 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2370 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2371 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2372 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2373 + IS_SET (C));
2374 }
2375
2376 /* 32 bit add with carry setting flags. */
2377 static void
2378 adcs32 (sim_cpu *cpu)
2379 {
2380 unsigned rm = INSTR (20, 16);
2381 unsigned rn = INSTR (9, 5);
2382 unsigned rd = INSTR (4, 0);
2383
2384 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2385 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2386 uint32_t carry = IS_SET (C);
2387
2388 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2389 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2390 set_flags_for_add32 (cpu, value1, value2 + carry);
2391 }
2392
2393 /* 64 bit add with carry setting flags. */
2394 static void
2395 adcs64 (sim_cpu *cpu)
2396 {
2397 unsigned rm = INSTR (20, 16);
2398 unsigned rn = INSTR (9, 5);
2399 unsigned rd = INSTR (4, 0);
2400
2401 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2402 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2403 uint64_t carry = IS_SET (C);
2404
2405 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2406 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2407 set_flags_for_add64 (cpu, value1, value2 + carry);
2408 }
2409
2410 /* 32 bit sub with carry. */
2411 static void
2412 sbc32 (sim_cpu *cpu)
2413 {
2414 unsigned rm = INSTR (20, 16);
2415 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2416 unsigned rd = INSTR (4, 0);
2417
2418 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2419 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2420 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2421 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2422 - 1 + IS_SET (C));
2423 }
2424
2425 /* 64 bit sub with carry */
2426 static void
2427 sbc64 (sim_cpu *cpu)
2428 {
2429 unsigned rm = INSTR (20, 16);
2430 unsigned rn = INSTR (9, 5);
2431 unsigned rd = INSTR (4, 0);
2432
2433 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2434 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2435 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2436 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2437 - 1 + IS_SET (C));
2438 }
2439
2440 /* 32 bit sub with carry setting flags */
2441 static void
2442 sbcs32 (sim_cpu *cpu)
2443 {
2444 unsigned rm = INSTR (20, 16);
2445 unsigned rn = INSTR (9, 5);
2446 unsigned rd = INSTR (4, 0);
2447
2448 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2449 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2450 uint32_t carry = IS_SET (C);
2451 uint32_t result = value1 - value2 - 1 + carry;
2452
2453 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2454 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2455 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2456 }
2457
2458 /* 64 bit sub with carry setting flags */
2459 static void
2460 sbcs64 (sim_cpu *cpu)
2461 {
2462 unsigned rm = INSTR (20, 16);
2463 unsigned rn = INSTR (9, 5);
2464 unsigned rd = INSTR (4, 0);
2465
2466 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2467 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2468 uint64_t carry = IS_SET (C);
2469 uint64_t result = value1 - value2 - 1 + carry;
2470
2471 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2472 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2473 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2474 }
2475
2476 static void
2477 dexAddSubtractWithCarry (sim_cpu *cpu)
2478 {
2479 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2480 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2481 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2482 instr[28,21] = 1 1010 000
2483 instr[20,16] = Rm
2484 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2485 instr[9,5] = Rn
2486 instr[4,0] = Rd */
2487
2488 uint32_t op2 = INSTR (15, 10);
2489
2490 NYI_assert (28, 21, 0xD0);
2491
2492 if (op2 != 0)
2493 HALT_UNALLOC;
2494
2495 /* Dispatch on size:op:set?. */
2496 switch (INSTR (31, 29))
2497 {
2498 case 0: adc32 (cpu); break;
2499 case 1: adcs32 (cpu); break;
2500 case 2: sbc32 (cpu); break;
2501 case 3: sbcs32 (cpu); break;
2502 case 4: adc64 (cpu); break;
2503 case 5: adcs64 (cpu); break;
2504 case 6: sbc64 (cpu); break;
2505 case 7: sbcs64 (cpu); break;
2506 }
2507 }
2508
2509 static uint32_t
2510 testConditionCode (sim_cpu *cpu, CondCode cc)
2511 {
2512 /* This should be reducible to branchless logic
2513 by some careful testing of bits in CC followed
2514 by the requisite masking and combining of bits
2515 from the flag register.
2516
2517 For now we do it with a switch. */
2518 int res;
2519
2520 switch (cc)
2521 {
2522 case EQ: res = IS_SET (Z); break;
2523 case NE: res = IS_CLEAR (Z); break;
2524 case CS: res = IS_SET (C); break;
2525 case CC: res = IS_CLEAR (C); break;
2526 case MI: res = IS_SET (N); break;
2527 case PL: res = IS_CLEAR (N); break;
2528 case VS: res = IS_SET (V); break;
2529 case VC: res = IS_CLEAR (V); break;
2530 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2531 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2532 case GE: res = IS_SET (N) == IS_SET (V); break;
2533 case LT: res = IS_SET (N) != IS_SET (V); break;
2534 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2535 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2536 case AL:
2537 case NV:
2538 default:
2539 res = 1;
2540 break;
2541 }
2542 return res;
2543 }
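
/* The halving the comment above alludes to: cond<3:1> selects one of
   eight base tests and cond<0> inverts the result, except that 1111
   (NV) still means "always".  A sketch, kept out of the build
   ("test_cc_folded" is a hypothetical name):  */
#if 0
static uint32_t
test_cc_folded (sim_cpu *cpu, CondCode cc)
{
  int res;

  switch (cc >> 1)
    {
    case 0: res = IS_SET (Z); break;                      /* EQ/NE.  */
    case 1: res = IS_SET (C); break;                      /* CS/CC.  */
    case 2: res = IS_SET (N); break;                      /* MI/PL.  */
    case 3: res = IS_SET (V); break;                      /* VS/VC.  */
    case 4: res = IS_SET (C) && IS_CLEAR (Z); break;      /* HI/LS.  */
    case 5: res = IS_SET (N) == IS_SET (V); break;        /* GE/LT.  */
    case 6: res = IS_CLEAR (Z)
	      && (IS_SET (N) == IS_SET (V)); break;       /* GT/LE.  */
    default: res = 1; break;                              /* AL/NV.  */
    }

  if ((cc & 1) && cc != NV)
    res = ! res;
  return res;
}
#endif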
2544
2545 static void
2546 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2547 {
2548 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2549 instr[30] = op : 0 ==> CCMN (compare negated), 1 ==> CCMP (compare)
2550 instr[29,21] = 1 1101 0010
2551 instr[20,16] = Rm or const
2552 instr[15,12] = cond
2553 instr[11] = compare reg (0) or const (1)
2554 instr[10] = 0
2555 instr[9,5] = Rn
2556 instr[4] = 0
2557 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2558 signed int negate;
2559 unsigned rm;
2560 unsigned rn;
2561
2562 NYI_assert (29, 21, 0x1d2);
2563 NYI_assert (10, 10, 0);
2564 NYI_assert (4, 4, 0);
2565
2566 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2567 if (! testConditionCode (cpu, INSTR (15, 12)))
2568 {
2569 aarch64_set_CPSR (cpu, INSTR (3, 0));
2570 return;
2571 }
2572
2573 negate = INSTR (30, 30) ? 1 : -1;
2574 rm = INSTR (20, 16);
2575 rn = INSTR ( 9, 5);
2576
2577 if (INSTR (31, 31))
2578 {
2579 if (INSTR (11, 11))
2580 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2581 negate * (uint64_t) rm);
2582 else
2583 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2584 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2585 }
2586 else
2587 {
2588 if (INSTR (11, 11))
2589 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2590 negate * rm);
2591 else
2592 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2593 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2594 }
2595 }
2596
2597 static void
2598 do_vec_MOV_whole_vector (sim_cpu *cpu)
2599 {
2600 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2601
2602 instr[31] = 0
2603 instr[30] = half(0)/full(1)
2604 instr[29,21] = 001110101
2605 instr[20,16] = Vs
2606 instr[15,10] = 000111
2607 instr[9,5] = Vs
2608 instr[4,0] = Vd */
2609
2610 unsigned vs = INSTR (9, 5);
2611 unsigned vd = INSTR (4, 0);
2612
2613 NYI_assert (29, 21, 0x075);
2614 NYI_assert (15, 10, 0x07);
2615
2616 if (INSTR (20, 16) != vs)
2617 HALT_NYI;
2618
2619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2620 if (INSTR (30, 30))
2621 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2622
2623 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2624 }
2625
2626 static void
2627 do_vec_SMOV_into_scalar (sim_cpu *cpu)
2628 {
2629 /* instr[31] = 0
2630 instr[30] = word(0)/long(1)
2631 instr[29,21] = 00 1110 000
2632 instr[20,16] = element size and index
2633 instr[15,10] = 00 0010 11
2634 instr[9,5] = V source
2635 instr[4,0] = R dest */
2636
2637 unsigned vs = INSTR (9, 5);
2638 unsigned rd = INSTR (4, 0);
2639 unsigned imm5 = INSTR (20, 16);
2640 unsigned full = INSTR (30, 30);
2641 int size, index;
2642
2643 NYI_assert (29, 21, 0x070);
2644 NYI_assert (15, 10, 0x0B);
2645
2646 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2647
2648 if (imm5 & 0x1)
2649 {
2650 size = 0;
2651 index = (imm5 >> 1) & 0xF;
2652 }
2653 else if (imm5 & 0x2)
2654 {
2655 size = 1;
2656 index = (imm5 >> 2) & 0x7;
2657 }
2658 else if (full && (imm5 & 0x4))
2659 {
2660 size = 2;
2661 index = (imm5 >> 3) & 0x3;
2662 }
2663 else
2664 HALT_UNALLOC;
2665
2666 switch (size)
2667 {
2668 case 0:
2669 if (full)
2670 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2671 aarch64_get_vec_s8 (cpu, vs, index));
2672 else
2673 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2674 aarch64_get_vec_s8 (cpu, vs, index));
2675 break;
2676
2677 case 1:
2678 if (full)
2679 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2680 aarch64_get_vec_s16 (cpu, vs, index));
2681 else
2682 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2683 aarch64_get_vec_s16 (cpu, vs, index));
2684 break;
2685
2686 case 2:
2687 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2688 aarch64_get_vec_s32 (cpu, vs, index));
2689 break;
2690
2691 default:
2692 HALT_UNALLOC;
2693 }
2694 }
2695
2696 static void
2697 do_vec_UMOV_into_scalar (sim_cpu *cpu)
2698 {
2699 /* instr[31] = 0
2700 instr[30] = word(0)/long(1)
2701 instr[29,21] = 00 1110 000
2702 instr[20,16] = element size and index
2703 instr[15,10] = 00 0011 11
2704 instr[9,5] = V source
2705 instr[4,0] = R dest */
2706
2707 unsigned vs = INSTR (9, 5);
2708 unsigned rd = INSTR (4, 0);
2709 unsigned imm5 = INSTR (20, 16);
2710 unsigned full = INSTR (30, 30);
2711 int size, index;
2712
2713 NYI_assert (29, 21, 0x070);
2714 NYI_assert (15, 10, 0x0F);
2715
2716 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2717
2718 if (!full)
2719 {
2720 if (imm5 & 0x1)
2721 {
2722 size = 0;
2723 index = (imm5 >> 1) & 0xF;
2724 }
2725 else if (imm5 & 0x2)
2726 {
2727 size = 1;
2728 index = (imm5 >> 2) & 0x7;
2729 }
2730 else if (imm5 & 0x4)
2731 {
2732 size = 2;
2733 index = (imm5 >> 3) & 0x3;
2734 }
2735 else
2736 HALT_UNALLOC;
2737 }
2738 else if (imm5 & 0x8)
2739 {
2740 size = 3;
2741 index = (imm5 >> 4) & 0x1;
2742 }
2743 else
2744 HALT_UNALLOC;
2745
2746 switch (size)
2747 {
2748 case 0:
2749 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2750 aarch64_get_vec_u8 (cpu, vs, index));
2751 break;
2752
2753 case 1:
2754 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2755 aarch64_get_vec_u16 (cpu, vs, index));
2756 break;
2757
2758 case 2:
2759 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2760 aarch64_get_vec_u32 (cpu, vs, index));
2761 break;
2762
2763 case 3:
2764 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2765 aarch64_get_vec_u64 (cpu, vs, index));
2766 break;
2767
2768 default:
2769 HALT_UNALLOC;
2770 }
2771 }
2772
2773 static void
2774 do_vec_INS (sim_cpu *cpu)
2775 {
2776 /* instr[31,21] = 01001110000
2777 instr[20,16] = element size and index
2778 instr[15,10] = 000111
2779 instr[9,5] = W source
2780 instr[4,0] = V dest */
2781
2782 int index;
2783 unsigned rs = INSTR (9, 5);
2784 unsigned vd = INSTR (4, 0);
2785
2786 NYI_assert (31, 21, 0x270);
2787 NYI_assert (15, 10, 0x07);
2788
2789 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2790 if (INSTR (16, 16))
2791 {
2792 index = INSTR (20, 17);
2793 aarch64_set_vec_u8 (cpu, vd, index,
2794 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2795 }
2796 else if (INSTR (17, 17))
2797 {
2798 index = INSTR (20, 18);
2799 aarch64_set_vec_u16 (cpu, vd, index,
2800 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2801 }
2802 else if (INSTR (18, 18))
2803 {
2804 index = INSTR (20, 19);
2805 aarch64_set_vec_u32 (cpu, vd, index,
2806 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2807 }
2808 else if (INSTR (19, 19))
2809 {
2810 index = INSTR (20, 20);
2811 aarch64_set_vec_u64 (cpu, vd, index,
2812 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2813 }
2814 else
2815 HALT_NYI;
2816 }
2817
2818 static void
2819 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2820 {
2821 /* instr[31] = 0
2822 instr[30] = half(0)/full(1)
2823 instr[29,21] = 00 1110 000
2824 instr[20,16] = element size and index
2825 instr[15,10] = 0000 01
2826 instr[9,5] = V source
2827 instr[4,0] = V dest. */
2828
2829 unsigned full = INSTR (30, 30);
2830 unsigned vs = INSTR (9, 5);
2831 unsigned vd = INSTR (4, 0);
2832 int i, index;
2833
2834 NYI_assert (29, 21, 0x070);
2835 NYI_assert (15, 10, 0x01);
2836
2837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2838 if (INSTR (16, 16))
2839 {
2840 index = INSTR (20, 17);
2841
2842 for (i = 0; i < (full ? 16 : 8); i++)
2843 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2844 }
2845 else if (INSTR (17, 17))
2846 {
2847 index = INSTR (20, 18);
2848
2849 for (i = 0; i < (full ? 8 : 4); i++)
2850 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2851 }
2852 else if (INSTR (18, 18))
2853 {
2854 index = INSTR (20, 19);
2855
2856 for (i = 0; i < (full ? 4 : 2); i++)
2857 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2858 }
2859 else
2860 {
2861 if (INSTR (19, 19) == 0)
2862 HALT_UNALLOC;
2863
2864 if (! full)
2865 HALT_UNALLOC;
2866
2867 index = INSTR (20, 20);
2868
2869 for (i = 0; i < 2; i++)
2870 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2871 }
2872 }
2873
2874 static void
2875 do_vec_TBL (sim_cpu *cpu)
2876 {
2877 /* instr[31] = 0
2878 instr[30] = half(0)/full(1)
2879 instr[29,21] = 00 1110 000
2880 instr[20,16] = Vm
2881 instr[15] = 0
2882 instr[14,13] = vec length
2883 instr[12,10] = 000
2884 instr[9,5] = V start
2885 instr[4,0] = V dest */
2886
2887 int full = INSTR (30, 30);
2888 int len = INSTR (14, 13) + 1;
2889 unsigned vm = INSTR (20, 16);
2890 unsigned vn = INSTR (9, 5);
2891 unsigned vd = INSTR (4, 0);
2892 unsigned i;
2893
2894 NYI_assert (29, 21, 0x070);
2895 NYI_assert (12, 10, 0);
2896
2897 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2898 for (i = 0; i < (full ? 16 : 8); i++)
2899 {
2900 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2901 uint8_t val;
2902
2903 if (selector < 16)
2904 val = aarch64_get_vec_u8 (cpu, vn, selector);
2905 else if (selector < 32)
2906 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2907 else if (selector < 48)
2908 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2909 else if (selector < 64)
2910 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2911 else
2912 val = 0;
2913
2914 aarch64_set_vec_u8 (cpu, vd, i, val);
2915 }
2916 }
2917
2918 static void
2919 do_vec_TRN (sim_cpu *cpu)
2920 {
2921 /* instr[31] = 0
2922 instr[30] = half(0)/full(1)
2923 instr[29,24] = 00 1110
2924 instr[23,22] = size
2925 instr[21] = 0
2926 instr[20,16] = Vm
2927 instr[15] = 0
2928 instr[14] = TRN1 (0) / TRN2 (1)
2929 instr[13,10] = 1010
2930 instr[9,5] = V source
2931 instr[4,0] = V dest. */
2932
2933 int full = INSTR (30, 30);
2934 int second = INSTR (14, 14);
2935 unsigned vm = INSTR (20, 16);
2936 unsigned vn = INSTR (9, 5);
2937 unsigned vd = INSTR (4, 0);
2938 unsigned i;
2939
2940 NYI_assert (29, 24, 0x0E);
2941 NYI_assert (13, 10, 0xA);
2942
2943 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2944 switch (INSTR (23, 22))
2945 {
2946 case 0:
2947 for (i = 0; i < (full ? 8 : 4); i++)
2948 {
2949 aarch64_set_vec_u8
2950 (cpu, vd, i * 2,
2951 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2952 aarch64_set_vec_u8
2953 (cpu, vd, i * 2 + 1,
2954 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2955 }
2956 break;
2957
2958 case 1:
2959 for (i = 0; i < (full ? 4 : 2); i++)
2960 {
2961 aarch64_set_vec_u16
2962 (cpu, vd, i * 2,
2963 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2964 aarch64_set_vec_u16
2965 (cpu, vd, i * 2 + 1,
2966 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2967 }
2968 break;
2969
2970 case 2:
2971 aarch64_set_vec_u32
2972 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2973 aarch64_set_vec_u32
2974 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2975 aarch64_set_vec_u32
2976 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2977 aarch64_set_vec_u32
2978 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2979 break;
2980
2981 case 3:
2982 if (! full)
2983 HALT_UNALLOC;
2984
2985 aarch64_set_vec_u64 (cpu, vd, 0,
2986 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2987 aarch64_set_vec_u64 (cpu, vd, 1,
2988 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2989 break;
2990 }
2991 }
2992
2993 static void
2994 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2995 {
2996 /* instr[31] = 0
2997 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2998 [must be 1 for 64-bit xfer]
2999 instr[29,20] = 00 1110 0000
3000 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
3001 0100=> 32-bits. 1000=>64-bits
3002 instr[15,10] = 0000 11
3003 instr[9,5] = W source
3004 instr[4,0] = V dest. */
3005
3006 unsigned i;
3007 unsigned Vd = INSTR (4, 0);
3008 unsigned Rs = INSTR (9, 5);
3009 int both = INSTR (30, 30);
3010
3011 NYI_assert (29, 20, 0x0E0);
3012 NYI_assert (15, 10, 0x03);
3013
3014 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3015 switch (INSTR (19, 16))
3016 {
3017 case 1:
3018 for (i = 0; i < (both ? 16 : 8); i++)
3019 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
3020 break;
3021
3022 case 2:
3023 for (i = 0; i < (both ? 8 : 4); i++)
3024 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
3025 break;
3026
3027 case 4:
3028 for (i = 0; i < (both ? 4 : 2); i++)
3029 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
3030 break;
3031
3032 case 8:
3033 if (!both)
3034 HALT_NYI;
3035 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3036 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3037 break;
3038
3039 default:
3040 HALT_NYI;
3041 }
3042 }
3043
3044 static void
3045 do_vec_UZP (sim_cpu *cpu)
3046 {
3047 /* instr[31] = 0
3048 instr[30] = half(0)/full(1)
3049 instr[29,24] = 00 1110
3050 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3051 instr[21] = 0
3052 instr[20,16] = Vm
3053 instr[15] = 0
3054 instr[14] = lower (0) / upper (1)
3055 instr[13,10] = 0110
3056 instr[9,5] = Vn
3057 instr[4,0] = Vd. */
3058
3059 int full = INSTR (30, 30);
3060 int upper = INSTR (14, 14);
3061
3062 unsigned vm = INSTR (20, 16);
3063 unsigned vn = INSTR (9, 5);
3064 unsigned vd = INSTR (4, 0);
3065
3066 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3067 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3068 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3069 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3070
3071 uint64_t val1;
3072 uint64_t val2;
3073
3074 uint64_t input2 = full ? val_n2 : val_m1;
3075
3076 NYI_assert (29, 24, 0x0E);
3077 NYI_assert (21, 21, 0);
3078 NYI_assert (15, 15, 0);
3079 NYI_assert (13, 10, 6);
3080
3081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3082 switch (INSTR (23, 22))
3083 {
3084 case 0:
3085 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
3086 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3087 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3088 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3089
3090 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3091 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3092 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3093 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3094
3095 if (full)
3096 {
3097 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
3098 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3099 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3100 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3101
3102 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3103 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3104 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3105 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3106 }
3107 break;
3108
3109 case 1:
3110 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3111 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3112
3113 val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3114 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3115
3116 if (full)
3117 {
3118 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3119 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3120
3121 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3122 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3123 }
3124 break;
3125
3126 case 2:
3127 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3128 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3129
3130 if (full)
3131 {
3132 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3133 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3134 }
3135 break;
3136
3137 case 3:
3138 if (! full)
3139 HALT_UNALLOC;
3140
3141 val1 = upper ? val_n2 : val_n1;
3142 val2 = upper ? val_m2 : val_m1;
3143 break;
3144 }
3145
3146 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3147 if (full)
3148 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3149 }
3150
3151 static void
3152 do_vec_ZIP (sim_cpu *cpu)
3153 {
3154 /* instr[31] = 0
3155 instr[30] = half(0)/full(1)
3156 instr[29,24] = 00 1110
3157 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3158 instr[21] = 0
3159 instr[20,16] = Vm
3160 instr[15] = 0
3161 instr[14] = lower (0) / upper (1)
3162 instr[13,10] = 1110
3163 instr[9,5] = Vn
3164 instr[4,0] = Vd. */
3165
3166 int full = INSTR (30, 30);
3167 int upper = INSTR (14, 14);
3168
3169 unsigned vm = INSTR (20, 16);
3170 unsigned vn = INSTR (9, 5);
3171 unsigned vd = INSTR (4, 0);
3172
3173 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3174 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3175 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3176 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3177
3178 uint64_t val1 = 0;
3179 uint64_t val2 = 0;
3180
3181 uint64_t input1 = upper ? val_n2 : val_n1;
3182 uint64_t input2 = upper ? val_m2 : val_m1;
3183
3184 NYI_assert (29, 24, 0x0E);
3185 NYI_assert (21, 21, 0);
3186 NYI_assert (15, 15, 0);
3187 NYI_assert (13, 10, 0xE);
3188
3189 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3190 switch (INSTR (23, 22))
3191 {
3192 case 0:
3193 val1 =
3194 ((input1 << 0) & (0xFF << 0))
3195 | ((input2 << 8) & (0xFF << 8))
3196 | ((input1 << 8) & (0xFF << 16))
3197 | ((input2 << 16) & (0xFF << 24))
3198 | ((input1 << 16) & (0xFFULL << 32))
3199 | ((input2 << 24) & (0xFFULL << 40))
3200 | ((input1 << 24) & (0xFFULL << 48))
3201 | ((input2 << 32) & (0xFFULL << 56));
3202
3203 val2 =
3204 ((input1 >> 32) & (0xFF << 0))
3205 | ((input2 >> 24) & (0xFF << 8))
3206 | ((input1 >> 24) & (0xFF << 16))
3207 | ((input2 >> 16) & (0xFF << 24))
3208 | ((input1 >> 16) & (0xFFULL << 32))
3209 | ((input2 >> 8) & (0xFFULL << 40))
3210 | ((input1 >> 8) & (0xFFULL << 48))
3211 | ((input2 >> 0) & (0xFFULL << 56));
3212 break;
3213
3214 case 1:
3215 val1 =
3216 ((input1 << 0) & (0xFFFF << 0))
3217 | ((input2 << 16) & (0xFFFF << 16))
3218 | ((input1 << 16) & (0xFFFFULL << 32))
3219 | ((input2 << 32) & (0xFFFFULL << 48));
3220
3221 val2 =
3222 ((input1 >> 32) & (0xFFFF << 0))
3223 | ((input2 >> 16) & (0xFFFF << 16))
3224 | ((input1 >> 16) & (0xFFFFULL << 32))
3225 | ((input2 >> 0) & (0xFFFFULL << 48));
3226 break;
3227
3228 case 2:
3229 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3230 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3231 break;
3232
3233 case 3:
3234 val1 = input1;
3235 val2 = input2;
3236 break;
3237 }
3238
3239 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3240 if (full)
3241 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3242 }
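
/* A worked ZIP1 example, kept out of the build ("show_zip1" is a
   hypothetical name): with 32-bit lanes, lane 0 of the result comes
   from Vn and lane 1 from Vm.  */
#if 0
static void
show_zip1 (sim_cpu *cpu)
{
  aarch64_set_vec_u64 (cpu, 1, 0, 0x1111111100000000ULL); /* Vn.S[1,0].  */
  aarch64_set_vec_u64 (cpu, 2, 0, 0x3333333322222222ULL); /* Vm.S[1,0].  */
  /* Executing ZIP1 V0.2S, V1.2S, V2.2S now yields
     V0.D[0] == 0x2222222200000000, i.e. { Vn.S[0], Vm.S[0] }.  */
}
#endif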
3243
3244 /* Floating point immediates are encoded in 8 bits.
3245 fpimm[7] = sign bit.
3246 fpimm[6:4] = signed exponent.
3247 fpimm[3:0] = fraction (assuming leading 1).
3248 i.e. F = s * 1.f * 2^(e - b). */
3249
3250 static float
3251 fp_immediate_for_encoding_32 (uint32_t imm8)
3252 {
3253 float u;
3254 uint32_t s, e, f, i;
3255
3256 s = (imm8 >> 7) & 0x1;
3257 e = (imm8 >> 4) & 0x7;
3258 f = imm8 & 0xf;
3259
3260 /* The fp value is (-1)^s * n/16 * 2^r where n is 16+f and r is the signed exponent (e = 0..3 ==> +1..+4, e = 4..7 ==> -3..0). */
3261 u = (16.0 + f) / 16.0;
3262
3263 /* N.B. exponent is signed. */
3264 if (e < 4)
3265 {
3266 int epos = e;
3267
3268 for (i = 0; i <= epos; i++)
3269 u *= 2.0;
3270 }
3271 else
3272 {
3273 int eneg = 7 - e;
3274
3275 for (i = 0; i < eneg; i++)
3276 u /= 2.0;
3277 }
3278
3279 if (s)
3280 u = - u;
3281
3282 return u;
3283 }
3284
3285 static double
3286 fp_immediate_for_encoding_64 (uint32_t imm8)
3287 {
3288 double u;
3289 uint32_t s, e, f, i;
3290
3291 s = (imm8 >> 7) & 0x1;
3292 e = (imm8 >> 4) & 0x7;
3293 f = imm8 & 0xf;
3294
3295 /* The fp value is (-1)^s * n/16 * 2^r where n is 16+f and r is the signed exponent (e = 0..3 ==> +1..+4, e = 4..7 ==> -3..0). */
3296 u = (16.0 + f) / 16.0;
3297
3298 /* N.B. exponent is signed. */
3299 if (e < 4)
3300 {
3301 int epos = e;
3302
3303 for (i = 0; i <= epos; i++)
3304 u *= 2.0;
3305 }
3306 else
3307 {
3308 int eneg = 7 - e;
3309
3310 for (i = 0; i < eneg; i++)
3311 u /= 2.0;
3312 }
3313
3314 if (s)
3315 u = - u;
3316
3317 return u;
3318 }
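
/* The same expansion can be done without FP arithmetic by building the
   IEEE 754 single-precision bit pattern directly: for imm8 = abcdefgh
   the result is a : NOT(b) : bbbbb : cdefgh : Zeros(19).  A sketch
   kept out of the build, meant only as a cross-check of the loop above
   ("vfp_expand_imm32" is a hypothetical name):  */
#if 0
static uint32_t
vfp_expand_imm32 (uint32_t imm8)
{
  uint32_t a = (imm8 >> 7) & 1;
  uint32_t b = (imm8 >> 6) & 1;
  uint32_t cdefgh = imm8 & 0x3f;

  return (a << 31)                /* Sign.  */
    | ((b ^ 1) << 30)             /* NOT (b).  */
    | ((b ? 0x1fu : 0u) << 25)    /* b replicated five times.  */
    | (cdefgh << 19);             /* c,d end the exponent; efgh top
				     the fraction.  */
}
#endif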
3319
3320 static void
3321 do_vec_MOV_immediate (sim_cpu *cpu)
3322 {
3323 /* instr[31] = 0
3324 instr[30] = full/half selector
3325 instr[29,19] = 00111100000
3326 instr[18,16] = high 3 bits of uimm8
3327 instr[15,12] = size & shift:
3328 0000 => 32-bit
3329 0010 => 32-bit + LSL#8
3330 0100 => 32-bit + LSL#16
3331 0110 => 32-bit + LSL#24
3332 1010 => 16-bit + LSL#8
3333 1000 => 16-bit
3334 1101 => 32-bit + MSL#16
3335 1100 => 32-bit + MSL#8
3336 1110 => 8-bit
3337 1111 => double
3338 instr[11,10] = 01
3339 instr[9,5] = low 5-bits of uimm8
3340 instr[4,0] = Vd. */
3341
3342 int full = INSTR (30, 30);
3343 unsigned vd = INSTR (4, 0);
3344 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3345 unsigned i;
3346
3347 NYI_assert (29, 19, 0x1E0);
3348 NYI_assert (11, 10, 1);
3349
3350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3351 switch (INSTR (15, 12))
3352 {
3353 case 0x0: /* 32-bit, no shift. */
3354 case 0x2: /* 32-bit, shift by 8. */
3355 case 0x4: /* 32-bit, shift by 16. */
3356 case 0x6: /* 32-bit, shift by 24. */
3357 val <<= (8 * INSTR (14, 13));
3358 for (i = 0; i < (full ? 4 : 2); i++)
3359 aarch64_set_vec_u32 (cpu, vd, i, val);
3360 break;
3361
3362 case 0xa: /* 16-bit, shift by 8. */
3363 val <<= 8;
3364 /* Fall through. */
3365 case 0x8: /* 16-bit, no shift. */
3366 for (i = 0; i < (full ? 8 : 4); i++)
3367 aarch64_set_vec_u16 (cpu, vd, i, val);
3368 break;
3369
3370 case 0xd: /* 32-bit, mask shift by 16. */
3371 val <<= 8;
3372 val |= 0xFF;
3373 /* Fall through. */
3374 case 0xc: /* 32-bit, mask shift by 8. */
3375 val <<= 8;
3376 val |= 0xFF;
3377 for (i = 0; i < (full ? 4 : 2); i++)
3378 aarch64_set_vec_u32 (cpu, vd, i, val);
3379 break;
3380
3381 case 0xe: /* 8-bit, no shift. */
3382 for (i = 0; i < (full ? 16 : 8); i++)
3383 aarch64_set_vec_u8 (cpu, vd, i, val);
3384 break;
3385
3386 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3387 {
3388 float u = fp_immediate_for_encoding_32 (val);
3389 for (i = 0; i < (full ? 4 : 2); i++)
3390 aarch64_set_vec_float (cpu, vd, i, u);
3391 break;
3392 }
3393
3394 default:
3395 HALT_NYI;
3396 }
3397 }
3398
3399 static void
3400 do_vec_MVNI (sim_cpu *cpu)
3401 {
3402 /* instr[31] = 0
3403 instr[30] = full/half selector
3404 instr[29,19] = 10111100000
3405 instr[18,16] = high 3 bits of uimm8
3406 instr[15,12] = selector
3407 instr[11,10] = 01
3408 instr[9,5] = low 5-bits of uimm8
3409 instr[4,0] = Vd. */
3410
3411 int full = INSTR (30, 30);
3412 unsigned vd = INSTR (4, 0);
3413 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3414 unsigned i;
3415
3416 NYI_assert (29, 19, 0x5E0);
3417 NYI_assert (11, 10, 1);
3418
3419 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3420 switch (INSTR (15, 12))
3421 {
3422 case 0x0: /* 32-bit, no shift. */
3423 case 0x2: /* 32-bit, shift by 8. */
3424 case 0x4: /* 32-bit, shift by 16. */
3425 case 0x6: /* 32-bit, shift by 24. */
3426 val <<= (8 * INSTR (14, 13));
3427 val = ~ val;
3428 for (i = 0; i < (full ? 4 : 2); i++)
3429 aarch64_set_vec_u32 (cpu, vd, i, val);
3430 return;
3431
3432 case 0xa: /* 16-bit, 8 bit shift. */
3433 val <<= 8; /* Fall through. */
3434 case 0x8: /* 16-bit, no shift. */
3435 val = ~ val;
3436 for (i = 0; i < (full ? 8 : 4); i++)
3437 aarch64_set_vec_u16 (cpu, vd, i, val);
3438 return;
3439
3440 case 0xd: /* 32-bit, mask shift by 16. */
3441 val <<= 8;
3442 val |= 0xFF; /* Fall through. */
3443 case 0xc: /* 32-bit, mask shift by 8. */
3444 val <<= 8;
3445 val |= 0xFF;
3446 val = ~ val;
3447 for (i = 0; i < (full ? 4 : 2); i++)
3448 aarch64_set_vec_u32 (cpu, vd, i, val);
3449 return;
3450
3451 case 0xE: /* MOVI Dn, #mask64 */
3452 {
3453 uint64_t mask = 0;
3454
3455 for (i = 0; i < 8; i++)
3456 if (val & (1 << i))
3457 mask |= (0xFFUL << (i * 8));
3458 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3459 aarch64_set_vec_u64 (cpu, vd, 1, full ? mask : 0);
3460 return;
3461 }
3462
3463 case 0xf: /* FMOV Vd.2D, #fpimm. */
3464 {
3465 double u = fp_immediate_for_encoding_64 (val);
3466
3467 if (! full)
3468 HALT_UNALLOC;
3469
3470 aarch64_set_vec_double (cpu, vd, 0, u);
3471 aarch64_set_vec_double (cpu, vd, 1, u);
3472 return;
3473 }
3474
3475 default:
3476 HALT_NYI;
3477 }
3478 }
3479
3480 #define ABS(A) ((A) < 0 ? - (A) : (A))
3481
3482 static void
3483 do_vec_ABS (sim_cpu *cpu)
3484 {
3485 /* instr[31] = 0
3486 instr[30] = half(0)/full(1)
3487 instr[29,24] = 00 1110
3488 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3489 instr[21,10] = 10 0000 1011 10
3490 instr[9,5] = Vn
3491 instr[4,0] = Vd. */
3492
3493 unsigned vn = INSTR (9, 5);
3494 unsigned vd = INSTR (4, 0);
3495 unsigned full = INSTR (30, 30);
3496 unsigned i;
3497
3498 NYI_assert (29, 24, 0x0E);
3499 NYI_assert (21, 10, 0x82E);
3500
3501 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3502 switch (INSTR (23, 22))
3503 {
3504 case 0:
3505 for (i = 0; i < (full ? 16 : 8); i++)
3506 aarch64_set_vec_s8 (cpu, vd, i,
3507 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3508 break;
3509
3510 case 1:
3511 for (i = 0; i < (full ? 8 : 4); i++)
3512 aarch64_set_vec_s16 (cpu, vd, i,
3513 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3514 break;
3515
3516 case 2:
3517 for (i = 0; i < (full ? 4 : 2); i++)
3518 aarch64_set_vec_s32 (cpu, vd, i,
3519 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3520 break;
3521
3522 case 3:
3523 if (! full)
3524 HALT_NYI;
3525 for (i = 0; i < 2; i++)
3526 aarch64_set_vec_s64 (cpu, vd, i,
3527 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3528 break;
3529 }
3530 }
3531
3532 static void
3533 do_vec_ADDV (sim_cpu *cpu)
3534 {
3535 /* instr[31] = 0
3536 instr[30] = full/half selector
3537 instr[29,24] = 00 1110
3538 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3539 instr[21,10] = 11 0001 1011 10
3540 instr[9,5] = Vm
3541 instr[4,0] = Rd. */
3542
3543 unsigned vm = INSTR (9, 5);
3544 unsigned rd = INSTR (4, 0);
3545 unsigned i;
3546 int full = INSTR (30, 30);
3547
3548 NYI_assert (29, 24, 0x0E);
3549 NYI_assert (21, 10, 0xC6E);
3550
3551 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3552 switch (INSTR (23, 22))
3553 {
3554 case 0:
3555 {
3556 uint8_t val = 0;
3557 for (i = 0; i < (full ? 16 : 8); i++)
3558 val += aarch64_get_vec_u8 (cpu, vm, i);
3559 aarch64_set_vec_u64 (cpu, rd, 0, val);
3560 return;
3561 }
3562
3563 case 1:
3564 {
3565 uint16_t val = 0;
3566 for (i = 0; i < (full ? 8 : 4); i++)
3567 val += aarch64_get_vec_u16 (cpu, vm, i);
3568 aarch64_set_vec_u64 (cpu, rd, 0, val);
3569 return;
3570 }
3571
3572 case 2:
3573 {
3574 uint32_t val = 0;
3575 if (! full)
3576 HALT_UNALLOC;
3577 for (i = 0; i < 4; i++)
3578 val += aarch64_get_vec_u32 (cpu, vm, i);
3579 aarch64_set_vec_u64 (cpu, rd, 0, val);
3580 return;
3581 }
3582
3583 case 3:
3584 HALT_UNALLOC;
3585 }
3586 }
3587
3588 static void
3589 do_vec_ins_2 (sim_cpu *cpu)
3590 {
3591 /* instr[31,21] = 01001110000
3592 instr[20,18] = size & element selector
3593 instr[17,14] = 0000
3594 instr[13] = direction: to vec(0), from vec (1)
3595 instr[12,10] = 111
3596 instr[9,5] = Vm
3597 instr[4,0] = Vd. */
3598
3599 unsigned elem;
3600 unsigned vm = INSTR (9, 5);
3601 unsigned vd = INSTR (4, 0);
3602
3603 NYI_assert (31, 21, 0x270);
3604 NYI_assert (17, 14, 0);
3605 NYI_assert (12, 10, 7);
3606
3607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3608 if (INSTR (13, 13) == 1)
3609 {
3610 if (INSTR (18, 18) == 1)
3611 {
3612 /* 32-bit moves. */
3613 elem = INSTR (20, 19);
3614 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3615 aarch64_get_vec_u32 (cpu, vm, elem));
3616 }
3617 else
3618 {
3619 /* 64-bit moves. */
3620 if (INSTR (19, 19) != 1)
3621 HALT_NYI;
3622
3623 elem = INSTR (20, 20);
3624 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3625 aarch64_get_vec_u64 (cpu, vm, elem));
3626 }
3627 }
3628 else
3629 {
3630 if (INSTR (18, 18) == 1)
3631 {
3632 /* 32-bit moves. */
3633 elem = INSTR (20, 19);
3634 aarch64_set_vec_u32 (cpu, vd, elem,
3635 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3636 }
3637 else
3638 {
3639 /* 64-bit moves. */
3640 if (INSTR (19, 19) != 1)
3641 HALT_NYI;
3642
3643 elem = INSTR (20, 20);
3644 aarch64_set_vec_u64 (cpu, vd, elem,
3645 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3646 }
3647 }
3648 }
3649
3650 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3651 do \
3652 { \
3653 DST_TYPE a[N], b[N]; \
3654 \
3655 for (i = 0; i < (N); i++) \
3656 { \
3657 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3658 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3659 } \
3660 for (i = 0; i < (N); i++) \
3661 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3662 } \
3663 while (0)
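
/* The temporaries make the macro safe when Vd aliases Vn or Vm, and
   "bias" selects the upper half of the sources for the second-part
   (xMULL2) forms.  Its expansion for the 16-bit to 32-bit case used by
   do_vec_mull below looks like this sketch, kept out of the build:  */
#if 0
  {
    uint32_t a[4], b[4];

    for (i = 0; i < 4; i++)
      {
	a[i] = aarch64_get_vec_u16 (cpu, vn, i + bias);
	b[i] = aarch64_get_vec_u16 (cpu, vm, i + bias);
      }
    for (i = 0; i < 4; i++)
      aarch64_set_vec_u32 (cpu, vd, i, a[i] * b[i]);
  }
#endif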
3664
3665 static void
3666 do_vec_mull (sim_cpu *cpu)
3667 {
3668 /* instr[31] = 0
3669 instr[30] = lower(0)/upper(1) selector
3670 instr[29] = signed(0)/unsigned(1)
3671 instr[28,24] = 0 1110
3672 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3673 instr[21] = 1
3674 instr[20,16] = Vm
3675 instr[15,10] = 11 0000
3676 instr[9,5] = Vn
3677 instr[4,0] = Vd. */
3678
3679 int unsign = INSTR (29, 29);
3680 int bias = INSTR (30, 30);
3681 unsigned vm = INSTR (20, 16);
3682 unsigned vn = INSTR ( 9, 5);
3683 unsigned vd = INSTR ( 4, 0);
3684 unsigned i;
3685
3686 NYI_assert (28, 24, 0x0E);
3687 NYI_assert (15, 10, 0x30);
3688
3689 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3690 /* NB: Read source values before writing results, in case
3691 the source and destination vectors are the same. */
3692 switch (INSTR (23, 22))
3693 {
3694 case 0:
3695 if (bias)
3696 bias = 8;
3697 if (unsign)
3698 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3699 else
3700 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3701 return;
3702
3703 case 1:
3704 if (bias)
3705 bias = 4;
3706 if (unsign)
3707 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3708 else
3709 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3710 return;
3711
3712 case 2:
3713 if (bias)
3714 bias = 2;
3715 if (unsign)
3716 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3717 else
3718 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3719 return;
3720
3721 case 3:
3722 HALT_NYI;
3723 }
3724 }
3725
3726 static void
3727 do_vec_fadd (sim_cpu *cpu)
3728 {
3729 /* instr[31] = 0
3730 instr[30] = half(0)/full(1)
3731 instr[29,24] = 001110
3732 instr[23] = FADD(0)/FSUB(1)
3733 instr[22] = float (0)/double(1)
3734 instr[21] = 1
3735 instr[20,16] = Vm
3736 instr[15,10] = 110101
3737 instr[9,5] = Vn
3738 instr[4,0] = Vd. */
3739
3740 unsigned vm = INSTR (20, 16);
3741 unsigned vn = INSTR (9, 5);
3742 unsigned vd = INSTR (4, 0);
3743 unsigned i;
3744 int full = INSTR (30, 30);
3745
3746 NYI_assert (29, 24, 0x0E);
3747 NYI_assert (21, 21, 1);
3748 NYI_assert (15, 10, 0x35);
3749
3750 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3751 if (INSTR (23, 23))
3752 {
3753 if (INSTR (22, 22))
3754 {
3755 if (! full)
3756 HALT_NYI;
3757
3758 for (i = 0; i < 2; i++)
3759 aarch64_set_vec_double (cpu, vd, i,
3760 aarch64_get_vec_double (cpu, vn, i)
3761 - aarch64_get_vec_double (cpu, vm, i));
3762 }
3763 else
3764 {
3765 for (i = 0; i < (full ? 4 : 2); i++)
3766 aarch64_set_vec_float (cpu, vd, i,
3767 aarch64_get_vec_float (cpu, vn, i)
3768 - aarch64_get_vec_float (cpu, vm, i));
3769 }
3770 }
3771 else
3772 {
3773 if (INSTR (22, 22))
3774 {
3775 if (! full)
3776 HALT_NYI;
3777
3778 for (i = 0; i < 2; i++)
3779 aarch64_set_vec_double (cpu, vd, i,
3780 aarch64_get_vec_double (cpu, vm, i)
3781 + aarch64_get_vec_double (cpu, vn, i));
3782 }
3783 else
3784 {
3785 for (i = 0; i < (full ? 4 : 2); i++)
3786 aarch64_set_vec_float (cpu, vd, i,
3787 aarch64_get_vec_float (cpu, vm, i)
3788 + aarch64_get_vec_float (cpu, vn, i));
3789 }
3790 }
3791 }
3792
3793 static void
3794 do_vec_add (sim_cpu *cpu)
3795 {
3796 /* instr[31] = 0
3797 instr[30] = full/half selector
3798 instr[29,24] = 001110
3799 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3800 instr[21] = 1
3801 instr[20,16] = Vm
3802 instr[15,10] = 100001
3803 instr[9,5] = Vn
3804 instr[4,0] = Vd. */
3805
3806 unsigned vm = INSTR (20, 16);
3807 unsigned vn = INSTR (9, 5);
3808 unsigned vd = INSTR (4, 0);
3809 unsigned i;
3810 int full = INSTR (30, 30);
3811
3812 NYI_assert (29, 24, 0x0E);
3813 NYI_assert (21, 21, 1);
3814 NYI_assert (15, 10, 0x21);
3815
3816 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3817 switch (INSTR (23, 22))
3818 {
3819 case 0:
3820 for (i = 0; i < (full ? 16 : 8); i++)
3821 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3822 + aarch64_get_vec_u8 (cpu, vm, i));
3823 return;
3824
3825 case 1:
3826 for (i = 0; i < (full ? 8 : 4); i++)
3827 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3828 + aarch64_get_vec_u16 (cpu, vm, i));
3829 return;
3830
3831 case 2:
3832 for (i = 0; i < (full ? 4 : 2); i++)
3833 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3834 + aarch64_get_vec_u32 (cpu, vm, i));
3835 return;
3836
3837 case 3:
3838 if (! full)
3839 HALT_UNALLOC;
3840 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3841 + aarch64_get_vec_u64 (cpu, vm, 0));
3842 aarch64_set_vec_u64 (cpu, vd, 1,
3843 aarch64_get_vec_u64 (cpu, vn, 1)
3844 + aarch64_get_vec_u64 (cpu, vm, 1));
3845 return;
3846 }
3847 }
3848
3849 static void
3850 do_vec_mul (sim_cpu *cpu)
3851 {
3852 /* instr[31] = 0
3853 instr[30] = full/half selector
3854 instr[29,24] = 00 1110
3855 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3856 instr[21] = 1
3857 instr[20,16] = Vm
3858 instr[15,10] = 10 0111
3859 instr[9,5] = Vn
3860 instr[4,0] = Vd. */
3861
3862 unsigned vm = INSTR (20, 16);
3863 unsigned vn = INSTR (9, 5);
3864 unsigned vd = INSTR (4, 0);
3865 unsigned i;
3866 int full = INSTR (30, 30);
3867 int bias = 0;
3868
3869 NYI_assert (29, 24, 0x0E);
3870 NYI_assert (21, 21, 1);
3871 NYI_assert (15, 10, 0x27);
3872
3873 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3874 switch (INSTR (23, 22))
3875 {
3876 case 0:
3877 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3878 return;
3879
3880 case 1:
3881 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3882 return;
3883
3884 case 2:
3885 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3886 return;
3887
3888 case 3:
3889 HALT_UNALLOC;
3890 }
3891 }
3892
3893 static void
3894 do_vec_MLA (sim_cpu *cpu)
3895 {
3896 /* instr[31] = 0
3897 instr[30] = full/half selector
3898 instr[29,24] = 00 1110
3899 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3900 instr[21] = 1
3901 instr[20,16] = Vm
3902 instr[15,10] = 1001 01
3903 instr[9,5] = Vn
3904 instr[4,0] = Vd. */
3905
3906 unsigned vm = INSTR (20, 16);
3907 unsigned vn = INSTR (9, 5);
3908 unsigned vd = INSTR (4, 0);
3909 unsigned i;
3910 int full = INSTR (30, 30);
3911
3912 NYI_assert (29, 24, 0x0E);
3913 NYI_assert (21, 21, 1);
3914 NYI_assert (15, 10, 0x25);
3915
3916 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3917 switch (INSTR (23, 22))
3918 {
3919 case 0:
3920 for (i = 0; i < (full ? 16 : 8); i++)
3921 aarch64_set_vec_u8 (cpu, vd, i,
3922 aarch64_get_vec_u8 (cpu, vd, i)
3923 + (aarch64_get_vec_u8 (cpu, vn, i)
3924 * aarch64_get_vec_u8 (cpu, vm, i)));
3925 return;
3926
3927 case 1:
3928 for (i = 0; i < (full ? 8 : 4); i++)
3929 aarch64_set_vec_u16 (cpu, vd, i,
3930 aarch64_get_vec_u16 (cpu, vd, i)
3931 + (aarch64_get_vec_u16 (cpu, vn, i)
3932 * aarch64_get_vec_u16 (cpu, vm, i)));
3933 return;
3934
3935 case 2:
3936 for (i = 0; i < (full ? 4 : 2); i++)
3937 aarch64_set_vec_u32 (cpu, vd, i,
3938 aarch64_get_vec_u32 (cpu, vd, i)
3939 + (aarch64_get_vec_u32 (cpu, vn, i)
3940 * aarch64_get_vec_u32 (cpu, vm, i)));
3941 return;
3942
3943 default:
3944 HALT_UNALLOC;
3945 }
3946 }
3947
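/* The four helpers below implement the IEEE-754 maxNum/minNum
   semantics used by the FMAXNM/FMINNM family: when exactly one
   operand is a NaN the other, numeric, operand is returned, and a
   NaN results only when both operands are NaNs.  The quiet versus
   signalling NaN distinction is not modelled here.  */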
3948 static float
3949 fmaxnm (float a, float b)
3950 {
3951 if (! isnan (a))
3952 {
3953 if (! isnan (b))
3954 return a > b ? a : b;
3955 return a;
3956 }
3957 else if (! isnan (b))
3958 return b;
3959 return a;
3960 }
3961
3962 static float
3963 fminnm (float a, float b)
3964 {
3965 if (! isnan (a))
3966 {
3967 if (! isnan (b))
3968 return a < b ? a : b;
3969 return a;
3970 }
3971 else if (! isnan (b))
3972 return b;
3973 return a;
3974 }
3975
3976 static double
3977 dmaxnm (double a, double b)
3978 {
3979 if (! isnan (a))
3980 {
3981 if (! isnan (b))
3982 return a > b ? a : b;
3983 return a;
3984 }
3985 else if (! isnan (b))
3986 return b;
3987 return a;
3988 }
3989
3990 static double
3991 dminnm (double a, double b)
3992 {
3993 if (! isnan (a))
3994 {
3995 if (! isnan (b))
3996 return a < b ? a : b;
3997 return a;
3998 }
3999 else if (! isnan (b))
4000 return b;
4001 return a;
4002 }
4003
4004 static void
4005 do_vec_FminmaxNMP (sim_cpu *cpu)
4006 {
4007 /* instr [31] = 0
4008 instr [30] = half (0)/full (1)
4009 instr [29,24] = 10 1110
4010 instr [23] = max(0)/min(1)
4011 instr [22] = float (0)/double (1)
4012 instr [21] = 1
4013 instr [20,16] = Vm
4014 instr [15,10] = 1100 01
4015 instr [9,5] = Vn
4016 instr [4,0] = Vd. */
4017
4018 unsigned vm = INSTR (20, 16);
4019 unsigned vn = INSTR (9, 5);
4020 unsigned vd = INSTR (4, 0);
4021 int full = INSTR (30, 30);
4022
4023 NYI_assert (29, 24, 0x2E);
4024 NYI_assert (21, 21, 1);
4025 NYI_assert (15, 10, 0x31);
4026
4027 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4028 if (INSTR (22, 22))
4029 {
4030 double (* fn)(double, double) = INSTR (23, 23)
4031 ? dminnm : dmaxnm;
4032
4033 if (! full)
4034 HALT_NYI;
4035 aarch64_set_vec_double (cpu, vd, 0,
4036 fn (aarch64_get_vec_double (cpu, vn, 0),
4037 aarch64_get_vec_double (cpu, vn, 1)));
4038 aarch64_set_vec_double (cpu, vd, 1,
4039 fn (aarch64_get_vec_double (cpu, vm, 0),
4040 aarch64_get_vec_double (cpu, vm, 1)));
4041 }
4042 else
4043 {
4044 float (* fn)(float, float) = INSTR (23, 23)
4045 ? fminnm : fmaxnm;
4046
4047 aarch64_set_vec_float (cpu, vd, 0,
4048 fn (aarch64_get_vec_float (cpu, vn, 0),
4049 aarch64_get_vec_float (cpu, vn, 1)));
4050 if (full)
4051 aarch64_set_vec_float (cpu, vd, 1,
4052 fn (aarch64_get_vec_float (cpu, vn, 2),
4053 aarch64_get_vec_float (cpu, vn, 3)));
4054
4055 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
4056 fn (aarch64_get_vec_float (cpu, vm, 0),
4057 aarch64_get_vec_float (cpu, vm, 1)));
4058 if (full)
4059 aarch64_set_vec_float (cpu, vd, 3,
4060 fn (aarch64_get_vec_float (cpu, vm, 2),
4061 aarch64_get_vec_float (cpu, vm, 3)));
4062 }
4063 }
4064
4065 static void
4066 do_vec_AND (sim_cpu *cpu)
4067 {
4068 /* instr[31] = 0
4069 instr[30] = half (0)/full (1)
4070 instr[29,21] = 001110001
4071 instr[20,16] = Vm
4072 instr[15,10] = 000111
4073 instr[9,5] = Vn
4074 instr[4,0] = Vd. */
4075
4076 unsigned vm = INSTR (20, 16);
4077 unsigned vn = INSTR (9, 5);
4078 unsigned vd = INSTR (4, 0);
4079 unsigned i;
4080 int full = INSTR (30, 30);
4081
4082 NYI_assert (29, 21, 0x071);
4083 NYI_assert (15, 10, 0x07);
4084
4085 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4086 for (i = 0; i < (full ? 4 : 2); i++)
4087 aarch64_set_vec_u32 (cpu, vd, i,
4088 aarch64_get_vec_u32 (cpu, vn, i)
4089 & aarch64_get_vec_u32 (cpu, vm, i));
4090 }
4091
4092 static void
4093 do_vec_BSL (sim_cpu *cpu)
4094 {
4095 /* instr[31] = 0
4096 instr[30] = half (0)/full (1)
4097 instr[29,21] = 101110011
4098 instr[20,16] = Vm
4099 instr[15,10] = 000111
4100 instr[9,5] = Vn
4101 instr[4,0] = Vd. */
4102
4103 unsigned vm = INSTR (20, 16);
4104 unsigned vn = INSTR (9, 5);
4105 unsigned vd = INSTR (4, 0);
4106 unsigned i;
4107 int full = INSTR (30, 30);
4108
4109 NYI_assert (29, 21, 0x173);
4110 NYI_assert (15, 10, 0x07);
4111
4112 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4113 for (i = 0; i < (full ? 16 : 8); i++)
4114 aarch64_set_vec_u8 (cpu, vd, i,
4115 ( aarch64_get_vec_u8 (cpu, vd, i)
4116 & aarch64_get_vec_u8 (cpu, vn, i))
4117 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4118 & aarch64_get_vec_u8 (cpu, vm, i)));
4119 }
4120
4121 static void
4122 do_vec_EOR (sim_cpu *cpu)
4123 {
4124 /* instr[31] = 0
4125 instr[30] = half (0)/full (1)
4126 instr[29,21] = 10 1110 001
4127 instr[20,16] = Vm
4128 instr[15,10] = 000111
4129 instr[9,5] = Vn
4130 instr[4,0] = Vd. */
4131
4132 unsigned vm = INSTR (20, 16);
4133 unsigned vn = INSTR (9, 5);
4134 unsigned vd = INSTR (4, 0);
4135 unsigned i;
4136 int full = INSTR (30, 30);
4137
4138 NYI_assert (29, 21, 0x171);
4139 NYI_assert (15, 10, 0x07);
4140
4141 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4142 for (i = 0; i < (full ? 4 : 2); i++)
4143 aarch64_set_vec_u32 (cpu, vd, i,
4144 aarch64_get_vec_u32 (cpu, vn, i)
4145 ^ aarch64_get_vec_u32 (cpu, vm, i));
4146 }
4147
4148 static void
4149 do_vec_bit (sim_cpu *cpu)
4150 {
4151 /* instr[31] = 0
4152 instr[30] = half (0)/full (1)
4153 instr[29,23] = 10 1110 1
4154 instr[22] = BIT (0) / BIF (1)
4155 instr[21] = 1
4156 instr[20,16] = Vm
4157 instr[15,10] = 0001 11
4158 instr[9,5] = Vn
4159 instr[4,0] = Vd. */
4160
4161 unsigned vm = INSTR (20, 16);
4162 unsigned vn = INSTR (9, 5);
4163 unsigned vd = INSTR (4, 0);
4164 unsigned full = INSTR (30, 30);
4165 unsigned test_false = INSTR (22, 22);
4166 unsigned i;
4167
4168 NYI_assert (29, 23, 0x5D);
4169 NYI_assert (21, 21, 1);
4170 NYI_assert (15, 10, 0x07);
4171
4172 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
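  /* Vm acts as a per-bit select mask: BIT copies each Vn bit into Vd
     where the Vm bit is set, while BIF (test_false) copies it where
     the Vm bit is clear.  Working on 32-bit lanes is safe because the
     operation is purely bitwise.  */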
4173 for (i = 0; i < (full ? 4 : 2); i++)
4174 {
4175 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4176 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4177 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4178 if (test_false)
4179 aarch64_set_vec_u32 (cpu, vd, i,
4180 (vd_val & vm_val) | (vn_val & ~vm_val));
4181 else
4182 aarch64_set_vec_u32 (cpu, vd, i,
4183 (vd_val & ~vm_val) | (vn_val & vm_val));
4184 }
4185 }
4186
4187 static void
4188 do_vec_ORN (sim_cpu *cpu)
4189 {
4190 /* instr[31] = 0
4191 instr[30] = half (0)/full (1)
4192 instr[29,21] = 00 1110 111
4193 instr[20,16] = Vm
4194 instr[15,10] = 00 0111
4195 instr[9,5] = Vn
4196 instr[4,0] = Vd. */
4197
4198 unsigned vm = INSTR (20, 16);
4199 unsigned vn = INSTR (9, 5);
4200 unsigned vd = INSTR (4, 0);
4201 unsigned i;
4202 int full = INSTR (30, 30);
4203
4204 NYI_assert (29, 21, 0x077);
4205 NYI_assert (15, 10, 0x07);
4206
4207 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4208 for (i = 0; i < (full ? 16 : 8); i++)
4209 aarch64_set_vec_u8 (cpu, vd, i,
4210 aarch64_get_vec_u8 (cpu, vn, i)
4211 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4212 }
4213
4214 static void
4215 do_vec_ORR (sim_cpu *cpu)
4216 {
4217 /* instr[31] = 0
4218 instr[30] = half (0)/full (1)
4219 instr[29,21] = 00 1110 101
4220 instr[20,16] = Vm
4221 instr[15,10] = 0001 11
4222 instr[9,5] = Vn
4223 instr[4,0] = Vd. */
4224
4225 unsigned vm = INSTR (20, 16);
4226 unsigned vn = INSTR (9, 5);
4227 unsigned vd = INSTR (4, 0);
4228 unsigned i;
4229 int full = INSTR (30, 30);
4230
4231 NYI_assert (29, 21, 0x075);
4232 NYI_assert (15, 10, 0x07);
4233
4234 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4235 for (i = 0; i < (full ? 16 : 8); i++)
4236 aarch64_set_vec_u8 (cpu, vd, i,
4237 aarch64_get_vec_u8 (cpu, vn, i)
4238 | aarch64_get_vec_u8 (cpu, vm, i));
4239 }
4240
4241 static void
4242 do_vec_BIC (sim_cpu *cpu)
4243 {
4244 /* instr[31] = 0
4245 instr[30] = half (0)/full (1)
4246 instr[29,21] = 00 1110 011
4247 instr[20,16] = Vm
4248 instr[15,10] = 00 0111
4249 instr[9,5] = Vn
4250 instr[4,0] = Vd. */
4251
4252 unsigned vm = INSTR (20, 16);
4253 unsigned vn = INSTR (9, 5);
4254 unsigned vd = INSTR (4, 0);
4255 unsigned i;
4256 int full = INSTR (30, 30);
4257
4258 NYI_assert (29, 21, 0x073);
4259 NYI_assert (15, 10, 0x07);
4260
4261 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4262 for (i = 0; i < (full ? 16 : 8); i++)
4263 aarch64_set_vec_u8 (cpu, vd, i,
4264 aarch64_get_vec_u8 (cpu, vn, i)
4265 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4266 }
4267
4268 static void
4269 do_vec_XTN (sim_cpu *cpu)
4270 {
4271 /* instr[31] = 0
4272 instr[30] = first part (0)/ second part (1)
4273 instr[29,24] = 00 1110
4274 instr[23,22] = size: byte(00), half(01), word (10)
4275 instr[21,10] = 1000 0100 1010
4276 instr[9,5] = Vs
4277 instr[4,0] = Vd. */
4278
4279 unsigned vs = INSTR (9, 5);
4280 unsigned vd = INSTR (4, 0);
4281 unsigned bias = INSTR (30, 30);
4282 unsigned i;
4283
4284 NYI_assert (29, 24, 0x0E);
4285 NYI_assert (21, 10, 0x84A);
4286
4287 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4288 switch (INSTR (23, 22))
4289 {
4290 case 0:
4291 for (i = 0; i < 8; i++)
4292 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4293 aarch64_get_vec_u16 (cpu, vs, i));
4294 return;
4295
4296 case 1:
4297 for (i = 0; i < 4; i++)
4298 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4299 aarch64_get_vec_u32 (cpu, vs, i));
4300 return;
4301
4302 case 2:
4303 for (i = 0; i < 2; i++)
4304 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4305 aarch64_get_vec_u64 (cpu, vs, i));
4306 return;
4307 }
4308 }
4309
4310 /* Return the number of bits set in the input value. */
4311 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
4312 # define popcount __builtin_popcount
4313 #else
4314 static int
4315 popcount (unsigned char x)
4316 {
4317 static const unsigned char popcnt[16] =
4318 {
4319 0, 1, 1, 2,
4320 1, 2, 2, 3,
4321 1, 2, 2, 3,
4322 2, 3, 3, 4
4323 };
4324
4325 /* Only counts the low 8 bits of the input as that is all we need. */
4326 return popcnt[x % 16] + popcnt[x / 16];
4327 }
4328 #endif
4329
4330 static void
4331 do_vec_CNT (sim_cpu *cpu)
4332 {
4333 /* instr[31] = 0
4334 instr[30] = half (0)/ full (1)
4335 instr[29,24] = 00 1110
4336 instr[23,22] = size: byte(00)
4337 instr[21,10] = 1000 0001 0110
4338 instr[9,5] = Vs
4339 instr[4,0] = Vd. */
4340
4341 unsigned vs = INSTR (9, 5);
4342 unsigned vd = INSTR (4, 0);
4343 int full = INSTR (30, 30);
4344 int size = INSTR (23, 22);
4345 int i;
4346
4347 NYI_assert (29, 24, 0x0E);
4348 NYI_assert (21, 10, 0x816);
4349
4350 if (size != 0)
4351 HALT_UNALLOC;
4352
4353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4354
4355 for (i = 0; i < (full ? 16 : 8); i++)
4356 aarch64_set_vec_u8 (cpu, vd, i,
4357 popcount (aarch64_get_vec_u8 (cpu, vs, i)));
4358 }
4359
4360 static void
4361 do_vec_maxv (sim_cpu *cpu)
4362 {
4363 /* instr[31] = 0
4364 instr[30] = half(0)/full(1)
4365 instr[29] = signed (0)/unsigned(1)
4366 instr[28,24] = 0 1110
4367 instr[23,22] = size: byte(00), half(01), word (10)
4368 instr[21] = 1
4369 instr[20,17] = 1 000
4370 instr[16] = max(0)/min(1)
4371 instr[15,10] = 1010 10
4372 instr[9,5] = V source
4373 instr[4,0] = R dest. */
4374
4375 unsigned vs = INSTR (9, 5);
4376 unsigned rd = INSTR (4, 0);
4377 unsigned full = INSTR (30, 30);
4378 unsigned i;
4379
4380 NYI_assert (28, 24, 0x0E);
4381 NYI_assert (21, 21, 1);
4382 NYI_assert (20, 17, 8);
4383 NYI_assert (15, 10, 0x2A);
4384
4385 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
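  /* Combine the unsigned bit (29) with the min bit (16) to select
     between SMAXV, SMINV, UMAXV and UMINV.  */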
4386 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4387 {
4388 case 0: /* SMAXV. */
4389 {
4390 int64_t smax;
4391 switch (INSTR (23, 22))
4392 {
4393 case 0:
4394 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4395 for (i = 1; i < (full ? 16 : 8); i++)
4396 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4397 break;
4398 case 1:
4399 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4400 for (i = 1; i < (full ? 8 : 4); i++)
4401 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4402 break;
4403 case 2:
4404 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4405 for (i = 1; i < (full ? 4 : 2); i++)
4406 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4407 break;
4408 case 3:
4409 HALT_UNALLOC;
4410 }
4411 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4412 return;
4413 }
4414
4415 case 1: /* SMINV. */
4416 {
4417 int64_t smin;
4418 switch (INSTR (23, 22))
4419 {
4420 case 0:
4421 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4422 for (i = 1; i < (full ? 16 : 8); i++)
4423 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4424 break;
4425 case 1:
4426 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4427 for (i = 1; i < (full ? 8 : 4); i++)
4428 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4429 break;
4430 case 2:
4431 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4432 for (i = 1; i < (full ? 4 : 2); i++)
4433 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4434 break;
4435
4436 case 3:
4437 HALT_UNALLOC;
4438 }
4439 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4440 return;
4441 }
4442
4443 case 2: /* UMAXV. */
4444 {
4445 uint64_t umax;
4446 switch (INSTR (23, 22))
4447 {
4448 case 0:
4449 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4450 for (i = 1; i < (full ? 16 : 8); i++)
4451 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4452 break;
4453 case 1:
4454 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4455 for (i = 1; i < (full ? 8 : 4); i++)
4456 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4457 break;
4458 case 2:
4459 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4460 for (i = 1; i < (full ? 4 : 2); i++)
4461 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4462 break;
4463
4464 case 3:
4465 HALT_UNALLOC;
4466 }
4467 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4468 return;
4469 }
4470
4471 case 3: /* UMINV. */
4472 {
4473 uint64_t umin;
4474 switch (INSTR (23, 22))
4475 {
4476 case 0:
4477 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4478 for (i = 1; i < (full ? 16 : 8); i++)
4479 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4480 break;
4481 case 1:
4482 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4483 for (i = 1; i < (full ? 8 : 4); i++)
4484 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4485 break;
4486 case 2:
4487 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4488 for (i = 1; i < (full ? 4 : 2); i++)
4489 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4490 break;
4491
4492 case 3:
4493 HALT_UNALLOC;
4494 }
4495 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4496 return;
4497 }
4498 }
4499 }
4500
4501 static void
4502 do_vec_fminmaxV (sim_cpu *cpu)
4503 {
4504 /* instr[31,24] = 0110 1110
4505 instr[23] = max(0)/min(1)
4506 instr[22,14] = 011 0000 11
4507 instr[13,12] = nm(00)/normal(11)
4508 instr[11,10] = 10
4509 instr[9,5] = V source
4510 instr[4,0] = R dest. */
4511
4512 unsigned vs = INSTR (9, 5);
4513 unsigned rd = INSTR (4, 0);
4514 unsigned i;
4515 float res = aarch64_get_vec_float (cpu, vs, 0);
4516
4517 NYI_assert (31, 24, 0x6E);
4518 NYI_assert (22, 14, 0x0C3);
4519 NYI_assert (11, 10, 2);
4520
4521 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4522 if (INSTR (23, 23))
4523 {
4524 switch (INSTR (13, 12))
4525 {
4526 case 0: /* FMINNMV. */
4527 for (i = 1; i < 4; i++)
4528 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4529 break;
4530
4531 case 3: /* FMINV. */
4532 for (i = 1; i < 4; i++)
4533 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4534 break;
4535
4536 default:
4537 HALT_NYI;
4538 }
4539 }
4540 else
4541 {
4542 switch (INSTR (13, 12))
4543 {
4544 case 0: /* FMAXNMV. */
4545 for (i = 1; i < 4; i++)
4546 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4547 break;
4548
4549 case 3: /* FMAXV. */
4550 for (i = 1; i < 4; i++)
4551 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4552 break;
4553
4554 default:
4555 HALT_NYI;
4556 }
4557 }
4558
4559 aarch64_set_FP_float (cpu, rd, res);
4560 }
4561
4562 static void
4563 do_vec_Fminmax (sim_cpu *cpu)
4564 {
4565 /* instr[31] = 0
4566 instr[30] = half(0)/full(1)
4567 instr[29,24] = 00 1110
4568 instr[23] = max(0)/min(1)
4569 instr[22] = float(0)/double(1)
4570 instr[21] = 1
4571 instr[20,16] = Vm
4572 instr[15,14] = 11
4573 instr[13,12] = nm(00)/normal(11)
4574 instr[11,10] = 01
4575 instr[9,5] = Vn
4576 instr[4,0] = Vd. */
4577
4578 unsigned vm = INSTR (20, 16);
4579 unsigned vn = INSTR (9, 5);
4580 unsigned vd = INSTR (4, 0);
4581 unsigned full = INSTR (30, 30);
4582 unsigned min = INSTR (23, 23);
4583 unsigned i;
4584
4585 NYI_assert (29, 24, 0x0E);
4586 NYI_assert (21, 21, 1);
4587 NYI_assert (15, 14, 3);
4588 NYI_assert (11, 10, 1);
4589
4590 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4591 if (INSTR (22, 22))
4592 {
4593 double (* func)(double, double);
4594
4595 if (! full)
4596 HALT_NYI;
4597
4598 if (INSTR (13, 12) == 0)
4599 func = min ? dminnm : dmaxnm;
4600 else if (INSTR (13, 12) == 3)
4601 func = min ? fmin : fmax;
4602 else
4603 HALT_NYI;
4604
4605 for (i = 0; i < 2; i++)
4606 aarch64_set_vec_double (cpu, vd, i,
4607 func (aarch64_get_vec_double (cpu, vn, i),
4608 aarch64_get_vec_double (cpu, vm, i)));
4609 }
4610 else
4611 {
4612 float (* func)(float, float);
4613
4614 if (INSTR (13, 12) == 0)
4615 func = min ? fminnm : fmaxnm;
4616 else if (INSTR (13, 12) == 3)
4617 func = min ? fminf : fmaxf;
4618 else
4619 HALT_NYI;
4620
4621 for (i = 0; i < (full ? 4 : 2); i++)
4622 aarch64_set_vec_float (cpu, vd, i,
4623 func (aarch64_get_vec_float (cpu, vn, i),
4624 aarch64_get_vec_float (cpu, vm, i)));
4625 }
4626 }
4627
4628 static void
4629 do_vec_SCVTF (sim_cpu *cpu)
4630 {
4631 /* instr[31] = 0
4632 instr[30] = Q
4633 instr[29,23] = 00 1110 0
4634 instr[22] = float(0)/double(1)
4635 instr[21,10] = 10 0001 1101 10
4636 instr[9,5] = Vn
4637 instr[4,0] = Vd. */
4638
4639 unsigned vn = INSTR (9, 5);
4640 unsigned vd = INSTR (4, 0);
4641 unsigned full = INSTR (30, 30);
4642 unsigned size = INSTR (22, 22);
4643 unsigned i;
4644
4645 NYI_assert (29, 23, 0x1C);
4646 NYI_assert (21, 10, 0x876);
4647
4648 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
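  /* SCVTF converts signed integer elements, hence the signed element
     accessors below.  */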
4649 if (size)
4650 {
4651 if (! full)
4652 HALT_UNALLOC;
4653
4654 for (i = 0; i < 2; i++)
4655 {
4656 double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4657 aarch64_set_vec_double (cpu, vd, i, val);
4658 }
4659 }
4660 else
4661 {
4662 for (i = 0; i < (full ? 4 : 2); i++)
4663 {
4664 float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4665 aarch64_set_vec_float (cpu, vd, i, val);
4666 }
4667 }
4668 }
4669
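/* The VEC_CMP* and VEC_FCMP* macros below expand into the element
   loops for the vector compare instructions.  Each destination lane
   is set to all ones when the comparison holds and to all zeros
   otherwise; SOURCE selects the signed (s) or unsigned (u) element
   accessors and CMP is the C comparison operator to apply.  */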
4670 #define VEC_CMP(SOURCE, CMP) \
4671 do \
4672 { \
4673 switch (size) \
4674 { \
4675 case 0: \
4676 for (i = 0; i < (full ? 16 : 8); i++) \
4677 aarch64_set_vec_u8 (cpu, vd, i, \
4678 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4679 CMP \
4680 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4681 ? -1 : 0); \
4682 return; \
4683 case 1: \
4684 for (i = 0; i < (full ? 8 : 4); i++) \
4685 aarch64_set_vec_u16 (cpu, vd, i, \
4686 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4687 CMP \
4688 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4689 ? -1 : 0); \
4690 return; \
4691 case 2: \
4692 for (i = 0; i < (full ? 4 : 2); i++) \
4693 aarch64_set_vec_u32 (cpu, vd, i, \
4694 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4695 CMP \
4696 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4697 ? -1 : 0); \
4698 return; \
4699 case 3: \
4700 if (! full) \
4701 HALT_UNALLOC; \
4702 for (i = 0; i < 2; i++) \
4703 aarch64_set_vec_u64 (cpu, vd, i, \
4704 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4705 CMP \
4706 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4707 ? -1ULL : 0); \
4708 return; \
4709 } \
4710 } \
4711 while (0)
4712
4713 #define VEC_CMP0(SOURCE, CMP) \
4714 do \
4715 { \
4716 switch (size) \
4717 { \
4718 case 0: \
4719 for (i = 0; i < (full ? 16 : 8); i++) \
4720 aarch64_set_vec_u8 (cpu, vd, i, \
4721 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4722 CMP 0 ? -1 : 0); \
4723 return; \
4724 case 1: \
4725 for (i = 0; i < (full ? 8 : 4); i++) \
4726 aarch64_set_vec_u16 (cpu, vd, i, \
4727 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4728 CMP 0 ? -1 : 0); \
4729 return; \
4730 case 2: \
4731 for (i = 0; i < (full ? 4 : 2); i++) \
4732 aarch64_set_vec_u32 (cpu, vd, i, \
4733 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4734 CMP 0 ? -1 : 0); \
4735 return; \
4736 case 3: \
4737 if (! full) \
4738 HALT_UNALLOC; \
4739 for (i = 0; i < 2; i++) \
4740 aarch64_set_vec_u64 (cpu, vd, i, \
4741 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4742 CMP 0 ? -1ULL : 0); \
4743 return; \
4744 } \
4745 } \
4746 while (0)
4747
4748 #define VEC_FCMP0(CMP) \
4749 do \
4750 { \
4751 if (vm != 0) \
4752 HALT_NYI; \
4753 if (INSTR (22, 22)) \
4754 { \
4755 if (! full) \
4756 HALT_NYI; \
4757 for (i = 0; i < 2; i++) \
4758 aarch64_set_vec_u64 (cpu, vd, i, \
4759 aarch64_get_vec_double (cpu, vn, i) \
4760 CMP 0.0 ? -1 : 0); \
4761 } \
4762 else \
4763 { \
4764 for (i = 0; i < (full ? 4 : 2); i++) \
4765 aarch64_set_vec_u32 (cpu, vd, i, \
4766 aarch64_get_vec_float (cpu, vn, i) \
4767 CMP 0.0 ? -1 : 0); \
4768 } \
4769 return; \
4770 } \
4771 while (0)
4772
4773 #define VEC_FCMP(CMP) \
4774 do \
4775 { \
4776 if (INSTR (22, 22)) \
4777 { \
4778 if (! full) \
4779 HALT_NYI; \
4780 for (i = 0; i < 2; i++) \
4781 aarch64_set_vec_u64 (cpu, vd, i, \
4782 aarch64_get_vec_double (cpu, vn, i) \
4783 CMP \
4784 aarch64_get_vec_double (cpu, vm, i) \
4785 ? -1 : 0); \
4786 } \
4787 else \
4788 { \
4789 for (i = 0; i < (full ? 4 : 2); i++) \
4790 aarch64_set_vec_u32 (cpu, vd, i, \
4791 aarch64_get_vec_float (cpu, vn, i) \
4792 CMP \
4793 aarch64_get_vec_float (cpu, vm, i) \
4794 ? -1 : 0); \
4795 } \
4796 return; \
4797 } \
4798 while (0)
4799
4800 static void
4801 do_vec_compare (sim_cpu *cpu)
4802 {
4803 /* instr[31] = 0
4804 instr[30] = half(0)/full(1)
4805 instr[29] = part-of-comparison-type
4806 instr[28,24] = 0 1110
4807 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4808 type of float compares: single (-0) / double (-1)
4809 instr[21] = 1
4810 instr[20,16] = Vm or 00000 (compare vs 0)
4811 instr[15,10] = part-of-comparison-type
4812 instr[9,5] = Vn
4813 instr[4,0] = Vd. */
4814
4815 int full = INSTR (30, 30);
4816 int size = INSTR (23, 22);
4817 unsigned vm = INSTR (20, 16);
4818 unsigned vn = INSTR (9, 5);
4819 unsigned vd = INSTR (4, 0);
4820 unsigned i;
4821
4822 NYI_assert (28, 24, 0x0E);
4823 NYI_assert (21, 21, 1);
4824
4825 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
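  /* Several non-compare encodings share this decode space; they are
     recognised below (by a non-zero Vm field or by their opcode bits)
     and forwarded to the appropriate handler before the real compares
     are decoded.  */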
4826 if ((INSTR (11, 11)
4827 && INSTR (14, 14))
4828 || ((INSTR (11, 11) == 0
4829 && INSTR (10, 10) == 0)))
4830 {
4831 /* A compare vs 0. */
4832 if (vm != 0)
4833 {
4834 if (INSTR (15, 10) == 0x2A)
4835 do_vec_maxv (cpu);
4836 else if (INSTR (15, 10) == 0x32
4837 || INSTR (15, 10) == 0x3E)
4838 do_vec_fminmaxV (cpu);
4839 else if (INSTR (29, 23) == 0x1C
4840 && INSTR (21, 10) == 0x876)
4841 do_vec_SCVTF (cpu);
4842 else
4843 HALT_NYI;
4844 return;
4845 }
4846 }
4847
4848 if (INSTR (14, 14))
4849 {
4850 /* A floating point compare. */
4851 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4852 | INSTR (13, 10);
4853
4854 NYI_assert (15, 15, 1);
4855
4856 switch (decode)
4857 {
4858 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4859 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4860 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4861 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4862 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4863 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4864 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4865 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4866
4867 default:
4868 HALT_NYI;
4869 }
4870 }
4871 else
4872 {
4873 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4874
4875 switch (decode)
4876 {
4877 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4878 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4879 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4880 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4881 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4882 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4883 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4884 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4885 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4886 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4887 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4888 default:
4889 if (vm == 0)
4890 HALT_NYI;
4891 do_vec_maxv (cpu);
4892 }
4893 }
4894 }
4895
4896 static void
4897 do_vec_SSHL (sim_cpu *cpu)
4898 {
4899 /* instr[31] = 0
4900 instr[30] = first part (0)/ second part (1)
4901 instr[29,24] = 00 1110
4902 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4903 instr[21] = 1
4904 instr[20,16] = Vm
4905 instr[15,10] = 0100 01
4906 instr[9,5] = Vn
4907 instr[4,0] = Vd. */
4908
4909 unsigned full = INSTR (30, 30);
4910 unsigned vm = INSTR (20, 16);
4911 unsigned vn = INSTR (9, 5);
4912 unsigned vd = INSTR (4, 0);
4913 unsigned i;
4914 signed int shift;
4915
4916 NYI_assert (29, 24, 0x0E);
4917 NYI_assert (21, 21, 1);
4918 NYI_assert (15, 10, 0x11);
4919
4920 /* The shift count taken from Vm is signed: a non-negative count shifts the Vn element left, a negative count shifts it arithmetically right. */
4921
4922 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4923 switch (INSTR (23, 22))
4924 {
4925 case 0:
4926 for (i = 0; i < (full ? 16 : 8); i++)
4927 {
4928 shift = aarch64_get_vec_s8 (cpu, vm, i);
4929 if (shift >= 0)
4930 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4931 << shift);
4932 else
4933 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4934 >> - shift);
4935 }
4936 return;
4937
4938 case 1:
4939 for (i = 0; i < (full ? 8 : 4); i++)
4940 {
4941 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4942 if (shift >= 0)
4943 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4944 << shift);
4945 else
4946 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4947 >> - shift);
4948 }
4949 return;
4950
4951 case 2:
4952 for (i = 0; i < (full ? 4 : 2); i++)
4953 {
4954 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4955 if (shift >= 0)
4956 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4957 << shift);
4958 else
4959 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4960 >> - shift);
4961 }
4962 return;
4963
4964 case 3:
4965 if (! full)
4966 HALT_UNALLOC;
4967 for (i = 0; i < 2; i++)
4968 {
4969 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4970 if (shift >= 0)
4971 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4972 << shift);
4973 else
4974 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4975 >> - shift);
4976 }
4977 return;
4978 }
4979 }
4980
4981 static void
4982 do_vec_USHL (sim_cpu *cpu)
4983 {
4984 /* instr[31] = 0
4985 instr[30] = first part (0)/ second part (1)
4986 instr[29,24] = 10 1110
4987 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4988 instr[21] = 1
4989 instr[20,16] = Vm
4990 instr[15,10] = 0100 01
4991 instr[9,5] = Vn
4992 instr[4,0] = Vd */
4993
4994 unsigned full = INSTR (30, 30);
4995 unsigned vm = INSTR (20, 16);
4996 unsigned vn = INSTR (9, 5);
4997 unsigned vd = INSTR (4, 0);
4998 unsigned i;
4999 signed int shift;
5000
5001 NYI_assert (29, 24, 0x2E);
5002 NYI_assert (15, 10, 0x11);
5003
5004 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5005 switch (INSTR (23, 22))
5006 {
5007 case 0:
5008 for (i = 0; i < (full ? 16 : 8); i++)
5009 {
5010 shift = aarch64_get_vec_s8 (cpu, vm, i);
5011 if (shift >= 0)
5012 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5013 << shift);
5014 else
5015 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5016 >> - shift);
5017 }
5018 return;
5019
5020 case 1:
5021 for (i = 0; i < (full ? 8 : 4); i++)
5022 {
5023 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
5024 if (shift >= 0)
5025 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5026 << shift);
5027 else
5028 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5029 >> - shift);
5030 }
5031 return;
5032
5033 case 2:
5034 for (i = 0; i < (full ? 4 : 2); i++)
5035 {
5036 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
5037 if (shift >= 0)
5038 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5039 << shift);
5040 else
5041 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5042 >> - shift);
5043 }
5044 return;
5045
5046 case 3:
5047 if (! full)
5048 HALT_UNALLOC;
5049 for (i = 0; i < 2; i++)
5050 {
5051 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
5052 if (shift >= 0)
5053 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5054 << shift);
5055 else
5056 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5057 >> - shift);
5058 }
5059 return;
5060 }
5061 }
5062
5063 static void
5064 do_vec_FMLA (sim_cpu *cpu)
5065 {
5066 /* instr[31] = 0
5067 instr[30] = full/half selector
5068 instr[29,23] = 0011100
5069 instr[22] = size: 0=>float, 1=>double
5070 instr[21] = 1
5071 instr[20,16] = Vm
5072 instr[15,10] = 1100 11
5073 instr[9,5] = Vn
5074 instr[4,0] = Vd. */
5075
5076 unsigned vm = INSTR (20, 16);
5077 unsigned vn = INSTR (9, 5);
5078 unsigned vd = INSTR (4, 0);
5079 unsigned i;
5080 int full = INSTR (30, 30);
5081
5082 NYI_assert (29, 23, 0x1C);
5083 NYI_assert (21, 21, 1);
5084 NYI_assert (15, 10, 0x33);
5085
5086 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
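  /* Note: the multiply and add below are separate C operations, so
     the result may be rounded twice, unlike a true fused
     multiply-add.  */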
5087 if (INSTR (22, 22))
5088 {
5089 if (! full)
5090 HALT_UNALLOC;
5091 for (i = 0; i < 2; i++)
5092 aarch64_set_vec_double (cpu, vd, i,
5093 aarch64_get_vec_double (cpu, vn, i) *
5094 aarch64_get_vec_double (cpu, vm, i) +
5095 aarch64_get_vec_double (cpu, vd, i));
5096 }
5097 else
5098 {
5099 for (i = 0; i < (full ? 4 : 2); i++)
5100 aarch64_set_vec_float (cpu, vd, i,
5101 aarch64_get_vec_float (cpu, vn, i) *
5102 aarch64_get_vec_float (cpu, vm, i) +
5103 aarch64_get_vec_float (cpu, vd, i));
5104 }
5105 }
5106
5107 static void
5108 do_vec_max (sim_cpu *cpu)
5109 {
5110 /* instr[31] = 0
5111 instr[30] = full/half selector
5112 instr[29] = SMAX (0) / UMAX (1)
5113 instr[28,24] = 0 1110
5114 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5115 instr[21] = 1
5116 instr[20,16] = Vm
5117 instr[15,10] = 0110 01
5118 instr[9,5] = Vn
5119 instr[4,0] = Vd. */
5120
5121 unsigned vm = INSTR (20, 16);
5122 unsigned vn = INSTR (9, 5);
5123 unsigned vd = INSTR (4, 0);
5124 unsigned i;
5125 int full = INSTR (30, 30);
5126
5127 NYI_assert (28, 24, 0x0E);
5128 NYI_assert (21, 21, 1);
5129 NYI_assert (15, 10, 0x19);
5130
5131 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5132 if (INSTR (29, 29))
5133 {
5134 switch (INSTR (23, 22))
5135 {
5136 case 0:
5137 for (i = 0; i < (full ? 16 : 8); i++)
5138 aarch64_set_vec_u8 (cpu, vd, i,
5139 aarch64_get_vec_u8 (cpu, vn, i)
5140 > aarch64_get_vec_u8 (cpu, vm, i)
5141 ? aarch64_get_vec_u8 (cpu, vn, i)
5142 : aarch64_get_vec_u8 (cpu, vm, i));
5143 return;
5144
5145 case 1:
5146 for (i = 0; i < (full ? 8 : 4); i++)
5147 aarch64_set_vec_u16 (cpu, vd, i,
5148 aarch64_get_vec_u16 (cpu, vn, i)
5149 > aarch64_get_vec_u16 (cpu, vm, i)
5150 ? aarch64_get_vec_u16 (cpu, vn, i)
5151 : aarch64_get_vec_u16 (cpu, vm, i));
5152 return;
5153
5154 case 2:
5155 for (i = 0; i < (full ? 4 : 2); i++)
5156 aarch64_set_vec_u32 (cpu, vd, i,
5157 aarch64_get_vec_u32 (cpu, vn, i)
5158 > aarch64_get_vec_u32 (cpu, vm, i)
5159 ? aarch64_get_vec_u32 (cpu, vn, i)
5160 : aarch64_get_vec_u32 (cpu, vm, i));
5161 return;
5162
5163 case 3:
5164 HALT_UNALLOC;
5165 }
5166 }
5167 else
5168 {
5169 switch (INSTR (23, 22))
5170 {
5171 case 0:
5172 for (i = 0; i < (full ? 16 : 8); i++)
5173 aarch64_set_vec_s8 (cpu, vd, i,
5174 aarch64_get_vec_s8 (cpu, vn, i)
5175 > aarch64_get_vec_s8 (cpu, vm, i)
5176 ? aarch64_get_vec_s8 (cpu, vn, i)
5177 : aarch64_get_vec_s8 (cpu, vm, i));
5178 return;
5179
5180 case 1:
5181 for (i = 0; i < (full ? 8 : 4); i++)
5182 aarch64_set_vec_s16 (cpu, vd, i,
5183 aarch64_get_vec_s16 (cpu, vn, i)
5184 > aarch64_get_vec_s16 (cpu, vm, i)
5185 ? aarch64_get_vec_s16 (cpu, vn, i)
5186 : aarch64_get_vec_s16 (cpu, vm, i));
5187 return;
5188
5189 case 2:
5190 for (i = 0; i < (full ? 4 : 2); i++)
5191 aarch64_set_vec_s32 (cpu, vd, i,
5192 aarch64_get_vec_s32 (cpu, vn, i)
5193 > aarch64_get_vec_s32 (cpu, vm, i)
5194 ? aarch64_get_vec_s32 (cpu, vn, i)
5195 : aarch64_get_vec_s32 (cpu, vm, i));
5196 return;
5197
5198 case 3:
5199 HALT_UNALLOC;
5200 }
5201 }
5202 }
5203
5204 static void
5205 do_vec_min (sim_cpu *cpu)
5206 {
5207 /* instr[31] = 0
5208 instr[30] = full/half selector
5209 instr[29] = SMIN (0) / UMIN (1)
5210 instr[28,24] = 0 1110
5211 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5212 instr[21] = 1
5213 instr[20,16] = Vm
5214 instr[15,10] = 0110 11
5215 instr[9,5] = Vn
5216 instr[4,0] = Vd. */
5217
5218 unsigned vm = INSTR (20, 16);
5219 unsigned vn = INSTR (9, 5);
5220 unsigned vd = INSTR (4, 0);
5221 unsigned i;
5222 int full = INSTR (30, 30);
5223
5224 NYI_assert (28, 24, 0x0E);
5225 NYI_assert (21, 21, 1);
5226 NYI_assert (15, 10, 0x1B);
5227
5228 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5229 if (INSTR (29, 29))
5230 {
5231 switch (INSTR (23, 22))
5232 {
5233 case 0:
5234 for (i = 0; i < (full ? 16 : 8); i++)
5235 aarch64_set_vec_u8 (cpu, vd, i,
5236 aarch64_get_vec_u8 (cpu, vn, i)
5237 < aarch64_get_vec_u8 (cpu, vm, i)
5238 ? aarch64_get_vec_u8 (cpu, vn, i)
5239 : aarch64_get_vec_u8 (cpu, vm, i));
5240 return;
5241
5242 case 1:
5243 for (i = 0; i < (full ? 8 : 4); i++)
5244 aarch64_set_vec_u16 (cpu, vd, i,
5245 aarch64_get_vec_u16 (cpu, vn, i)
5246 < aarch64_get_vec_u16 (cpu, vm, i)
5247 ? aarch64_get_vec_u16 (cpu, vn, i)
5248 : aarch64_get_vec_u16 (cpu, vm, i));
5249 return;
5250
5251 case 2:
5252 for (i = 0; i < (full ? 4 : 2); i++)
5253 aarch64_set_vec_u32 (cpu, vd, i,
5254 aarch64_get_vec_u32 (cpu, vn, i)
5255 < aarch64_get_vec_u32 (cpu, vm, i)
5256 ? aarch64_get_vec_u32 (cpu, vn, i)
5257 : aarch64_get_vec_u32 (cpu, vm, i));
5258 return;
5259
5260 case 3:
5261 HALT_UNALLOC;
5262 }
5263 }
5264 else
5265 {
5266 switch (INSTR (23, 22))
5267 {
5268 case 0:
5269 for (i = 0; i < (full ? 16 : 8); i++)
5270 aarch64_set_vec_s8 (cpu, vd, i,
5271 aarch64_get_vec_s8 (cpu, vn, i)
5272 < aarch64_get_vec_s8 (cpu, vm, i)
5273 ? aarch64_get_vec_s8 (cpu, vn, i)
5274 : aarch64_get_vec_s8 (cpu, vm, i));
5275 return;
5276
5277 case 1:
5278 for (i = 0; i < (full ? 8 : 4); i++)
5279 aarch64_set_vec_s16 (cpu, vd, i,
5280 aarch64_get_vec_s16 (cpu, vn, i)
5281 < aarch64_get_vec_s16 (cpu, vm, i)
5282 ? aarch64_get_vec_s16 (cpu, vn, i)
5283 : aarch64_get_vec_s16 (cpu, vm, i));
5284 return;
5285
5286 case 2:
5287 for (i = 0; i < (full ? 4 : 2); i++)
5288 aarch64_set_vec_s32 (cpu, vd, i,
5289 aarch64_get_vec_s32 (cpu, vn, i)
5290 < aarch64_get_vec_s32 (cpu, vm, i)
5291 ? aarch64_get_vec_s32 (cpu, vn, i)
5292 : aarch64_get_vec_s32 (cpu, vm, i));
5293 return;
5294
5295 case 3:
5296 HALT_UNALLOC;
5297 }
5298 }
5299 }
5300
5301 static void
5302 do_vec_sub_long (sim_cpu *cpu)
5303 {
5304 /* instr[31] = 0
5305 instr[30] = lower (0) / upper (1)
5306 instr[29] = signed (0) / unsigned (1)
5307 instr[28,24] = 0 1110
5308 instr[23,22] = size: bytes (00), half (01), word (10)
5309 instr[21] = 1
5310 instr[20,16] = Vm
5311 instr[15,10] = 0010 00
5312 instr[9,5] = Vn
5313 instr[4,0] = V dest. */
5314
5315 unsigned size = INSTR (23, 22);
5316 unsigned vm = INSTR (20, 16);
5317 unsigned vn = INSTR (9, 5);
5318 unsigned vd = INSTR (4, 0);
5319 unsigned bias = 0;
5320 unsigned i;
5321
5322 NYI_assert (28, 24, 0x0E);
5323 NYI_assert (21, 21, 1);
5324 NYI_assert (15, 10, 0x08);
5325
5326 if (size == 3)
5327 HALT_UNALLOC;
5328
5329 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5330 switch (INSTR (30, 29))
5331 {
5332 case 2: /* SSUBL2. */
5333 bias = 2; /* Fall through. */
5334 case 0: /* SSUBL. */
5335 switch (size)
5336 {
5337 case 0:
5338 bias *= 4; /* Byte elements: the upper half starts at element 8. */
5339 for (i = 0; i < 8; i++)
5340 aarch64_set_vec_s16 (cpu, vd, i,
5341 aarch64_get_vec_s8 (cpu, vn, i + bias)
5342 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5343 break;
5344
5345 case 1:
5346 bias *= 2;
5347 for (i = 0; i < 4; i++)
5348 aarch64_set_vec_s32 (cpu, vd, i,
5349 aarch64_get_vec_s16 (cpu, vn, i + bias)
5350 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5351 break;
5352
5353 case 2:
5354 for (i = 0; i < 2; i++)
5355 aarch64_set_vec_s64 (cpu, vd, i,
5356 aarch64_get_vec_s32 (cpu, vn, i + bias)
5357 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5358 break;
5359
5360 default:
5361 HALT_UNALLOC;
5362 }
5363 break;
5364
5365 case 3: /* USUBL2. */
5366 bias = 2; /* Fall through. */
5367 case 1: /* USUBL. */
5368 switch (size)
5369 {
5370 case 0:
5371 bias *= 4; /* Byte elements: the upper half starts at element 8. */
5372 for (i = 0; i < 8; i++)
5373 aarch64_set_vec_u16 (cpu, vd, i,
5374 aarch64_get_vec_u8 (cpu, vn, i + bias)
5375 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5376 break;
5377
5378 case 1:
5379 bias *= 2;
5380 for (i = 0; i < 4; i++)
5381 aarch64_set_vec_u32 (cpu, vd, i,
5382 aarch64_get_vec_u16 (cpu, vn, i + bias)
5383 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5384 break;
5385
5386 case 2:
5387 for (i = 0; i < 2; i++)
5388 aarch64_set_vec_u64 (cpu, vd, i,
5389 aarch64_get_vec_u32 (cpu, vn, i + bias)
5390 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5391 break;
5392
5393 default:
5394 HALT_UNALLOC;
5395 }
5396 break;
5397 }
5398 }
5399
5400 static void
5401 do_vec_ADDP (sim_cpu *cpu)
5402 {
5403 /* instr[31] = 0
5404 instr[30] = half(0)/full(1)
5405 instr[29,24] = 00 1110
5406 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5407 instr[21] = 1
5408 instr[20,16] = Vm
5409 instr[15,10] = 1011 11
5410 instr[9,5] = Vn
5411 instr[4,0] = V dest. */
5412
5413 FRegister copy_vn;
5414 FRegister copy_vm;
5415 unsigned full = INSTR (30, 30);
5416 unsigned size = INSTR (23, 22);
5417 unsigned vm = INSTR (20, 16);
5418 unsigned vn = INSTR (9, 5);
5419 unsigned vd = INSTR (4, 0);
5420 unsigned i, range;
5421
5422 NYI_assert (29, 24, 0x0E);
5423 NYI_assert (21, 21, 1);
5424 NYI_assert (15, 10, 0x2F);
5425
5426 /* Make copies of the source registers in case vd == vn/vm. */
5427 copy_vn = cpu->fr[vn];
5428 copy_vm = cpu->fr[vm];
5429
5430 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5431 switch (size)
5432 {
5433 case 0:
5434 range = full ? 8 : 4;
5435 for (i = 0; i < range; i++)
5436 {
5437 aarch64_set_vec_u8 (cpu, vd, i,
5438 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5439 aarch64_set_vec_u8 (cpu, vd, i + range,
5440 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5441 }
5442 return;
5443
5444 case 1:
5445 range = full ? 4 : 2;
5446 for (i = 0; i < range; i++)
5447 {
5448 aarch64_set_vec_u16 (cpu, vd, i,
5449 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5450 aarch64_set_vec_u16 (cpu, vd, i + range,
5451 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5452 }
5453 return;
5454
5455 case 2:
5456 range = full ? 2 : 1;
5457 for (i = 0; i < range; i++)
5458 {
5459 aarch64_set_vec_u32 (cpu, vd, i,
5460 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5461 aarch64_set_vec_u32 (cpu, vd, i + range,
5462 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5463 }
5464 return;
5465
5466 case 3:
5467 if (! full)
5468 HALT_UNALLOC;
5469 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5470 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5471 return;
5472 }
5473 }
5474
5475 /* Floating point vector convert to longer precision. */
5476 static void
5477 do_vec_FCVTL (sim_cpu *cpu)
5478 {
5479 /* instr[31] = 0
5480 instr[30] = half (0) / all (1)
5481 instr[29,23] = 00 1110 0
5482 instr[22] = single (0) / double (1)
5483 instr[21,10] = 10 0001 0111 10
5484 instr[9,5] = Rn
5485 instr[4,0] = Rd. */
5486
5487 unsigned rn = INSTR (9, 5);
5488 unsigned rd = INSTR (4, 0);
5489 unsigned full = INSTR (30, 30);
5490 unsigned i;
5491
5492 NYI_assert (31, 31, 0);
5493 NYI_assert (29, 23, 0x1C);
5494 NYI_assert (21, 10, 0x85E);
5495
5496 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5497 if (INSTR (22, 22))
5498 {
5499 for (i = 0; i < 2; i++)
5500 aarch64_set_vec_double (cpu, rd, i,
5501 aarch64_get_vec_float (cpu, rn, i + 2*full));
5502 }
5503 else
5504 {
5505 HALT_NYI;
5506
5507 #if 0
5508 /* TODO: Implement missing half-float support. */
5509 for (i = 0; i < 4; i++)
5510 aarch64_set_vec_float (cpu, rd, i,
5511 aarch64_get_vec_halffloat (cpu, rn, i + 4*full));
5512 #endif
5513 }
5514 }
5515
5516 static void
5517 do_vec_FABS (sim_cpu *cpu)
5518 {
5519 /* instr[31] = 0
5520 instr[30] = half(0)/full(1)
5521 instr[29,23] = 00 1110 1
5522 instr[22] = float(0)/double(1)
5523 instr[21,16] = 10 0000
5524 instr[15,10] = 1111 10
5525 instr[9,5] = Vn
5526 instr[4,0] = Vd. */
5527
5528 unsigned vn = INSTR (9, 5);
5529 unsigned vd = INSTR (4, 0);
5530 unsigned full = INSTR (30, 30);
5531 unsigned i;
5532
5533 NYI_assert (29, 23, 0x1D);
5534 NYI_assert (21, 10, 0x83E);
5535
5536 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5537 if (INSTR (22, 22))
5538 {
5539 if (! full)
5540 HALT_NYI;
5541
5542 for (i = 0; i < 2; i++)
5543 aarch64_set_vec_double (cpu, vd, i,
5544 fabs (aarch64_get_vec_double (cpu, vn, i)));
5545 }
5546 else
5547 {
5548 for (i = 0; i < (full ? 4 : 2); i++)
5549 aarch64_set_vec_float (cpu, vd, i,
5550 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5551 }
5552 }
5553
5554 static void
5555 do_vec_FCVTZS (sim_cpu *cpu)
5556 {
5557 /* instr[31] = 0
5558 instr[30] = half (0) / all (1)
5559 instr[29,23] = 00 1110 1
5560 instr[22] = single (0) / double (1)
5561 instr[21,10] = 10 0001 1011 10
5562 instr[9,5] = Rn
5563 instr[4,0] = Rd. */
5564
5565 unsigned rn = INSTR (9, 5);
5566 unsigned rd = INSTR (4, 0);
5567 unsigned full = INSTR (30, 30);
5568 unsigned i;
5569
5570 NYI_assert (31, 31, 0);
5571 NYI_assert (29, 23, 0x1D);
5572 NYI_assert (21, 10, 0x86E);
5573
5574 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
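  /* The C casts below truncate towards zero, matching FCVTZS's
     round-towards-zero behaviour; out-of-range saturation and NaN
     handling are not modelled here.  */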
5575 if (INSTR (22, 22))
5576 {
5577 if (! full)
5578 HALT_UNALLOC;
5579
5580 for (i = 0; i < 2; i++)
5581 aarch64_set_vec_s64 (cpu, rd, i,
5582 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5583 }
5584 else
5585 for (i = 0; i < (full ? 4 : 2); i++)
5586 aarch64_set_vec_s32 (cpu, rd, i,
5587 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5588 }
5589
5590 static void
5591 do_vec_REV64 (sim_cpu *cpu)
5592 {
5593 /* instr[31] = 0
5594 instr[30] = full/half
5595 instr[29,24] = 00 1110
5596 instr[23,22] = size
5597 instr[21,10] = 10 0000 0000 10
5598 instr[9,5] = Rn
5599 instr[4,0] = Rd. */
5600
5601 unsigned rn = INSTR (9, 5);
5602 unsigned rd = INSTR (4, 0);
5603 unsigned size = INSTR (23, 22);
5604 unsigned full = INSTR (30, 30);
5605 unsigned i;
5606 FRegister val;
5607
5608 NYI_assert (29, 24, 0x0E);
5609 NYI_assert (21, 10, 0x802);
5610
5611 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
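  /* Reverse the element order within each 64-bit container: XOR-ing
     the element index with (elements-per-doubleword - 1) swaps each
     element with its mirror position.  */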
5612 switch (size)
5613 {
5614 case 0:
5615 for (i = 0; i < (full ? 16 : 8); i++)
5616 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5617 break;
5618
5619 case 1:
5620 for (i = 0; i < (full ? 8 : 4); i++)
5621 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5622 break;
5623
5624 case 2:
5625 for (i = 0; i < (full ? 4 : 2); i++)
5626 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5627 break;
5628
5629 case 3:
5630 HALT_UNALLOC;
5631 }
5632
5633 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5634 if (full)
5635 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5636 }
5637
5638 static void
5639 do_vec_REV16 (sim_cpu *cpu)
5640 {
5641 /* instr[31] = 0
5642 instr[30] = full/half
5643 instr[29,24] = 00 1110
5644 instr[23,22] = size
5645 instr[21,10] = 10 0000 0001 10
5646 instr[9,5] = Rn
5647 instr[4,0] = Rd. */
5648
5649 unsigned rn = INSTR (9, 5);
5650 unsigned rd = INSTR (4, 0);
5651 unsigned size = INSTR (23, 22);
5652 unsigned full = INSTR (30, 30);
5653 unsigned i;
5654 FRegister val;
5655
5656 NYI_assert (29, 24, 0x0E);
5657 NYI_assert (21, 10, 0x806);
5658
5659 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5660 switch (size)
5661 {
5662 case 0:
5663 for (i = 0; i < (full ? 16 : 8); i++)
5664 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5665 break;
5666
5667 default:
5668 HALT_UNALLOC;
5669 }
5670
5671 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5672 if (full)
5673 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5674 }
5675
5676 static void
5677 do_vec_op1 (sim_cpu *cpu)
5678 {
5679 /* instr[31] = 0
5680 instr[30] = half/full
5681 instr[29,24] = 00 1110
5682 instr[23,21] = ???
5683 instr[20,16] = Vm
5684 instr[15,10] = sub-opcode
5685 instr[9,5] = Vn
5686 instr[4,0] = Vd */
5687 NYI_assert (29, 24, 0x0E);
5688
5689 if (INSTR (21, 21) == 0)
5690 {
5691 if (INSTR (23, 22) == 0)
5692 {
5693 if (INSTR (30, 30) == 1
5694 && INSTR (17, 14) == 0
5695 && INSTR (12, 10) == 7)
5696 return do_vec_ins_2 (cpu);
5697
5698 switch (INSTR (15, 10))
5699 {
5700 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5701 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5702 case 0x07: do_vec_INS (cpu); return;
5703 case 0x0B: do_vec_SMOV_into_scalar (cpu); return;
5704 case 0x0F: do_vec_UMOV_into_scalar (cpu); return;
5705
5706 case 0x00:
5707 case 0x08:
5708 case 0x10:
5709 case 0x18:
5710 do_vec_TBL (cpu); return;
5711
5712 case 0x06:
5713 case 0x16:
5714 do_vec_UZP (cpu); return;
5715
5716 case 0x0A: do_vec_TRN (cpu); return;
5717
5718 case 0x0E:
5719 case 0x1E:
5720 do_vec_ZIP (cpu); return;
5721
5722 default:
5723 HALT_NYI;
5724 }
5725 }
5726
5727 switch (INSTR (13, 10))
5728 {
5729 case 0x6: do_vec_UZP (cpu); return;
5730 case 0xE: do_vec_ZIP (cpu); return;
5731 case 0xA: do_vec_TRN (cpu); return;
5732 default: HALT_NYI;
5733 }
5734 }
5735
5736 switch (INSTR (15, 10))
5737 {
5738 case 0x02: do_vec_REV64 (cpu); return;
5739 case 0x06: do_vec_REV16 (cpu); return;
5740
5741 case 0x07:
5742 switch (INSTR (23, 21))
5743 {
5744 case 1: do_vec_AND (cpu); return;
5745 case 3: do_vec_BIC (cpu); return;
5746 case 5: do_vec_ORR (cpu); return;
5747 case 7: do_vec_ORN (cpu); return;
5748 default: HALT_NYI;
5749 }
5750
5751 case 0x08: do_vec_sub_long (cpu); return;
5752 case 0x0a: do_vec_XTN (cpu); return;
5753 case 0x11: do_vec_SSHL (cpu); return;
5754 case 0x16: do_vec_CNT (cpu); return;
5755 case 0x19: do_vec_max (cpu); return;
5756 case 0x1B: do_vec_min (cpu); return;
5757 case 0x21: do_vec_add (cpu); return;
5758 case 0x25: do_vec_MLA (cpu); return;
5759 case 0x27: do_vec_mul (cpu); return;
5760 case 0x2F: do_vec_ADDP (cpu); return;
5761 case 0x30: do_vec_mull (cpu); return;
5762 case 0x33: do_vec_FMLA (cpu); return;
5763 case 0x35: do_vec_fadd (cpu); return;
5764
5765 case 0x1E:
5766 switch (INSTR (20, 16))
5767 {
5768 case 0x01: do_vec_FCVTL (cpu); return;
5769 default: HALT_NYI;
5770 }
5771
5772 case 0x2E:
5773 switch (INSTR (20, 16))
5774 {
5775 case 0x00: do_vec_ABS (cpu); return;
5776 case 0x01: do_vec_FCVTZS (cpu); return;
5777 case 0x11: do_vec_ADDV (cpu); return;
5778 default: HALT_NYI;
5779 }
5780
5781 case 0x31:
5782 case 0x3B:
5783 do_vec_Fminmax (cpu); return;
5784
5785 case 0x0D:
5786 case 0x0F:
5787 case 0x22:
5788 case 0x23:
5789 case 0x26:
5790 case 0x2A:
5791 case 0x32:
5792 case 0x36:
5793 case 0x39:
5794 case 0x3A:
5795 do_vec_compare (cpu); return;
5796
5797 case 0x3E:
5798 do_vec_FABS (cpu); return;
5799
5800 default:
5801 HALT_NYI;
5802 }
5803 }
5804
5805 static void
5806 do_vec_xtl (sim_cpu *cpu)
5807 {
5808 /* instr[31] = 0
5809 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5810 instr[28,22] = 0 1111 00
5811 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5812 instr[15,10] = 1010 01
5813 instr[9,5] = V source
5814 instr[4,0] = V dest. */
5815
5816 unsigned vs = INSTR (9, 5);
5817 unsigned vd = INSTR (4, 0);
5818 unsigned i, shift, bias = 0;
5819
5820 NYI_assert (28, 22, 0x3C);
5821 NYI_assert (15, 10, 0x29);
5822
5823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
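  /* The "2" variants set bias so that the source elements are taken
     from the upper half of Vs.  Within each arm, the position of the
     leading set bit in instr[21,16] gives the source element size and
     the bits below that marker give the left shift amount.  */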
5824 switch (INSTR (30, 29))
5825 {
5826 case 2: /* SXTL2, SSHLL2. */
5827 bias = 2; /* Fall through. */
5828 case 0: /* SXTL, SSHLL. */
5829 if (INSTR (21, 21))
5830 {
5831 int64_t val1, val2;
5832
5833 shift = INSTR (20, 16);
5834 /* Get the source values before setting the destination values
5835 in case the source and destination are the same. */
5836 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5837 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5838 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5839 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5840 }
5841 else if (INSTR (20, 20))
5842 {
5843 int32_t v[4];
5844 int32_t v1,v2,v3,v4;
5845
5846 shift = INSTR (19, 16);
5847 bias *= 2;
5848 for (i = 0; i < 4; i++)
5849 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5850 for (i = 0; i < 4; i++)
5851 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5852 }
5853 else
5854 {
5855 int16_t v[8];
5856 NYI_assert (19, 19, 1);
5857
5858 shift = INSTR (18, 16);
5859 bias *= 4;
5860 for (i = 0; i < 8; i++)
5861 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5862 for (i = 0; i < 8; i++)
5863 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5864 }
5865 return;
5866
5867 case 3: /* UXTL2, USHLL2. */
5868 bias = 2; /* Fall through. */
5869 case 1: /* UXTL, USHLL. */
5870 if (INSTR (21, 21))
5871 {
5872 uint64_t v1, v2;
5873 shift = INSTR (20, 16);
5874 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5875 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5876 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5877 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5878 }
5879 else if (INSTR (20, 20))
5880 {
5881 uint32_t v[4];
5882 shift = INSTR (19, 16);
5883 bias *= 2;
5884 for (i = 0; i < 4; i++)
5885 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5886 for (i = 0; i < 4; i++)
5887 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5888 }
5889 else
5890 {
5891 uint16_t v[8];
5892 NYI_assert (19, 19, 1);
5893
5894 shift = INSTR (18, 16);
5895 bias *= 4;
5896 for (i = 0; i < 8; i++)
5897 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5898 for (i = 0; i < 8; i++)
5899 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5900 }
5901 return;
5902 }
5903 }
5904
5905 static void
5906 do_vec_SHL (sim_cpu *cpu)
5907 {
5908 /* instr [31] = 0
5909 instr [30] = half(0)/full(1)
5910 instr [29,23] = 001 1110
5911 instr [22,16] = size and shift amount
5912 instr [15,10] = 01 0101
5913 instr [9, 5] = Vs
5914 instr [4, 0] = Vd. */
5915
5916 int shift;
5917 int full = INSTR (30, 30);
5918 unsigned vs = INSTR (9, 5);
5919 unsigned vd = INSTR (4, 0);
5920 unsigned i;
5921
5922 NYI_assert (29, 23, 0x1E);
5923 NYI_assert (15, 10, 0x15);
5924
5925 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
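  /* The immh:immb field in instr[22,16] encodes both element size and
     shift: the highest set bit selects the element size and, because
     the field holds esize + shift, the bits below that marker are the
     shift amount itself.  */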
5926 if (INSTR (22, 22))
5927 {
5928 shift = INSTR (21, 16);
5929
5930 if (full == 0)
5931 HALT_UNALLOC;
5932
5933 for (i = 0; i < 2; i++)
5934 {
5935 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5936 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5937 }
5938
5939 return;
5940 }
5941
5942 if (INSTR (21, 21))
5943 {
5944 shift = INSTR (20, 16);
5945
5946 for (i = 0; i < (full ? 4 : 2); i++)
5947 {
5948 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5949 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5950 }
5951
5952 return;
5953 }
5954
5955 if (INSTR (20, 20))
5956 {
5957 shift = INSTR (19, 16);
5958
5959 for (i = 0; i < (full ? 8 : 4); i++)
5960 {
5961 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5962 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5963 }
5964
5965 return;
5966 }
5967
5968 if (INSTR (19, 19) == 0)
5969 HALT_UNALLOC;
5970
5971 shift = INSTR (18, 16);
5972
5973 for (i = 0; i < (full ? 16 : 8); i++)
5974 {
5975 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5976 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5977 }
5978 }
5979
5980 static void
5981 do_vec_SSHR_USHR (sim_cpu *cpu)
5982 {
5983 /* instr [31] = 0
5984 instr [30] = half(0)/full(1)
5985 instr [29] = signed(0)/unsigned(1)
5986 instr [28,23] = 0 1111 0
5987 instr [22,16] = size and shift amount
5988 instr [15,10] = 0000 01
5989 instr [9, 5] = Vs
5990 instr [4, 0] = Vd. */
5991
5992 int full = INSTR (30, 30);
5993 int sign = ! INSTR (29, 29);
5994 unsigned shift = INSTR (22, 16);
5995 unsigned vs = INSTR (9, 5);
5996 unsigned vd = INSTR (4, 0);
5997 unsigned i;
5998
5999 NYI_assert (28, 23, 0x1E);
6000 NYI_assert (15, 10, 0x01);
6001
6002 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
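  /* For the right shifts, immh:immb holds (2 * esize) - shift, so once
     the element size has been identified from the highest set bit the
     shift is recovered by subtracting the field from 2 * esize (128,
     64, 32 or 16 below).  */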
6003 if (INSTR (22, 22))
6004 {
6005 shift = 128 - shift;
6006
6007 if (full == 0)
6008 HALT_UNALLOC;
6009
6010 if (sign)
6011 for (i = 0; i < 2; i++)
6012 {
6013 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
6014 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
6015 }
6016 else
6017 for (i = 0; i < 2; i++)
6018 {
6019 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
6020 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
6021 }
6022
6023 return;
6024 }
6025
6026 if (INSTR (21, 21))
6027 {
6028 shift = 64 - shift;
6029
6030 if (sign)
6031 for (i = 0; i < (full ? 4 : 2); i++)
6032 {
6033 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
6034 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
6035 }
6036 else
6037 for (i = 0; i < (full ? 4 : 2); i++)
6038 {
6039 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
6040 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
6041 }
6042
6043 return;
6044 }
6045
6046 if (INSTR (20, 20))
6047 {
6048 shift = 32 - shift;
6049
6050 if (sign)
6051 for (i = 0; i < (full ? 8 : 4); i++)
6052 {
6053 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
6054 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
6055 }
6056 else
6057 for (i = 0; i < (full ? 8 : 4); i++)
6058 {
6059 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
6060 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
6061 }
6062
6063 return;
6064 }
6065
6066 if (INSTR (19, 19) == 0)
6067 HALT_UNALLOC;
6068
6069 shift = 16 - shift;
6070
6071 if (sign)
6072 for (i = 0; i < (full ? 16 : 8); i++)
6073 {
6074 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
6075 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
6076 }
6077 else
6078 for (i = 0; i < (full ? 16 : 8); i++)
6079 {
6080 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
6081 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
6082 }
6083 }
6084
6085 static void
6086 do_vec_MUL_by_element (sim_cpu *cpu)
6087 {
6088 /* instr[31] = 0
6089 instr[30] = half/full
6090 instr[29,24] = 00 1111
6091 instr[23,22] = size
6092 instr[21] = L
6093 instr[20] = M
6094 instr[19,16] = m
6095 instr[15,12] = 1000
6096 instr[11] = H
6097 instr[10] = 0
6098 instr[9,5] = Vn
6099 instr[4,0] = Vd */
6100
6101 unsigned full = INSTR (30, 30);
6102 unsigned L = INSTR (21, 21);
6103 unsigned H = INSTR (11, 11);
6104 unsigned vn = INSTR (9, 5);
6105 unsigned vd = INSTR (4, 0);
6106 unsigned size = INSTR (23, 22);
6107 unsigned index;
6108 unsigned vm;
6109 unsigned e;
6110
6111 NYI_assert (29, 24, 0x0F);
6112 NYI_assert (15, 12, 0x8);
6113 NYI_assert (10, 10, 0);
6114
6115 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
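  /* The element index is assembled from the H, L and (for 16-bit
     elements) M bits; in the 16-bit case the M bit is consumed by the
     index, leaving only a 4-bit register number, so Vm is restricted
     to V0-V15.  */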
6116 switch (size)
6117 {
6118 case 1:
6119 {
6120 /* 16 bit products. */
6121 uint16_t product;
6122 uint16_t element1;
6123 uint16_t element2;
6124
6125 index = (H << 2) | (L << 1) | INSTR (20, 20);
6126 vm = INSTR (19, 16);
6127 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6128
6129 for (e = 0; e < (full ? 8 : 4); e ++)
6130 {
6131 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6132 product = element1 * element2;
6133 aarch64_set_vec_u16 (cpu, vd, e, product);
6134 }
6135 }
6136 break;
6137
6138 case 2:
6139 {
6140 /* 32 bit products. */
6141 uint32_t product;
6142 uint32_t element1;
6143 uint32_t element2;
6144
6145 index = (H << 1) | L;
6146 vm = INSTR (20, 16);
6147 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6148
6149 for (e = 0; e < (full ? 4 : 2); e ++)
6150 {
6151 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6152 product = element1 * element2;
6153 aarch64_set_vec_u32 (cpu, vd, e, product);
6154 }
6155 }
6156 break;
6157
6158 default:
6159 HALT_UNALLOC;
6160 }
6161 }
6162
6163 static void
6164 do_FMLA_by_element (sim_cpu *cpu)
6165 {
6166 /* instr[31] = 0
6167 instr[30] = half/full
6168 instr[29,23] = 00 1111 1
6169 instr[22] = size
6170 instr[21] = L
6171 instr[20,16] = m
6172 instr[15,12] = 0001
6173 instr[11] = H
6174 instr[10] = 0
6175 instr[9,5] = Vn
6176 instr[4,0] = Vd */
6177
6178 unsigned full = INSTR (30, 30);
6179 unsigned size = INSTR (22, 22);
6180 unsigned L = INSTR (21, 21);
6181 unsigned vm = INSTR (20, 16);
6182 unsigned H = INSTR (11, 11);
6183 unsigned vn = INSTR (9, 5);
6184 unsigned vd = INSTR (4, 0);
6185 unsigned e;
6186
6187 NYI_assert (29, 23, 0x1F);
6188 NYI_assert (15, 12, 0x1);
6189 NYI_assert (10, 10, 0);
6190
6191 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
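  /* For double precision elements the index is just H (L must be
     zero); for single precision it is H:L.  */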
6192 if (size)
6193 {
6194 double element1, element2;
6195
6196 if (! full || L)
6197 HALT_UNALLOC;
6198
6199 element2 = aarch64_get_vec_double (cpu, vm, H);
6200
6201 for (e = 0; e < 2; e++)
6202 {
6203 element1 = aarch64_get_vec_double (cpu, vn, e);
6204 element1 *= element2;
6205 element1 += aarch64_get_vec_double (cpu, vd, e);
6206 aarch64_set_vec_double (cpu, vd, e, element1);
6207 }
6208 }
6209 else
6210 {
6211 float element1;
6212 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6213
6214 for (e = 0; e < (full ? 4 : 2); e++)
6215 {
6216 element1 = aarch64_get_vec_float (cpu, vn, e);
6217 element1 *= element2;
6218 element1 += aarch64_get_vec_float (cpu, vd, e);
6219 aarch64_set_vec_float (cpu, vd, e, element1);
6220 }
6221 }
6222 }
6223
6224 static void
6225 do_vec_op2 (sim_cpu *cpu)
6226 {
6227 /* instr[31] = 0
6228 instr[30] = half/full
6229 instr[29,24] = 00 1111
6230 instr[23] = ?
6231 instr[22,16] = element size & index
6232 instr[15,10] = sub-opcode
6233 instr[9,5] = Vn
6234 instr[4,0] = Vd */
6235
6236 NYI_assert (29, 24, 0x0F);
6237
6238 if (INSTR (23, 23) != 0)
6239 {
6240 switch (INSTR (15, 10))
6241 {
6242 case 0x04:
6243 case 0x06:
6244 do_FMLA_by_element (cpu);
6245 return;
6246
6247 case 0x20:
6248 case 0x22:
6249 do_vec_MUL_by_element (cpu);
6250 return;
6251
6252 default:
6253 HALT_NYI;
6254 }
6255 }
6256 else
6257 {
6258 switch (INSTR (15, 10))
6259 {
6260 case 0x01: do_vec_SSHR_USHR (cpu); return;
6261 case 0x15: do_vec_SHL (cpu); return;
6262 case 0x20:
6263 case 0x22: do_vec_MUL_by_element (cpu); return;
6264 case 0x29: do_vec_xtl (cpu); return;
6265 default: HALT_NYI;
6266 }
6267 }
6268 }
6269
6270 static void
6271 do_vec_neg (sim_cpu *cpu)
6272 {
6273 /* instr[31] = 0
6274 instr[30] = full(1)/half(0)
6275 instr[29,24] = 10 1110
6276 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6277 instr[21,10] = 1000 0010 1110
6278 instr[9,5] = Vs
6279 instr[4,0] = Vd */
6280
6281 int full = INSTR (30, 30);
6282 unsigned vs = INSTR (9, 5);
6283 unsigned vd = INSTR (4, 0);
6284 unsigned i;
6285
6286 NYI_assert (29, 24, 0x2E);
6287 NYI_assert (21, 10, 0x82E);
6288
6289 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6290 switch (INSTR (23, 22))
6291 {
6292 case 0:
6293 for (i = 0; i < (full ? 16 : 8); i++)
6294 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6295 return;
6296
6297 case 1:
6298 for (i = 0; i < (full ? 8 : 4); i++)
6299 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6300 return;
6301
6302 case 2:
6303 for (i = 0; i < (full ? 4 : 2); i++)
6304 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6305 return;
6306
6307 case 3:
6308 if (! full)
6309 HALT_NYI;
6310 for (i = 0; i < 2; i++)
6311 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6312 return;
6313 }
6314 }
6315
6316 static void
6317 do_vec_sqrt (sim_cpu *cpu)
6318 {
6319 /* instr[31] = 0
6320 instr[30] = full(1)/half(0)
6321 instr[29,23] = 101 1101
6322 instr[22] = single(0)/double(1)
6323 instr[21,10] = 1000 0111 1110
6324 instr[9,5] = Vs
6325 instr[4,0] = Vd. */
6326
6327 int full = INSTR (30, 30);
6328 unsigned vs = INSTR (9, 5);
6329 unsigned vd = INSTR (4, 0);
6330 unsigned i;
6331
6332 NYI_assert (29, 23, 0x5B);
6333 NYI_assert (21, 10, 0x87E);
6334
6335 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6336 if (INSTR (22, 22) == 0)
6337 for (i = 0; i < (full ? 4 : 2); i++)
6338 aarch64_set_vec_float (cpu, vd, i,
6339 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6340 else
6341 for (i = 0; i < 2; i++)
6342 aarch64_set_vec_double (cpu, vd, i,
6343 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6344 }
6345
6346 static void
6347 do_vec_mls_indexed (sim_cpu *cpu)
6348 {
6349 /* instr[31] = 0
6350 instr[30] = half(0)/full(1)
6351 instr[29,24] = 10 1111
6352 instr[23,22] = 16-bit(01)/32-bit(10)
6353 instr[11,21,20] = element index H:L:M (if 16-bit)
6354 instr[11,21] = element index H:L (if 32-bit)
6355 instr[20,16] = Vm
6356 instr[15,12] = 0100
6357 instr[11] = part of index
6358 instr[10] = 0
6359 instr[9,5] = Vs
6360 instr[4,0] = Vd. */
6361
6362 int full = INSTR (30, 30);
6363 unsigned vs = INSTR (9, 5);
6364 unsigned vd = INSTR (4, 0);
6365 unsigned vm = INSTR (20, 16);
6366 unsigned i;
6367
6368 NYI_assert (15, 12, 4);
6369 NYI_assert (10, 10, 0);
6370
6371 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6372 switch (INSTR (23, 22))
6373 {
6374 case 1:
6375 {
6376 unsigned elem;
6377 uint32_t val;
6378
6379 /* With 16-bit elements the index is H:L:M, so bit 20 belongs to
6380 the index and only v0-v15 can be named by Vm. */
6381 elem = (INSTR (11, 11) << 2) | (INSTR (21, 21) << 1) | INSTR (20, 20);
6382 vm &= 0xF;
6383 val = aarch64_get_vec_u16 (cpu, vm, elem);
6384
6385 for (i = 0; i < (full ? 8 : 4); i++)
6386 aarch64_set_vec_u16 (cpu, vd, i,
6387 aarch64_get_vec_u16 (cpu, vd, i) -
6388 (aarch64_get_vec_u16 (cpu, vs, i) * val));
6389 return;
6390 }
6391
6392 case 2:
6393 {
6394 unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
6395 uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6396
6397 for (i = 0; i < (full ? 4 : 2); i++)
6398 aarch64_set_vec_u32 (cpu, vd, i,
6399 aarch64_get_vec_u32 (cpu, vd, i) -
6400 (aarch64_get_vec_u32 (cpu, vs, i) * val));
6401 return;
6402 }
6403
6404 case 0:
6405 case 3:
6406 default:
6407 HALT_NYI;
6408 }
6409 }
6410
6411 static void
6412 do_vec_SUB (sim_cpu *cpu)
6413 {
6414 /* instr [31] = 0
6415 instr [30] = half(0)/full(1)
6416 instr [29,24] = 10 1110
6417 instr [23,22] = size: byte(00), half(01), word (10), long (11)
6418 instr [21] = 1
6419 instr [20,16] = Vm
6420 instr [15,10] = 10 0001
6421 instr [9, 5] = Vn
6422 instr [4, 0] = Vd. */
6423
6424 unsigned full = INSTR (30, 30);
6425 unsigned vm = INSTR (20, 16);
6426 unsigned vn = INSTR (9, 5);
6427 unsigned vd = INSTR (4, 0);
6428 unsigned i;
6429
6430 NYI_assert (29, 24, 0x2E);
6431 NYI_assert (21, 21, 1);
6432 NYI_assert (15, 10, 0x21);
6433
6434 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6435 switch (INSTR (23, 22))
6436 {
6437 case 0:
6438 for (i = 0; i < (full ? 16 : 8); i++)
6439 aarch64_set_vec_s8 (cpu, vd, i,
6440 aarch64_get_vec_s8 (cpu, vn, i)
6441 - aarch64_get_vec_s8 (cpu, vm, i));
6442 return;
6443
6444 case 1:
6445 for (i = 0; i < (full ? 8 : 4); i++)
6446 aarch64_set_vec_s16 (cpu, vd, i,
6447 aarch64_get_vec_s16 (cpu, vn, i)
6448 - aarch64_get_vec_s16 (cpu, vm, i));
6449 return;
6450
6451 case 2:
6452 for (i = 0; i < (full ? 4 : 2); i++)
6453 aarch64_set_vec_s32 (cpu, vd, i,
6454 aarch64_get_vec_s32 (cpu, vn, i)
6455 - aarch64_get_vec_s32 (cpu, vm, i));
6456 return;
6457
6458 case 3:
6459 if (full == 0)
6460 HALT_UNALLOC;
6461
6462 for (i = 0; i < 2; i++)
6463 aarch64_set_vec_s64 (cpu, vd, i,
6464 aarch64_get_vec_s64 (cpu, vn, i)
6465 - aarch64_get_vec_s64 (cpu, vm, i));
6466 return;
6467 }
6468 }
6469
6470 static void
6471 do_vec_MLS (sim_cpu *cpu)
6472 {
6473 /* instr [31] = 0
6474 instr [30] = half(0)/full(1)
6475 instr [29,24] = 10 1110
6476 instr [23,22] = size: byte(00), half(01), word (10)
6477 instr [21] = 1
6478 instr [20,16] = Vm
6479 instr [15,10] = 10 0101
6480 instr [9, 5] = Vn
6481 instr [4, 0] = Vd. */
6482
6483 unsigned full = INSTR (30, 30);
6484 unsigned vm = INSTR (20, 16);
6485 unsigned vn = INSTR (9, 5);
6486 unsigned vd = INSTR (4, 0);
6487 unsigned i;
6488
6489 NYI_assert (29, 24, 0x2E);
6490 NYI_assert (21, 21, 1);
6491 NYI_assert (15, 10, 0x25);
6492
6493 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6494 switch (INSTR (23, 22))
6495 {
6496 case 0:
6497 for (i = 0; i < (full ? 16 : 8); i++)
6498 aarch64_set_vec_u8 (cpu, vd, i,
6499 aarch64_get_vec_u8 (cpu, vd, i)
6500 - (aarch64_get_vec_u8 (cpu, vn, i)
6501 * aarch64_get_vec_u8 (cpu, vm, i)));
6502 return;
6503
6504 case 1:
6505 for (i = 0; i < (full ? 8 : 4); i++)
6506 aarch64_set_vec_u16 (cpu, vd, i,
6507 aarch64_get_vec_u16 (cpu, vd, i)
6508 - (aarch64_get_vec_u16 (cpu, vn, i)
6509 * aarch64_get_vec_u16 (cpu, vm, i)));
6510 return;
6511
6512 case 2:
6513 for (i = 0; i < (full ? 4 : 2); i++)
6514 aarch64_set_vec_u32 (cpu, vd, i,
6515 aarch64_get_vec_u32 (cpu, vd, i)
6516 - (aarch64_get_vec_u32 (cpu, vn, i)
6517 * aarch64_get_vec_u32 (cpu, vm, i)));
6518 return;
6519
6520 default:
6521 HALT_UNALLOC;
6522 }
6523 }
6524
6525 static void
6526 do_vec_FDIV (sim_cpu *cpu)
6527 {
6528 /* instr [31] = 0
6529 instr [30] = half(0)/full(1)
6530 instr [29,23] = 10 1110 0
6531 instr [22] = float(0)/double(1)
6532 instr [21] = 1
6533 instr [20,16] = Vm
6534 instr [15,10] = 1111 11
6535 instr [9, 5] = Vn
6536 instr [4, 0] = Vd. */
6537
6538 unsigned full = INSTR (30, 30);
6539 unsigned vm = INSTR (20, 16);
6540 unsigned vn = INSTR (9, 5);
6541 unsigned vd = INSTR (4, 0);
6542 unsigned i;
6543
6544 NYI_assert (29, 23, 0x5C);
6545 NYI_assert (21, 21, 1);
6546 NYI_assert (15, 10, 0x3F);
6547
6548 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6549 if (INSTR (22, 22))
6550 {
6551 if (! full)
6552 HALT_UNALLOC;
6553
6554 for (i = 0; i < 2; i++)
6555 aarch64_set_vec_double (cpu, vd, i,
6556 aarch64_get_vec_double (cpu, vn, i)
6557 / aarch64_get_vec_double (cpu, vm, i));
6558 }
6559 else
6560 for (i = 0; i < (full ? 4 : 2); i++)
6561 aarch64_set_vec_float (cpu, vd, i,
6562 aarch64_get_vec_float (cpu, vn, i)
6563 / aarch64_get_vec_float (cpu, vm, i));
6564 }
6565
6566 static void
6567 do_vec_FMUL (sim_cpu *cpu)
6568 {
6569 /* instr [31] = 0
6570 instr [30] = half(0)/full(1)
6571 instr [29,23] = 10 1110 0
6572 instr [22] = float(0)/double(1)
6573 instr [21] = 1
6574 instr [20,16] = Vm
6575 instr [15,10] = 1101 11
6576 instr [9, 5] = Vn
6577 instr [4, 0] = Vd. */
6578
6579 unsigned full = INSTR (30, 30);
6580 unsigned vm = INSTR (20, 16);
6581 unsigned vn = INSTR (9, 5);
6582 unsigned vd = INSTR (4, 0);
6583 unsigned i;
6584
6585 NYI_assert (29, 23, 0x5C);
6586 NYI_assert (21, 21, 1);
6587 NYI_assert (15, 10, 0x37);
6588
6589 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6590 if (INSTR (22, 22))
6591 {
6592 if (! full)
6593 HALT_UNALLOC;
6594
6595 for (i = 0; i < 2; i++)
6596 aarch64_set_vec_double (cpu, vd, i,
6597 aarch64_get_vec_double (cpu, vn, i)
6598 * aarch64_get_vec_double (cpu, vm, i));
6599 }
6600 else
6601 for (i = 0; i < (full ? 4 : 2); i++)
6602 aarch64_set_vec_float (cpu, vd, i,
6603 aarch64_get_vec_float (cpu, vn, i)
6604 * aarch64_get_vec_float (cpu, vm, i));
6605 }
6606
6607 static void
6608 do_vec_FADDP (sim_cpu *cpu)
6609 {
6610 /* instr [31] = 0
6611 instr [30] = half(0)/full(1)
6612 instr [29,23] = 10 1110 0
6613 instr [22] = float(0)/double(1)
6614 instr [21] = 1
6615 instr [20,16] = Vm
6616 instr [15,10] = 1101 01
6617 instr [9, 5] = Vn
6618 instr [4, 0] = Vd. */
6619
6620 unsigned full = INSTR (30, 30);
6621 unsigned vm = INSTR (20, 16);
6622 unsigned vn = INSTR (9, 5);
6623 unsigned vd = INSTR (4, 0);
6624
6625 NYI_assert (29, 23, 0x5C);
6626 NYI_assert (21, 21, 1);
6627 NYI_assert (15, 10, 0x35);
6628
6629 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6630 if (INSTR (22, 22))
6631 {
6632 /* Extract values before adding them, in case vd == vn/vm. */
6633 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6634 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6635 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6636 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6637
6638 if (! full)
6639 HALT_UNALLOC;
6640
6641 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6642 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6643 }
6644 else
6645 {
6646 /* Extract values before adding them, in case vd == vn/vm. */
6647 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6648 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6649 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6650 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6651
6652 if (full)
6653 {
6654 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6655 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6656 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6657 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6658
6659 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6660 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6661 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6662 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6663 }
6664 else
6665 {
6666 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6667 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6668 }
6669 }
6670 }
6671
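/* For example, FADDP Vd.4S, Vn.4S, Vm.4S above computes
   d[0] = n[0] + n[1], d[1] = n[2] + n[3],
   d[2] = m[0] + m[1], d[3] = m[2] + m[3].  */
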
6672 static void
6673 do_vec_FSQRT (sim_cpu *cpu)
6674 {
6675 /* instr[31] = 0
6676 instr[30] = half(0)/full(1)
6677 instr[29,23] = 10 1110 1
6678 instr[22] = single(0)/double(1)
6679 instr[21,10] = 10 0001 1111 10
6680 instr[9,5] = Vsrc
6681 instr[4,0] = Vdest. */
6682
6683 unsigned vn = INSTR (9, 5);
6684 unsigned vd = INSTR (4, 0);
6685 unsigned full = INSTR (30, 30);
6686 int i;
6687
6688 NYI_assert (29, 23, 0x5D);
6689 NYI_assert (21, 10, 0x87E);
6690
6691 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6692 if (INSTR (22, 22))
6693 {
6694 if (! full)
6695 HALT_UNALLOC;
6696
6697 for (i = 0; i < 2; i++)
6698 aarch64_set_vec_double (cpu, vd, i,
6699 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6700 }
6701 else
6702 {
6703 for (i = 0; i < (full ? 4 : 2); i++)
6704 aarch64_set_vec_float (cpu, vd, i,
6705 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6706 }
6707 }
6708
6709 static void
6710 do_vec_FNEG (sim_cpu *cpu)
6711 {
6712 /* instr[31] = 0
6713 instr[30] = half (0)/full (1)
6714 instr[29,23] = 10 1110 1
6715 instr[22] = single (0)/double (1)
6716 instr[21,10] = 10 0000 1111 10
6717 instr[9,5] = Vsrc
6718 instr[4,0] = Vdest. */
6719
6720 unsigned vn = INSTR (9, 5);
6721 unsigned vd = INSTR (4, 0);
6722 unsigned full = INSTR (30, 30);
6723 int i;
6724
6725 NYI_assert (29, 23, 0x5D);
6726 NYI_assert (21, 10, 0x83E);
6727
6728 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6729 if (INSTR (22, 22))
6730 {
6731 if (! full)
6732 HALT_UNALLOC;
6733
6734 for (i = 0; i < 2; i++)
6735 aarch64_set_vec_double (cpu, vd, i,
6736 - aarch64_get_vec_double (cpu, vn, i));
6737 }
6738 else
6739 {
6740 for (i = 0; i < (full ? 4 : 2); i++)
6741 aarch64_set_vec_float (cpu, vd, i,
6742 - aarch64_get_vec_float (cpu, vn, i));
6743 }
6744 }
6745
6746 static void
6747 do_vec_NOT (sim_cpu *cpu)
6748 {
6749 /* instr[31] = 0
6750 instr[30] = half (0)/full (1)
6751 instr[29,10] = 10 1110 0010 0000 0101 10
6752 instr[9,5] = Vn
6753 instr[4,0] = Vd. */
6754
6755 unsigned vn = INSTR (9, 5);
6756 unsigned vd = INSTR (4, 0);
6757 unsigned i;
6758 int full = INSTR (30, 30);
6759
6760 NYI_assert (29, 10, 0xB8816);
6761
6762 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6763 for (i = 0; i < (full ? 16 : 8); i++)
6764 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6765 }
6766
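/* Count the leading zero bits in VAL, treating it as a SIZE-bit wide
   value; returns SIZE when VAL is zero.  */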
6767 static unsigned int
6768 clz (uint64_t val, unsigned size)
6769 {
6770 uint64_t mask = 1;
6771 int count;
6772
6773 mask <<= (size - 1);
6774 count = 0;
6775 do
6776 {
6777 if (val & mask)
6778 break;
6779 mask >>= 1;
6780 count ++;
6781 }
6782 while (mask);
6783
6784 return count;
6785 }
6786
6787 static void
6788 do_vec_CLZ (sim_cpu *cpu)
6789 {
6790 /* instr[31] = 0
6791 instr[30] = half (0)/full (1)
6792 instr[29,24] = 10 1110
6793 instr[23,22] = size
6794 instr[21,10] = 10 0000 0100 10
6795 instr[9,5] = Vn
6796 instr[4,0] = Vd. */
6797
6798 unsigned vn = INSTR (9, 5);
6799 unsigned vd = INSTR (4, 0);
6800 unsigned i;
6801 int full = INSTR (30,30);
6802
6803 NYI_assert (29, 24, 0x2E);
6804 NYI_assert (21, 10, 0x812);
6805
6806 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6807 switch (INSTR (23, 22))
6808 {
6809 case 0:
6810 for (i = 0; i < (full ? 16 : 8); i++)
6811 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6812 break;
6813 case 1:
6814 for (i = 0; i < (full ? 8 : 4); i++)
6815 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6816 break;
6817 case 2:
6818 for (i = 0; i < (full ? 4 : 2); i++)
6819 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6820 break;
6821 case 3:
6822 if (! full)
6823 HALT_UNALLOC;
6824 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6825 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6826 break;
6827 }
6828 }
6829
6830 static void
6831 do_vec_MOV_element (sim_cpu *cpu)
6832 {
6833 /* instr[31,21] = 0110 1110 000
6834 instr[20,16] = size & dest index
6835 instr[15] = 0
6836 instr[14,11] = source index
6837 instr[10] = 1
6838 instr[9,5] = Vs
6839 instr[4,0] = Vd. */
6840
6841 unsigned vs = INSTR (9, 5);
6842 unsigned vd = INSTR (4, 0);
6843 unsigned src_index;
6844 unsigned dst_index;
6845
6846 NYI_assert (31, 21, 0x370);
6847 NYI_assert (15, 15, 0);
6848 NYI_assert (10, 10, 1);
6849
6850 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6851 if (INSTR (16, 16))
6852 {
6853 /* Move a byte. */
6854 src_index = INSTR (14, 11);
6855 dst_index = INSTR (20, 17);
6856 aarch64_set_vec_u8 (cpu, vd, dst_index,
6857 aarch64_get_vec_u8 (cpu, vs, src_index));
6858 }
6859 else if (INSTR (17, 17))
6860 {
6861 /* Move 16-bits. */
6862 NYI_assert (11, 11, 0);
6863 src_index = INSTR (14, 12);
6864 dst_index = INSTR (20, 18);
6865 aarch64_set_vec_u16 (cpu, vd, dst_index,
6866 aarch64_get_vec_u16 (cpu, vs, src_index));
6867 }
6868 else if (INSTR (18, 18))
6869 {
6870 /* Move 32-bits. */
6871 NYI_assert (12, 11, 0);
6872 src_index = INSTR (14, 13);
6873 dst_index = INSTR (20, 19);
6874 aarch64_set_vec_u32 (cpu, vd, dst_index,
6875 aarch64_get_vec_u32 (cpu, vs, src_index));
6876 }
6877 else
6878 {
6879 NYI_assert (19, 19, 1);
6880 NYI_assert (13, 11, 0);
6881 src_index = INSTR (14, 14);
6882 dst_index = INSTR (20, 20);
6883 aarch64_set_vec_u64 (cpu, vd, dst_index,
6884 aarch64_get_vec_u64 (cpu, vs, src_index));
6885 }
6886 }
6887
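/* Illustrative sketch (not part of the simulator): in do_vec_MOV_element
   above, the lowest set bit of imm5 (instr[20,16]) selects the element
   size and the bits above it form the destination index.  */
#if 0
  unsigned imm5 = 0x0C;            /* 0b01100: bit 18 is the low set bit,  */
  unsigned dst_index = imm5 >> 3;  /* so 32-bit move, INSTR (20, 19) == 1. */
#endif
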
6888 static void
6889 do_vec_REV32 (sim_cpu *cpu)
6890 {
6891 /* instr[31] = 0
6892 instr[30] = full/half
6893 instr[29,24] = 10 1110
6894 instr[23,22] = size
6895 instr[21,10] = 10 0000 0000 10
6896 instr[9,5] = Rn
6897 instr[4,0] = Rd. */
6898
6899 unsigned rn = INSTR (9, 5);
6900 unsigned rd = INSTR (4, 0);
6901 unsigned size = INSTR (23, 22);
6902 unsigned full = INSTR (30, 30);
6903 unsigned i;
6904 FRegister val;
6905
6906 NYI_assert (29, 24, 0x2E);
6907 NYI_assert (21, 10, 0x802);
6908
6909 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6910 switch (size)
6911 {
6912 case 0:
6913 for (i = 0; i < (full ? 16 : 8); i++)
6914 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6915 break;
6916
6917 case 1:
6918 for (i = 0; i < (full ? 8 : 4); i++)
6919 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6920 break;
6921
6922 default:
6923 HALT_UNALLOC;
6924 }
6925
6926 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6927 if (full)
6928 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6929 }
6930
6931 static void
6932 do_vec_EXT (sim_cpu *cpu)
6933 {
6934 /* instr[31] = 0
6935 instr[30] = full/half
6936 instr[29,21] = 10 1110 000
6937 instr[20,16] = Vm
6938 instr[15] = 0
6939 instr[14,11] = source index
6940 instr[10] = 0
6941 instr[9,5] = Vn
6942 instr[4,0] = Vd. */
6943
6944 unsigned vm = INSTR (20, 16);
6945 unsigned vn = INSTR (9, 5);
6946 unsigned vd = INSTR (4, 0);
6947 unsigned src_index = INSTR (14, 11);
6948 unsigned full = INSTR (30, 30);
6949 unsigned i;
6950 unsigned j;
6951 FRegister val;
6952
6953 NYI_assert (31, 21, 0x370);
6954 NYI_assert (15, 15, 0);
6955 NYI_assert (10, 10, 0);
6956
6957 if (!full && (src_index & 0x8))
6958 HALT_UNALLOC;
6959
6960 j = 0;
6961
6962 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6963 for (i = src_index; i < (full ? 16 : 8); i++)
6964 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6965 for (i = 0; i < src_index; i++)
6966 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6967
6968 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6969 if (full)
6970 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6971 }
6972
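/* For example, EXT Vd.16B, Vn.16B, Vm.16B, #3 (src_index == 3) yields
   Vd = { Vn[3], ..., Vn[15], Vm[0], Vm[1], Vm[2] }.  */
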
6973 static void
6974 dexAdvSIMD0 (sim_cpu *cpu)
6975 {
6976 /* instr [28,25] = 0 111. */
6977 if ( INSTR (15, 10) == 0x07
6978 && (INSTR (9, 5) ==
6979 INSTR (20, 16)))
6980 {
6981 if (INSTR (31, 21) == 0x075
6982 || INSTR (31, 21) == 0x275)
6983 {
6984 do_vec_MOV_whole_vector (cpu);
6985 return;
6986 }
6987 }
6988
6989 if (INSTR (29, 19) == 0x1E0)
6990 {
6991 do_vec_MOV_immediate (cpu);
6992 return;
6993 }
6994
6995 if (INSTR (29, 19) == 0x5E0)
6996 {
6997 do_vec_MVNI (cpu);
6998 return;
6999 }
7000
7001 if (INSTR (29, 19) == 0x1C0
7002 || INSTR (29, 19) == 0x1C1)
7003 {
7004 if (INSTR (15, 10) == 0x03)
7005 {
7006 do_vec_DUP_scalar_into_vector (cpu);
7007 return;
7008 }
7009 }
7010
7011 switch (INSTR (29, 24))
7012 {
7013 case 0x0E: do_vec_op1 (cpu); return;
7014 case 0x0F: do_vec_op2 (cpu); return;
7015
7016 case 0x2E:
7017 if (INSTR (21, 21) == 1)
7018 {
7019 switch (INSTR (15, 10))
7020 {
7021 case 0x02:
7022 do_vec_REV32 (cpu);
7023 return;
7024
7025 case 0x07:
7026 switch (INSTR (23, 22))
7027 {
7028 case 0: do_vec_EOR (cpu); return;
7029 case 1: do_vec_BSL (cpu); return;
7030 case 2:
7031 case 3: do_vec_bit (cpu); return;
7032 }
7033 break;
7034
7035 case 0x08: do_vec_sub_long (cpu); return;
7036 case 0x11: do_vec_USHL (cpu); return;
7037 case 0x12: do_vec_CLZ (cpu); return;
7038 case 0x16: do_vec_NOT (cpu); return;
7039 case 0x19: do_vec_max (cpu); return;
7040 case 0x1B: do_vec_min (cpu); return;
7041 case 0x21: do_vec_SUB (cpu); return;
7042 case 0x25: do_vec_MLS (cpu); return;
7043 case 0x31: do_vec_FminmaxNMP (cpu); return;
7044 case 0x35: do_vec_FADDP (cpu); return;
7045 case 0x37: do_vec_FMUL (cpu); return;
7046 case 0x3F: do_vec_FDIV (cpu); return;
7047
7048 case 0x3E:
7049 switch (INSTR (20, 16))
7050 {
7051 case 0x00: do_vec_FNEG (cpu); return;
7052 case 0x01: do_vec_FSQRT (cpu); return;
7053 default: HALT_NYI;
7054 }
7055
7056 case 0x0D:
7057 case 0x0F:
7058 case 0x22:
7059 case 0x23:
7060 case 0x26:
7061 case 0x2A:
7062 case 0x32:
7063 case 0x36:
7064 case 0x39:
7065 case 0x3A:
7066 do_vec_compare (cpu); return;
7067
7068 default:
7069 break;
7070 }
7071 }
7072
7073 if (INSTR (31, 21) == 0x370)
7074 {
7075 if (INSTR (10, 10))
7076 do_vec_MOV_element (cpu);
7077 else
7078 do_vec_EXT (cpu);
7079 return;
7080 }
7081
7082 switch (INSTR (21, 10))
7083 {
7084 case 0x82E: do_vec_neg (cpu); return;
7085 case 0x87E: do_vec_sqrt (cpu); return;
7086 default:
7087 if (INSTR (15, 10) == 0x30)
7088 {
7089 do_vec_mull (cpu);
7090 return;
7091 }
7092 break;
7093 }
7094 break;
7095
7096 case 0x2f:
7097 switch (INSTR (15, 10))
7098 {
7099 case 0x01: do_vec_SSHR_USHR (cpu); return;
7100 case 0x10:
7101 case 0x12: do_vec_mls_indexed (cpu); return;
7102 case 0x29: do_vec_xtl (cpu); return;
7103 default:
7104 HALT_NYI;
7105 }
7106
7107 default:
7108 break;
7109 }
7110
7111 HALT_NYI;
7112 }
7113
7114 /* 3 sources. */
7115
7116 /* Float multiply add. */
7117 static void
7118 fmadds (sim_cpu *cpu)
7119 {
7120 unsigned sa = INSTR (14, 10);
7121 unsigned sm = INSTR (20, 16);
7122 unsigned sn = INSTR ( 9, 5);
7123 unsigned sd = INSTR ( 4, 0);
7124
7125 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7126 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7127 + aarch64_get_FP_float (cpu, sn)
7128 * aarch64_get_FP_float (cpu, sm));
7129 }
7130
7131 /* Double multiply add. */
7132 static void
7133 fmaddd (sim_cpu *cpu)
7134 {
7135 unsigned sa = INSTR (14, 10);
7136 unsigned sm = INSTR (20, 16);
7137 unsigned sn = INSTR ( 9, 5);
7138 unsigned sd = INSTR ( 4, 0);
7139
7140 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7141 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7142 + aarch64_get_FP_double (cpu, sn)
7143 * aarch64_get_FP_double (cpu, sm));
7144 }
7145
7146 /* Float multiply subtract. */
7147 static void
7148 fmsubs (sim_cpu *cpu)
7149 {
7150 unsigned sa = INSTR (14, 10);
7151 unsigned sm = INSTR (20, 16);
7152 unsigned sn = INSTR ( 9, 5);
7153 unsigned sd = INSTR ( 4, 0);
7154
7155 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7156 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7157 - aarch64_get_FP_float (cpu, sn)
7158 * aarch64_get_FP_float (cpu, sm));
7159 }
7160
7161 /* Double multiply subtract. */
7162 static void
7163 fmsubd (sim_cpu *cpu)
7164 {
7165 unsigned sa = INSTR (14, 10);
7166 unsigned sm = INSTR (20, 16);
7167 unsigned sn = INSTR ( 9, 5);
7168 unsigned sd = INSTR ( 4, 0);
7169
7170 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7171 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7172 - aarch64_get_FP_double (cpu, sn)
7173 * aarch64_get_FP_double (cpu, sm));
7174 }
7175
7176 /* Float negative multiply add. */
7177 static void
7178 fnmadds (sim_cpu *cpu)
7179 {
7180 unsigned sa = INSTR (14, 10);
7181 unsigned sm = INSTR (20, 16);
7182 unsigned sn = INSTR ( 9, 5);
7183 unsigned sd = INSTR ( 4, 0);
7184
7185 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7186 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7187 + (- aarch64_get_FP_float (cpu, sn))
7188 * aarch64_get_FP_float (cpu, sm));
7189 }
7190
7191 /* Double negative multiply add. */
7192 static void
7193 fnmaddd (sim_cpu *cpu)
7194 {
7195 unsigned sa = INSTR (14, 10);
7196 unsigned sm = INSTR (20, 16);
7197 unsigned sn = INSTR ( 9, 5);
7198 unsigned sd = INSTR ( 4, 0);
7199
7200 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7201 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7202 + (- aarch64_get_FP_double (cpu, sn))
7203 * aarch64_get_FP_double (cpu, sm));
7204 }
7205
7206 /* Float negative multiply subtract. */
7207 static void
7208 fnmsubs (sim_cpu *cpu)
7209 {
7210 unsigned sa = INSTR (14, 10);
7211 unsigned sm = INSTR (20, 16);
7212 unsigned sn = INSTR ( 9, 5);
7213 unsigned sd = INSTR ( 4, 0);
7214
7215 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7216 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7217 + aarch64_get_FP_float (cpu, sn)
7218 * aarch64_get_FP_float (cpu, sm));
7219 }
7220
7221 /* Double negative multiply subtract. */
7222 static void
7223 fnmsubd (sim_cpu *cpu)
7224 {
7225 unsigned sa = INSTR (14, 10);
7226 unsigned sm = INSTR (20, 16);
7227 unsigned sn = INSTR ( 9, 5);
7228 unsigned sd = INSTR ( 4, 0);
7229
7230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7231 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7232 + aarch64_get_FP_double (cpu, sn)
7233 * aarch64_get_FP_double (cpu, sm));
7234 }
7235
7236 static void
7237 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7238 {
7239 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7240 instr[30] = 0
7241 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7242 instr[28,25] = 1111
7243 instr[24] = 1
7244 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7245 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7246 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7247
7248 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7249 /* dispatch on combined type:o1:o2. */
7250 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7251
7252 if (M_S != 0)
7253 HALT_UNALLOC;
7254
7255 switch (dispatch)
7256 {
7257 case 0: fmadds (cpu); return;
7258 case 1: fmsubs (cpu); return;
7259 case 2: fnmadds (cpu); return;
7260 case 3: fnmsubs (cpu); return;
7261 case 4: fmaddd (cpu); return;
7262 case 5: fmsubd (cpu); return;
7263 case 6: fnmaddd (cpu); return;
7264 case 7: fnmsubd (cpu); return;
7265 default:
7266 /* type > 1 is currently unallocated. */
7267 HALT_UNALLOC;
7268 }
7269 }
7270
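/* For example, FMSUB Dd, Dn, Dm, Da has type == 01, o1 == 0 and o2 == 1,
   so dispatch == (0x2 << 1) | 1 == 5 above and fmsubd is selected.  */
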
7271 static void
7272 dexSimpleFPFixedConvert (sim_cpu *cpu)
7273 {
7274 HALT_NYI;
7275 }
7276
7277 static void
7278 dexSimpleFPCondCompare (sim_cpu *cpu)
7279 {
7280 /* instr [31,23] = 0001 1110 0
7281 instr [22] = type
7282 instr [21] = 1
7283 instr [20,16] = Rm
7284 instr [15,12] = condition
7285 instr [11,10] = 01
7286 instr [9,5] = Rn
7287 instr [4] = 0
7288 instr [3,0] = nzcv */
7289
7290 unsigned rm = INSTR (20, 16);
7291 unsigned rn = INSTR (9, 5);
7292
7293 NYI_assert (31, 23, 0x3C);
7294 NYI_assert (11, 10, 0x1);
7295 NYI_assert (4, 4, 0);
7296
7297 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7298 if (! testConditionCode (cpu, INSTR (15, 12)))
7299 {
7300 aarch64_set_CPSR (cpu, INSTR (3, 0));
7301 return;
7302 }
7303
7304 if (INSTR (22, 22))
7305 {
7306 /* Double precision. */
7307 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7308 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7309
7310 /* FIXME: Check for NaNs. */
7311 if (val1 == val2)
7312 aarch64_set_CPSR (cpu, (Z | C));
7313 else if (val1 < val2)
7314 aarch64_set_CPSR (cpu, N);
7315 else /* val1 > val2 */
7316 aarch64_set_CPSR (cpu, C);
7317 }
7318 else
7319 {
7320 /* Single precision. */
7321 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7322 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7323
7324 /* FIXME: Check for NaNs. */
7325 if (val1 == val2)
7326 aarch64_set_CPSR (cpu, (Z | C));
7327 else if (val1 < val2)
7328 aarch64_set_CPSR (cpu, N);
7329 else /* val1 > val2 */
7330 aarch64_set_CPSR (cpu, C);
7331 }
7332 }
7333
7334 /* 2 sources. */
7335
7336 /* Float add. */
7337 static void
7338 fadds (sim_cpu *cpu)
7339 {
7340 unsigned sm = INSTR (20, 16);
7341 unsigned sn = INSTR ( 9, 5);
7342 unsigned sd = INSTR ( 4, 0);
7343
7344 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7345 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7346 + aarch64_get_FP_float (cpu, sm));
7347 }
7348
7349 /* Double add. */
7350 static void
7351 faddd (sim_cpu *cpu)
7352 {
7353 unsigned sm = INSTR (20, 16);
7354 unsigned sn = INSTR ( 9, 5);
7355 unsigned sd = INSTR ( 4, 0);
7356
7357 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7358 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7359 + aarch64_get_FP_double (cpu, sm));
7360 }
7361
7362 /* Float divide. */
7363 static void
7364 fdivs (sim_cpu *cpu)
7365 {
7366 unsigned sm = INSTR (20, 16);
7367 unsigned sn = INSTR ( 9, 5);
7368 unsigned sd = INSTR ( 4, 0);
7369
7370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7371 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7372 / aarch64_get_FP_float (cpu, sm));
7373 }
7374
7375 /* Double divide. */
7376 static void
7377 fdivd (sim_cpu *cpu)
7378 {
7379 unsigned sm = INSTR (20, 16);
7380 unsigned sn = INSTR ( 9, 5);
7381 unsigned sd = INSTR ( 4, 0);
7382
7383 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7384 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7385 / aarch64_get_FP_double (cpu, sm));
7386 }
7387
7388 /* Float multiply. */
7389 static void
7390 fmuls (sim_cpu *cpu)
7391 {
7392 unsigned sm = INSTR (20, 16);
7393 unsigned sn = INSTR ( 9, 5);
7394 unsigned sd = INSTR ( 4, 0);
7395
7396 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7397 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7398 * aarch64_get_FP_float (cpu, sm));
7399 }
7400
7401 /* Double multiply. */
7402 static void
7403 fmuld (sim_cpu *cpu)
7404 {
7405 unsigned sm = INSTR (20, 16);
7406 unsigned sn = INSTR ( 9, 5);
7407 unsigned sd = INSTR ( 4, 0);
7408
7409 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7410 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7411 * aarch64_get_FP_double (cpu, sm));
7412 }
7413
7414 /* Float negate and multiply. */
7415 static void
7416 fnmuls (sim_cpu *cpu)
7417 {
7418 unsigned sm = INSTR (20, 16);
7419 unsigned sn = INSTR ( 9, 5);
7420 unsigned sd = INSTR ( 4, 0);
7421
7422 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7423 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7424 * aarch64_get_FP_float (cpu, sm)));
7425 }
7426
7427 /* Double negate and multiply. */
7428 static void
7429 fnmuld (sim_cpu *cpu)
7430 {
7431 unsigned sm = INSTR (20, 16);
7432 unsigned sn = INSTR ( 9, 5);
7433 unsigned sd = INSTR ( 4, 0);
7434
7435 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7436 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7437 * aarch64_get_FP_double (cpu, sm)));
7438 }
7439
7440 /* Float subtract. */
7441 static void
7442 fsubs (sim_cpu *cpu)
7443 {
7444 unsigned sm = INSTR (20, 16);
7445 unsigned sn = INSTR ( 9, 5);
7446 unsigned sd = INSTR ( 4, 0);
7447
7448 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7449 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7450 - aarch64_get_FP_float (cpu, sm));
7451 }
7452
7453 /* Double subtract. */
7454 static void
7455 fsubd (sim_cpu *cpu)
7456 {
7457 unsigned sm = INSTR (20, 16);
7458 unsigned sn = INSTR ( 9, 5);
7459 unsigned sd = INSTR ( 4, 0);
7460
7461 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7462 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7463 - aarch64_get_FP_double (cpu, sm));
7464 }
7465
7466 static void
7467 do_FMINNM (sim_cpu *cpu)
7468 {
7469 /* instr[31,23] = 0 0011 1100
7470 instr[22] = float(0)/double(1)
7471 instr[21] = 1
7472 instr[20,16] = Sm
7473 instr[15,10] = 01 1110
7474 instr[9,5] = Sn
7475 instr[4,0] = Sd */
7476
7477 unsigned sm = INSTR (20, 16);
7478 unsigned sn = INSTR ( 9, 5);
7479 unsigned sd = INSTR ( 4, 0);
7480
7481 NYI_assert (31, 23, 0x03C);
7482 NYI_assert (15, 10, 0x1E);
7483
7484 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7485 if (INSTR (22, 22))
7486 aarch64_set_FP_double (cpu, sd,
7487 dminnm (aarch64_get_FP_double (cpu, sn),
7488 aarch64_get_FP_double (cpu, sm)));
7489 else
7490 aarch64_set_FP_float (cpu, sd,
7491 fminnm (aarch64_get_FP_float (cpu, sn),
7492 aarch64_get_FP_float (cpu, sm)));
7493 }
7494
7495 static void
7496 do_FMAXNM (sim_cpu *cpu)
7497 {
7498 /* instr[31,23] = 0 0011 1100
7499 instr[22] = float(0)/double(1)
7500 instr[21] = 1
7501 instr[20,16] = Sm
7502 instr[15,10] = 01 1010
7503 instr[9,5] = Sn
7504 instr[4,0] = Cpu */
7505
7506 unsigned sm = INSTR (20, 16);
7507 unsigned sn = INSTR ( 9, 5);
7508 unsigned sd = INSTR ( 4, 0);
7509
7510 NYI_assert (31, 23, 0x03C);
7511 NYI_assert (15, 10, 0x1A);
7512
7513 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7514 if (INSTR (22, 22))
7515 aarch64_set_FP_double (cpu, sd,
7516 dmaxnm (aarch64_get_FP_double (cpu, sn),
7517 aarch64_get_FP_double (cpu, sm)));
7518 else
7519 aarch64_set_FP_float (cpu, sd,
7520 fmaxnm (aarch64_get_FP_float (cpu, sn),
7521 aarch64_get_FP_float (cpu, sm)));
7522 }
7523
7524 static void
7525 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7526 {
7527 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7528 instr[30] = 0
7529 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7530 instr[28,25] = 1111
7531 instr[24] = 0
7532 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7533 instr[21] = 1
7534 instr[20,16] = Vm
7535 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7536 0010 ==> FADD, 0011 ==> FSUB,
7537 0100 ==> FMAX, 0101 ==> FMIN
7538 0110 ==> FMAXNM, 0111 ==> FMINNM
7539 1000 ==> FNMUL, ow ==> UNALLOC
7540 instr[11,10] = 10
7541 instr[9,5] = Vn
7542 instr[4,0] = Vd */
7543
7544 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7545 uint32_t type = INSTR (23, 22);
7546 /* Dispatch on opcode. */
7547 uint32_t dispatch = INSTR (15, 12);
7548
7549 if (type > 1)
7550 HALT_UNALLOC;
7551
7552 if (M_S != 0)
7553 HALT_UNALLOC;
7554
7555 if (type)
7556 switch (dispatch)
7557 {
7558 case 0: fmuld (cpu); return;
7559 case 1: fdivd (cpu); return;
7560 case 2: faddd (cpu); return;
7561 case 3: fsubd (cpu); return;
7562 case 6: do_FMAXNM (cpu); return;
7563 case 7: do_FMINNM (cpu); return;
7564 case 8: fnmuld (cpu); return;
7565
7566 /* Have not yet implemented fmax and fmin. */
7567 case 4:
7568 case 5:
7569 HALT_NYI;
7570
7571 default:
7572 HALT_UNALLOC;
7573 }
7574 else /* type == 0 => floats. */
7575 switch (dispatch)
7576 {
7577 case 0: fmuls (cpu); return;
7578 case 1: fdivs (cpu); return;
7579 case 2: fadds (cpu); return;
7580 case 3: fsubs (cpu); return;
7581 case 6: do_FMAXNM (cpu); return;
7582 case 7: do_FMINNM (cpu); return;
7583 case 8: fnmuls (cpu); return;
7584
7585 case 4:
7586 case 5:
7587 HALT_NYI;
7588
7589 default:
7590 HALT_UNALLOC;
7591 }
7592 }
7593
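/* For example, FDIV Sd, Sn, Sm has type == 00 and opcode == 0001, so the
   float half of the dispatch above selects fdivs.  */
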
7594 static void
7595 dexSimpleFPCondSelect (sim_cpu *cpu)
7596 {
7597 /* FCSEL
7598 instr[31,23] = 0 0011 1100
7599 instr[22] = 0=>single 1=>double
7600 instr[21] = 1
7601 instr[20,16] = Sm
7602 instr[15,12] = cond
7603 instr[11,10] = 11
7604 instr[9,5] = Sn
7605 instr[4,0] = Sd */
7606 unsigned sm = INSTR (20, 16);
7607 unsigned sn = INSTR ( 9, 5);
7608 unsigned sd = INSTR ( 4, 0);
7609 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7610
7611 NYI_assert (31, 23, 0x03C);
7612 NYI_assert (11, 10, 0x3);
7613
7614 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7615 if (INSTR (22, 22))
7616 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7617 : aarch64_get_FP_double (cpu, sm)));
7618 else
7619 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7620 : aarch64_get_FP_float (cpu, sm)));
7621 }
7622
7623 /* Store 32 bit unscaled signed 9 bit. */
7624 static void
7625 fsturs (sim_cpu *cpu, int32_t offset)
7626 {
7627 unsigned int rn = INSTR (9, 5);
7628 unsigned int st = INSTR (4, 0);
7629
7630 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7631 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7632 aarch64_get_vec_u32 (cpu, st, 0));
7633 }
7634
7635 /* Store 64 bit unscaled signed 9 bit. */
7636 static void
7637 fsturd (sim_cpu *cpu, int32_t offset)
7638 {
7639 unsigned int rn = INSTR (9, 5);
7640 unsigned int st = INSTR (4, 0);
7641
7642 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7643 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7644 aarch64_get_vec_u64 (cpu, st, 0));
7645 }
7646
7647 /* Store 128 bit unscaled signed 9 bit. */
7648 static void
7649 fsturq (sim_cpu *cpu, int32_t offset)
7650 {
7651 unsigned int rn = INSTR (9, 5);
7652 unsigned int st = INSTR (4, 0);
7653 FRegister a;
7654
7655 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7656 aarch64_get_FP_long_double (cpu, st, & a);
7657 aarch64_set_mem_long_double (cpu,
7658 aarch64_get_reg_u64 (cpu, rn, SP_OK)
7659 + offset, a);
7660 }
7661
7662 /* TODO FP move register. */
7663
7664 /* 32 bit fp to fp move register. */
7665 static void
7666 ffmovs (sim_cpu *cpu)
7667 {
7668 unsigned int rn = INSTR (9, 5);
7669 unsigned int st = INSTR (4, 0);
7670
7671 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7672 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7673 }
7674
7675 /* 64 bit fp to fp move register. */
7676 static void
7677 ffmovd (sim_cpu *cpu)
7678 {
7679 unsigned int rn = INSTR (9, 5);
7680 unsigned int st = INSTR (4, 0);
7681
7682 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7683 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7684 }
7685
7686 /* 32 bit GReg to Vec move register. */
7687 static void
7688 fgmovs (sim_cpu *cpu)
7689 {
7690 unsigned int rn = INSTR (9, 5);
7691 unsigned int st = INSTR (4, 0);
7692
7693 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7694 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7695 }
7696
7697 /* 64 bit g to fp move register. */
7698 static void
7699 fgmovd (sim_cpu *cpu)
7700 {
7701 unsigned int rn = INSTR (9, 5);
7702 unsigned int st = INSTR (4, 0);
7703
7704 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7705 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7706 }
7707
7708 /* 32 bit fp to g move register. */
7709 static void
7710 gfmovs (sim_cpu *cpu)
7711 {
7712 unsigned int rn = INSTR (9, 5);
7713 unsigned int st = INSTR (4, 0);
7714
7715 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7716 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7717 }
7718
7719 /* 64 bit fp to g move register. */
7720 static void
7721 gfmovd (sim_cpu *cpu)
7722 {
7723 unsigned int rn = INSTR (9, 5);
7724 unsigned int st = INSTR (4, 0);
7725
7726 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7727 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7728 }
7729
7730 /* FP move immediate
7731
7732 These install an immediate 8 bit value in the target register
7733 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7734 bit exponent. */
7735
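/* For example, assuming fp_immediate_for_encoding_32 implements the usual
   AArch64 FMOV (immediate) expansion, the encodable values are
   +/- n/16 * 2^r with 16 <= n <= 31 and -3 <= r <= 4, and imm8 == 0x00
   decodes to 2.0.  */
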
7736 static void
7737 fmovs (sim_cpu *cpu)
7738 {
7739 unsigned int sd = INSTR (4, 0);
7740 uint32_t imm = INSTR (20, 13);
7741 float f = fp_immediate_for_encoding_32 (imm);
7742
7743 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7744 aarch64_set_FP_float (cpu, sd, f);
7745 }
7746
7747 static void
7748 fmovd (sim_cpu *cpu)
7749 {
7750 unsigned int sd = INSTR (4, 0);
7751 uint32_t imm = INSTR (20, 13);
7752 double d = fp_immediate_for_encoding_64 (imm);
7753
7754 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7755 aarch64_set_FP_double (cpu, sd, d);
7756 }
7757
7758 static void
7759 dexSimpleFPImmediate (sim_cpu *cpu)
7760 {
7761 /* instr[31,23] == 0001 1110 0
7762 instr[22] == type : single(0)/double(1)
7763 instr[21] == 1
7764 instr[20,13] == imm8
7765 instr[12,10] == 100
7766 instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
7767 instr[4,0] == Rd */
7768 uint32_t imm5 = INSTR (9, 5);
7769
7770 NYI_assert (31, 23, 0x3C);
7771
7772 if (imm5 != 0)
7773 HALT_UNALLOC;
7774
7775 if (INSTR (22, 22))
7776 fmovd (cpu);
7777 else
7778 fmovs (cpu);
7779 }
7780
7781 /* TODO specific decode and execute for group Load Store. */
7782
7783 /* TODO FP load/store single register (unscaled offset). */
7784
7785 /* TODO load 8 bit unscaled signed 9 bit. */
7786 /* TODO load 16 bit unscaled signed 9 bit. */
7787
7788 /* Load 32 bit unscaled signed 9 bit. */
7789 static void
7790 fldurs (sim_cpu *cpu, int32_t offset)
7791 {
7792 unsigned int rn = INSTR (9, 5);
7793 unsigned int st = INSTR (4, 0);
7794
7795 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7796 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7797 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7798 }
7799
7800 /* Load 64 bit unscaled signed 9 bit. */
7801 static void
7802 fldurd (sim_cpu *cpu, int32_t offset)
7803 {
7804 unsigned int rn = INSTR (9, 5);
7805 unsigned int st = INSTR (4, 0);
7806
7807 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7808 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7809 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7810 }
7811
7812 /* Load 128 bit unscaled signed 9 bit. */
7813 static void
7814 fldurq (sim_cpu *cpu, int32_t offset)
7815 {
7816 unsigned int rn = INSTR (9, 5);
7817 unsigned int st = INSTR (4, 0);
7818 FRegister a;
7819 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7820
7821 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7822 aarch64_get_mem_long_double (cpu, addr, & a);
7823 aarch64_set_FP_long_double (cpu, st, a);
7824 }
7825
7826 /* TODO store 8 bit unscaled signed 9 bit. */
7827 /* TODO store 16 bit unscaled signed 9 bit. */
7828
7829
7830 /* 1 source. */
7831
7832 /* Float absolute value. */
7833 static void
7834 fabss (sim_cpu *cpu)
7835 {
7836 unsigned sn = INSTR (9, 5);
7837 unsigned sd = INSTR (4, 0);
7838 float value = aarch64_get_FP_float (cpu, sn);
7839
7840 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7841 aarch64_set_FP_float (cpu, sd, fabsf (value));
7842 }
7843
7844 /* Double absolute value. */
7845 static void
7846 fabcpu (sim_cpu *cpu)
7847 {
7848 unsigned sn = INSTR (9, 5);
7849 unsigned sd = INSTR (4, 0);
7850 double value = aarch64_get_FP_double (cpu, sn);
7851
7852 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7853 aarch64_set_FP_double (cpu, sd, fabs (value));
7854 }
7855
7856 /* Float negative value. */
7857 static void
7858 fnegs (sim_cpu *cpu)
7859 {
7860 unsigned sn = INSTR (9, 5);
7861 unsigned sd = INSTR (4, 0);
7862
7863 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7864 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7865 }
7866
7867 /* Double negative value. */
7868 static void
7869 fnegd (sim_cpu *cpu)
7870 {
7871 unsigned sn = INSTR (9, 5);
7872 unsigned sd = INSTR (4, 0);
7873
7874 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7875 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7876 }
7877
7878 /* Float square root. */
7879 static void
7880 fsqrts (sim_cpu *cpu)
7881 {
7882 unsigned sn = INSTR (9, 5);
7883 unsigned sd = INSTR (4, 0);
7884
7885 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7886 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7887 }
7888
7889 /* Double square root. */
7890 static void
7891 fsqrtd (sim_cpu *cpu)
7892 {
7893 unsigned sn = INSTR (9, 5);
7894 unsigned sd = INSTR (4, 0);
7895
7896 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7897 aarch64_set_FP_double (cpu, sd,
7898 sqrt (aarch64_get_FP_double (cpu, sn)));
7899 }
7900
7901 /* Convert double to float. */
7902 static void
7903 fcvtds (sim_cpu *cpu)
7904 {
7905 unsigned sn = INSTR (9, 5);
7906 unsigned sd = INSTR (4, 0);
7907
7908 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7909 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7910 }
7911
7912 /* Convert float to double. */
7913 static void
7914 fcvtcpu (sim_cpu *cpu)
7915 {
7916 unsigned sn = INSTR (9, 5);
7917 unsigned sd = INSTR (4, 0);
7918
7919 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7920 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7921 }
7922
7923 static void
7924 do_FRINT (sim_cpu *cpu)
7925 {
7926 /* instr[31,23] = 0001 1110 0
7927 instr[22] = single(0)/double(1)
7928 instr[21,18] = 1001
7929 instr[17,15] = rounding mode
7930 instr[14,10] = 10000
7931 instr[9,5] = source
7932 instr[4,0] = dest */
7933
7934 float val;
7935 unsigned rs = INSTR (9, 5);
7936 unsigned rd = INSTR (4, 0);
7937 unsigned int rmode = INSTR (17, 15);
7938
7939 NYI_assert (31, 23, 0x03C);
7940 NYI_assert (21, 18, 0x9);
7941 NYI_assert (14, 10, 0x10);
7942
7943 if (rmode == 6 || rmode == 7)
7944 /* FIXME: Add support for rmode == 6 exactness check. */
7945 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7946
7947 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7948 if (INSTR (22, 22))
7949 {
7950 double val = aarch64_get_FP_double (cpu, rs);
7951
7952 switch (rmode)
7953 {
7954 case 0: /* mode N: nearest or even. */
7955 {
7956 double rval = round (val);
7957
7958 /* round () breaks ties away from zero; pull an odd result back
7959 towards zero to give ties-to-even. */
7960 if (fabs (val - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
7961 rval -= copysign (1.0, rval);
7962
7963 aarch64_set_FP_double (cpu, rd, rval);
7965 return;
7966 }
7967
7968 case 1: /* mode P: towards +inf. */
7969 aarch64_set_FP_double (cpu, rd, ceil (val));
7970 return;
7971
7972 case 2: /* mode M: towards -inf. */
7973 aarch64_set_FP_double (cpu, rd, floor (val));
7974 return;
7981
7982 case 3: /* mode Z: towards 0. */
7983 aarch64_set_FP_double (cpu, rd, trunc (val));
7984 return;
7985
7986 case 4: /* mode A: away from 0. */
7987 aarch64_set_FP_double (cpu, rd, round (val));
7988 return;
7989
7990 case 6: /* mode X: use FPCR with exactness check. */
7991 case 7: /* mode I: use FPCR mode. */
7992 HALT_NYI;
7993
7994 default:
7995 HALT_UNALLOC;
7996 }
7997 }
7998
7999 val = aarch64_get_FP_float (cpu, rs);
8000
8001 switch (rmode)
8002 {
8003 case 0: /* mode N: nearest or even. */
8004 {
8005 float rval = roundf (val);
8006
8007 /* roundf () breaks ties away from zero; as in the double case,
8008 pull an odd result back towards zero to give ties-to-even. */
8009 if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
8010 rval -= copysignf (1.0f, rval);
8012
8013 aarch64_set_FP_float (cpu, rd, rval);
8014 return;
8015 }
8016
8017 case 1: /* mode P: towards +inf. */
8018 aarch64_set_FP_float (cpu, rd, ceilf (val));
8019 return;
8020
8021 case 2: /* mode M: towards -inf. */
8022 aarch64_set_FP_float (cpu, rd, floorf (val));
8023 return;
8030
8031 case 3: /* mode Z: towards 0. */
8032 aarch64_set_FP_float (cpu, rd, truncf (val));
8033 return;
8034
8035 case 4: /* mode A: away from 0. */
8036 aarch64_set_FP_float (cpu, rd, roundf (val));
8037 return;
8038
8039 case 6: /* mode X: use FPCR with exactness check. */
8040 case 7: /* mode I: use FPCR mode. */
8041 HALT_NYI;
8042
8043 default:
8044 HALT_UNALLOC;
8045 }
8046 }
8047
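/* A worked example of the mode N ties-to-even adjustment above:
   round (2.5) == 3.0 (odd), so it is pulled back to 2.0;
   round (-2.5) == -3.0 (odd), pulled back to -2.0;
   round (3.5) == 4.0 (even), kept as is.  */
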
8048 /* Convert half to float. */
8049 static void
8050 do_FCVT_half_to_single (sim_cpu *cpu)
8051 {
8052 unsigned rn = INSTR (9, 5);
8053 unsigned rd = INSTR (4, 0);
8054
8055 NYI_assert (31, 10, 0x7B890);
8056
8057 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8058 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
8059 }
8060
8061 /* Convert half to double. */
8062 static void
8063 do_FCVT_half_to_double (sim_cpu *cpu)
8064 {
8065 unsigned rn = INSTR (9, 5);
8066 unsigned rd = INSTR (4, 0);
8067
8068 NYI_assert (31, 10, 0x7B8B0);
8069
8070 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8071 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
8072 }
8073
8074 static void
8075 do_FCVT_single_to_half (sim_cpu *cpu)
8076 {
8077 unsigned rn = INSTR (9, 5);
8078 unsigned rd = INSTR (4, 0);
8079
8080 NYI_assert (31, 10, 0x788F0);
8081
8082 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8083 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
8084 }
8085
8086 /* Convert double to half. */
8087 static void
8088 do_FCVT_double_to_half (sim_cpu *cpu)
8089 {
8090 unsigned rn = INSTR (9, 5);
8091 unsigned rd = INSTR (4, 0);
8092
8093 NYI_assert (31, 10, 0x798F0);
8094
8095 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8096 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
8097 }
8098
8099 static void
8100 dexSimpleFPDataProc1Source (sim_cpu *cpu)
8101 {
8102 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
8103 instr[30] = 0
8104 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8105 instr[28,25] = 1111
8106 instr[24] = 0
8107 instr[23,22] ==> type : 00 ==> source is single,
8108 01 ==> source is double
8109 10 ==> UNALLOC
8110 11 ==> UNALLOC or source is half
8111 instr[21] = 1
8112 instr[20,15] ==> opcode : with type 00 or 01
8113 000000 ==> FMOV, 000001 ==> FABS,
8114 000010 ==> FNEG, 000011 ==> FSQRT,
8115 000100 ==> FCVT (double-to-single), 000101 ==> FCVT (single-to-double)
8116 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
8117 001000 ==> FRINTN, 001001 ==> FRINTP,
8118 001010 ==> FRINTM, 001011 ==> FRINTZ,
8119 001100 ==> FRINTA, 001101 ==> UNALLOC
8120 001110 ==> FRINTX, 001111 ==> FRINTI
8121 with type 11
8122 000100 ==> FCVT (half-to-single)
8123 000101 ==> FCVT (half-to-double)
8124 instr[14,10] = 10000. */
8125
8126 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8127 uint32_t type = INSTR (23, 22);
8128 uint32_t opcode = INSTR (20, 15);
8129
8130 if (M_S != 0)
8131 HALT_UNALLOC;
8132
8133 if (type == 3)
8134 {
8135 if (opcode == 4)
8136 do_FCVT_half_to_single (cpu);
8137 else if (opcode == 5)
8138 do_FCVT_half_to_double (cpu);
8139 else
8140 HALT_UNALLOC;
8141 return;
8142 }
8143
8144 if (type == 2)
8145 HALT_UNALLOC;
8146
8147 switch (opcode)
8148 {
8149 case 0:
8150 if (type)
8151 ffmovd (cpu);
8152 else
8153 ffmovs (cpu);
8154 return;
8155
8156 case 1:
8157 if (type)
8158 fabcpu (cpu);
8159 else
8160 fabss (cpu);
8161 return;
8162
8163 case 2:
8164 if (type)
8165 fnegd (cpu);
8166 else
8167 fnegs (cpu);
8168 return;
8169
8170 case 3:
8171 if (type)
8172 fsqrtd (cpu);
8173 else
8174 fsqrts (cpu);
8175 return;
8176
8177 case 4:
8178 if (type)
8179 fcvtds (cpu);
8180 else
8181 HALT_UNALLOC;
8182 return;
8183
8184 case 5:
8185 if (type)
8186 HALT_UNALLOC;
8187 fcvtcpu (cpu);
8188 return;
8189
8190 case 8: /* FRINTN etc. */
8191 case 9:
8192 case 10:
8193 case 11:
8194 case 12:
8195 case 14:
8196 case 15:
8197 do_FRINT (cpu);
8198 return;
8199
8200 case 7:
8201 if (INSTR (22, 22))
8202 do_FCVT_double_to_half (cpu);
8203 else
8204 do_FCVT_single_to_half (cpu);
8205 return;
8206
8207 case 13:
8208 HALT_NYI;
8209
8210 default:
8211 HALT_UNALLOC;
8212 }
8213 }
8214
8215 /* 32 bit signed int to float. */
8216 static void
8217 scvtf32 (sim_cpu *cpu)
8218 {
8219 unsigned rn = INSTR (9, 5);
8220 unsigned sd = INSTR (4, 0);
8221
8222 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8223 aarch64_set_FP_float
8224 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8225 }
8226
8227 /* 64 bit signed int to float. */
8228 static void
8229 scvtf (sim_cpu *cpu)
8230 {
8231 unsigned rn = INSTR (9, 5);
8232 unsigned sd = INSTR (4, 0);
8233
8234 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8235 aarch64_set_FP_float
8236 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8237 }
8238
8239 /* 32 bit signed int to double. */
8240 static void
8241 scvtd32 (sim_cpu *cpu)
8242 {
8243 unsigned rn = INSTR (9, 5);
8244 unsigned sd = INSTR (4, 0);
8245
8246 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8247 aarch64_set_FP_double
8248 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8249 }
8250
8251 /* 64 bit signed int to double. */
8252 static void
8253 scvtd (sim_cpu *cpu)
8254 {
8255 unsigned rn = INSTR (9, 5);
8256 unsigned sd = INSTR (4, 0);
8257
8258 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8259 aarch64_set_FP_double
8260 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8261 }
8262
8263 static const float FLOAT_INT_MAX = (float) INT_MAX;
8264 static const float FLOAT_INT_MIN = (float) INT_MIN;
8265 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8266 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8267 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8268 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8269 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8270 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8271
8272 #define UINT_MIN 0
8273 #define ULONG_MIN 0
8274 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8275 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8276 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8277 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8278 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8279 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8280 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8281 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
8282
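/* Note that some of these bounds are not exactly representable:
   (float) INT_MAX, for instance, rounds up to 2147483648.0f (2^31).
   RAISE_EXCEPTIONS below therefore compares with >= and <= so that a
   value equal to such a rounded bound still saturates.  */
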
8283 /* Check for FP exception conditions:
8284 NaN raises IO
8285 Infinity raises IO
8286 Out of Range raises IO and IX and saturates value
8287 Denormal raises ID and IX and sets to zero. */
8288 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8289 do \
8290 { \
8291 switch (fpclassify (F)) \
8292 { \
8293 case FP_INFINITE: \
8294 case FP_NAN: \
8295 aarch64_set_FPSR (cpu, IO); \
8296 if (signbit (F)) \
8297 VALUE = ITYPE##_MIN; \
8298 else \
8299 VALUE = ITYPE##_MAX; \
8300 break; \
8301 \
8302 case FP_NORMAL: \
8303 if (F >= FTYPE##_##ITYPE##_MAX) \
8304 { \
8305 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8306 VALUE = ITYPE##_MAX; \
8307 } \
8308 else if (F <= FTYPE##_##ITYPE##_MIN) \
8309 { \
8310 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8311 VALUE = ITYPE##_MIN; \
8312 } \
8313 break; \
8314 \
8315 case FP_SUBNORMAL: \
8316 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8317 VALUE = 0; \
8318 break; \
8319 \
8320 default: \
8321 case FP_ZERO: \
8322 VALUE = 0; \
8323 break; \
8324 } \
8325 } \
8326 while (0)
8327
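/* Illustrative sketch (not part of the simulator): how the conversion
   routines below use RAISE_EXCEPTIONS.  The raw C cast is undefined for
   out-of-range inputs, which is why the macro overwrites VALUE with the
   saturated result afterwards.  */
#if 0
  float f = 3.0e9f;             /* Above (float) INT_MAX.  */
  int32_t value = (int32_t) f;
  RAISE_EXCEPTIONS (f, value, FLOAT, INT);
  /* Now value == INT_MAX and the IO and IX flags are set in the FPSR.  */
#endif
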
8328 /* 32 bit convert float to signed int truncate towards zero. */
8329 static void
8330 fcvtszs32 (sim_cpu *cpu)
8331 {
8332 unsigned sn = INSTR (9, 5);
8333 unsigned rd = INSTR (4, 0);
8334 /* A C float-to-int cast truncates towards zero, as FCVTZS requires. */
8335 float f = aarch64_get_FP_float (cpu, sn);
8336 int32_t value = (int32_t) f;
8337
8338 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8339
8340 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8341 /* Avoid sign extension to 64 bit. */
8342 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8343 }
8344
8345 /* 64 bit convert float to signed int truncate towards zero. */
8346 static void
8347 fcvtszs (sim_cpu *cpu)
8348 {
8349 unsigned sn = INSTR (9, 5);
8350 unsigned rd = INSTR (4, 0);
8351 float f = aarch64_get_FP_float (cpu, sn);
8352 int64_t value = (int64_t) f;
8353
8354 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8355
8356 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8357 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8358 }
8359
8360 /* 32 bit convert double to signed int truncate towards zero. */
8361 static void
8362 fcvtszd32 (sim_cpu *cpu)
8363 {
8364 unsigned sn = INSTR (9, 5);
8365 unsigned rd = INSTR (4, 0);
8366 /* A C double-to-int cast truncates towards zero, as FCVTZS requires. */
8367 double d = aarch64_get_FP_double (cpu, sn);
8368 int32_t value = (int32_t) d;
8369
8370 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8371
8372 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8373 /* Avoid sign extension to 64 bit. */
8374 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8375 }
8376
8377 /* 64 bit convert double to signed int truncate towards zero. */
8378 static void
8379 fcvtszd (sim_cpu *cpu)
8380 {
8381 unsigned sn = INSTR (9, 5);
8382 unsigned rd = INSTR (4, 0);
8383 /* A C double-to-int cast truncates towards zero, as FCVTZS requires. */
8384 double d = aarch64_get_FP_double (cpu, sn);
8385 int64_t value;
8386
8387 value = (int64_t) d;
8388
8389 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8390
8391 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8392 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8393 }
8394
8395 static void
8396 do_fcvtzu (sim_cpu *cpu)
8397 {
8398 /* instr[31] = size: 32-bit (0), 64-bit (1)
8399 instr[30,23] = 00111100
8400 instr[22] = type: single (0)/ double (1)
8401 instr[21] = 0 ==> fixed-point (uses precision field), 1 ==> integer
8402 instr[20,16] = 11001
8403 instr[15,10] = precision
8404 instr[9,5] = Rs
8405 instr[4,0] = Rd. */
8406
8407 unsigned rs = INSTR (9, 5);
8408 unsigned rd = INSTR (4, 0);
8409
8410 NYI_assert (30, 23, 0x3C);
8411 NYI_assert (20, 16, 0x19);
8412
8413 if (INSTR (21, 21) != 1)
8414 /* Convert to fixed point. */
8415 HALT_NYI;
8416
8417 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8418 if (INSTR (31, 31))
8419 {
8420 /* Convert to unsigned 64-bit integer. */
8421 if (INSTR (22, 22))
8422 {
8423 double d = aarch64_get_FP_double (cpu, rs);
8424 uint64_t value = (uint64_t) d;
8425
8426 /* Do not raise an exception if we have reached ULONG_MAX. */
8427 if (value != (1ULL << 63))
8428 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8429
8430 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8431 }
8432 else
8433 {
8434 float f = aarch64_get_FP_float (cpu, rs);
8435 uint64_t value = (uint64_t) f;
8436
8437 /* Do not raise an exception if we have reached ULONG_MAX. */
8438 if (value != (1ULL << 63))
8439 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8440
8441 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8442 }
8443 }
8444 else
8445 {
8446 uint32_t value;
8447
8448 /* Convert to unsigned 32-bit integer. */
8449 if (INSTR (22, 22))
8450 {
8451 double d = aarch64_get_FP_double (cpu, rs);
8452
8453 value = (uint32_t) d;
8454 /* Do not raise an exception if we have reached UINT_MAX. */
8455 if (value != (1UL << 31))
8456 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8457 }
8458 else
8459 {
8460 float f = aarch64_get_FP_float (cpu, rs);
8461
8462 value = (uint32_t) f;
8463 /* Do not raise an exception if we have reached UINT_MAX. */
8464 if (value != (1UL << 31))
8465 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8466 }
8467
8468 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8469 }
8470 }
8471
8472 static void
8473 do_UCVTF (sim_cpu *cpu)
8474 {
8475 /* instr[31] = size: 32-bit (0), 64-bit (1)
8476 instr[30,23] = 001 1110 0
8477 instr[22] = type: single (0)/ double (1)
     instr[21]    = 1 ==> integer conversion, 0 ==> fixed-point conversion
8479 instr[20,16] = 0 0011
8480 instr[15,10] = precision
8481 instr[9,5] = Rs
8482 instr[4,0] = Rd. */
8483
8484 unsigned rs = INSTR (9, 5);
8485 unsigned rd = INSTR (4, 0);
8486
8487 NYI_assert (30, 23, 0x3C);
8488 NYI_assert (20, 16, 0x03);
8489
8490 if (INSTR (21, 21) != 1)
8491 HALT_NYI;
8492
8493 /* FIXME: Add exception raising. */
8494 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8495 if (INSTR (31, 31))
8496 {
8497 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8498
8499 if (INSTR (22, 22))
8500 aarch64_set_FP_double (cpu, rd, (double) value);
8501 else
8502 aarch64_set_FP_float (cpu, rd, (float) value);
8503 }
8504 else
8505 {
8506 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8507
8508 if (INSTR (22, 22))
8509 aarch64_set_FP_double (cpu, rd, (double) value);
8510 else
8511 aarch64_set_FP_float (cpu, rd, (float) value);
8512 }
8513 }
8514
8515 static void
8516 float_vector_move (sim_cpu *cpu)
8517 {
8518 /* instr[31,17] == 100 1111 0101 0111
8519 instr[16] ==> direction 0=> to GR, 1=> from GR
     instr[15,10] ==> must be 0, ow UNALLOC
8521 instr[9,5] ==> source
8522 instr[4,0] ==> dest. */
8523
8524 unsigned rn = INSTR (9, 5);
8525 unsigned rd = INSTR (4, 0);
8526
8527 NYI_assert (31, 17, 0x4F57);
8528
8529 if (INSTR (15, 10) != 0)
8530 HALT_UNALLOC;
8531
8532 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8533 if (INSTR (16, 16))
8534 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8535 else
8536 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8537 }
8538
8539 static void
8540 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8541 {
8542 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30]    = 0
8544 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8545 instr[28,25] = 1111
8546 instr[24] = 0
8547 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8548 instr[21] = 1
8549 instr[20,19] = rmode
8550 instr[18,16] = opcode
     instr[15,10] = 00 0000  */
8552
8553 uint32_t rmode_opcode;
8554 uint32_t size_type;
8555 uint32_t type;
8556 uint32_t size;
8557 uint32_t S;
8558
8559 if (INSTR (31, 17) == 0x4F57)
8560 {
8561 float_vector_move (cpu);
8562 return;
8563 }
8564
8565 size = INSTR (31, 31);
8566 S = INSTR (29, 29);
8567 if (S != 0)
8568 HALT_UNALLOC;
8569
8570 type = INSTR (23, 22);
8571 if (type > 1)
8572 HALT_UNALLOC;
8573
8574 rmode_opcode = INSTR (20, 16);
8575 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8576
8577 switch (rmode_opcode)
8578 {
8579 case 2: /* SCVTF. */
8580 switch (size_type)
8581 {
8582 case 0: scvtf32 (cpu); return;
8583 case 1: scvtd32 (cpu); return;
8584 case 2: scvtf (cpu); return;
8585 case 3: scvtd (cpu); return;
8586 }
8587
8588 case 6: /* FMOV GR, Vec. */
8589 switch (size_type)
8590 {
8591 case 0: gfmovs (cpu); return;
8592 case 3: gfmovd (cpu); return;
8593 default: HALT_UNALLOC;
8594 }
8595
8596 case 7: /* FMOV vec, GR. */
8597 switch (size_type)
8598 {
8599 case 0: fgmovs (cpu); return;
8600 case 3: fgmovd (cpu); return;
8601 default: HALT_UNALLOC;
8602 }
8603
8604 case 24: /* FCVTZS. */
8605 switch (size_type)
8606 {
8607 case 0: fcvtszs32 (cpu); return;
8608 case 1: fcvtszd32 (cpu); return;
8609 case 2: fcvtszs (cpu); return;
8610 case 3: fcvtszd (cpu); return;
8611 }
8612
8613 case 25: do_fcvtzu (cpu); return;
8614 case 3: do_UCVTF (cpu); return;
8615
8616 case 0: /* FCVTNS. */
8617 case 1: /* FCVTNU. */
8618 case 4: /* FCVTAS. */
8619 case 5: /* FCVTAU. */
8620 case 8: /* FCVPTS. */
8621 case 9: /* FCVTPU. */
8622 case 16: /* FCVTMS. */
8623 case 17: /* FCVTMU. */
8624 default:
8625 HALT_NYI;
8626 }
8627 }
8628
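/* Set NZCV exactly as an FCMP would: equal ==> Z|C (0110),
   less than ==> N (1000), greater than ==> C (0010),
   unordered ==> C|V (0011).  */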
8629 static void
8630 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8631 {
8632 uint32_t flags;
8633
8634 /* FIXME: Add exception raising. */
8635 if (isnan (fvalue1) || isnan (fvalue2))
8636 flags = C|V;
8637 else if (isinf (fvalue1) && isinf (fvalue2))
8638 {
      /* Subtracting two infinities may give a NaN.  We only need to
	 compare the signs, which we can get from isinf -- note this
	 relies on glibc returning +1/-1 according to sign, where C99
	 only guarantees a nonzero result.  */
8641 int result = isinf (fvalue1) - isinf (fvalue2);
8642
8643 if (result == 0)
8644 flags = Z|C;
8645 else if (result < 0)
8646 flags = N;
8647 else /* (result > 0). */
8648 flags = C;
8649 }
8650 else
8651 {
8652 float result = fvalue1 - fvalue2;
8653
8654 if (result == 0.0)
8655 flags = Z|C;
8656 else if (result < 0)
8657 flags = N;
8658 else /* (result > 0). */
8659 flags = C;
8660 }
8661
8662 aarch64_set_CPSR (cpu, flags);
8663 }
8664
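/* Float compare -- Invalid Operation exception only on signaling NaNs.  */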
8665 static void
8666 fcmps (sim_cpu *cpu)
8667 {
8668 unsigned sm = INSTR (20, 16);
8669 unsigned sn = INSTR ( 9, 5);
8670
8671 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8672 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8673
8674 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8675 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8676 }
8677
8678 /* Float compare to zero -- Invalid Operation exception
8679 only on signaling NaNs. */
8680 static void
8681 fcmpzs (sim_cpu *cpu)
8682 {
8683 unsigned sn = INSTR ( 9, 5);
8684 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8685
8686 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8687 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8688 }
8689
8690 /* Float compare -- Invalid Operation exception on all NaNs. */
8691 static void
8692 fcmpes (sim_cpu *cpu)
8693 {
8694 unsigned sm = INSTR (20, 16);
8695 unsigned sn = INSTR ( 9, 5);
8696
8697 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8698 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8699
8700 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8701 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8702 }
8703
8704 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8705 static void
8706 fcmpzes (sim_cpu *cpu)
8707 {
8708 unsigned sn = INSTR ( 9, 5);
8709 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8710
8711 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8712 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8713 }
8714
8715 static void
8716 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8717 {
8718 uint32_t flags;
8719
8720 /* FIXME: Add exception raising. */
8721 if (isnan (dval1) || isnan (dval2))
8722 flags = C|V;
8723 else if (isinf (dval1) && isinf (dval2))
8724 {
      /* Subtracting two infinities may give a NaN.  We only need to
	 compare the signs, which we can get from isinf -- note this
	 relies on glibc returning +1/-1 according to sign, where C99
	 only guarantees a nonzero result.  */
8727 int result = isinf (dval1) - isinf (dval2);
8728
8729 if (result == 0)
8730 flags = Z|C;
8731 else if (result < 0)
8732 flags = N;
8733 else /* (result > 0). */
8734 flags = C;
8735 }
8736 else
8737 {
8738 double result = dval1 - dval2;
8739
8740 if (result == 0.0)
8741 flags = Z|C;
8742 else if (result < 0)
8743 flags = N;
8744 else /* (result > 0). */
8745 flags = C;
8746 }
8747
8748 aarch64_set_CPSR (cpu, flags);
8749 }
8750
8751 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8752 static void
8753 fcmpd (sim_cpu *cpu)
8754 {
8755 unsigned sm = INSTR (20, 16);
8756 unsigned sn = INSTR ( 9, 5);
8757
8758 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8759 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8760
8761 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8762 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8763 }
8764
8765 /* Double compare to zero -- Invalid Operation exception
8766 only on signaling NaNs. */
8767 static void
8768 fcmpzd (sim_cpu *cpu)
8769 {
8770 unsigned sn = INSTR ( 9, 5);
8771 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8772
8773 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8774 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8775 }
8776
8777 /* Double compare -- Invalid Operation exception on all NaNs. */
8778 static void
8779 fcmped (sim_cpu *cpu)
8780 {
8781 unsigned sm = INSTR (20, 16);
8782 unsigned sn = INSTR ( 9, 5);
8783
8784 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8785 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8786
8787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8788 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8789 }
8790
8791 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8792 static void
8793 fcmpzed (sim_cpu *cpu)
8794 {
8795 unsigned sn = INSTR ( 9, 5);
8796 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8797
8798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8799 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8800 }
8801
8802 static void
8803 dexSimpleFPCompare (sim_cpu *cpu)
8804 {
8805 /* assert instr[28,25] == 1111
     instr[30] = 0, instr[24] = 0, instr[21] = 1, instr[13,10] = 1000
     instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
     instr[29] ==> S :  0 ==> OK, 1 ==> UNALLOC
     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8810 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8811 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8812 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8813 ow ==> UNALLOC */
8814 uint32_t dispatch;
8815 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8816 uint32_t type = INSTR (23, 22);
8817 uint32_t op = INSTR (15, 14);
8818 uint32_t op2_2_0 = INSTR (2, 0);
8819
8820 if (op2_2_0 != 0)
8821 HALT_UNALLOC;
8822
8823 if (M_S != 0)
8824 HALT_UNALLOC;
8825
8826 if (type > 1)
8827 HALT_UNALLOC;
8828
8829 if (op != 0)
8830 HALT_UNALLOC;
8831
8832 /* dispatch on type and top 2 bits of opcode. */
8833 dispatch = (type << 2) | INSTR (4, 3);
8834
8835 switch (dispatch)
8836 {
8837 case 0: fcmps (cpu); return;
8838 case 1: fcmpzs (cpu); return;
8839 case 2: fcmpes (cpu); return;
8840 case 3: fcmpzes (cpu); return;
8841 case 4: fcmpd (cpu); return;
8842 case 5: fcmpzd (cpu); return;
8843 case 6: fcmped (cpu); return;
8844 case 7: fcmpzed (cpu); return;
8845 }
8846 }
8847
8848 static void
8849 do_scalar_FADDP (sim_cpu *cpu)
8850 {
8851 /* instr [31,23] = 0111 1110 0
8852 instr [22] = single(0)/double(1)
8853 instr [21,10] = 11 0000 1101 10
8854 instr [9,5] = Fn
8855 instr [4,0] = Fd. */
8856
8857 unsigned Fn = INSTR (9, 5);
8858 unsigned Fd = INSTR (4, 0);
8859
8860 NYI_assert (31, 23, 0x0FC);
8861 NYI_assert (21, 10, 0xC36);
8862
8863 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8864 if (INSTR (22, 22))
8865 {
8866 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8867 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8868
8869 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8870 }
8871 else
8872 {
8873 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8874 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8875
8876 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8877 }
8878 }
8879
8880 /* Floating point absolute difference. */
8881
8882 static void
8883 do_scalar_FABD (sim_cpu *cpu)
8884 {
8885 /* instr [31,23] = 0111 1110 1
8886 instr [22] = float(0)/double(1)
8887 instr [21] = 1
8888 instr [20,16] = Rm
8889 instr [15,10] = 1101 01
8890 instr [9, 5] = Rn
8891 instr [4, 0] = Rd. */
8892
8893 unsigned rm = INSTR (20, 16);
8894 unsigned rn = INSTR (9, 5);
8895 unsigned rd = INSTR (4, 0);
8896
8897 NYI_assert (31, 23, 0x0FD);
8898 NYI_assert (21, 21, 1);
8899 NYI_assert (15, 10, 0x35);
8900
8901 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8902 if (INSTR (22, 22))
8903 aarch64_set_FP_double (cpu, rd,
8904 fabs (aarch64_get_FP_double (cpu, rn)
8905 - aarch64_get_FP_double (cpu, rm)));
8906 else
8907 aarch64_set_FP_float (cpu, rd,
8908 fabsf (aarch64_get_FP_float (cpu, rn)
8909 - aarch64_get_FP_float (cpu, rm)));
8910 }
8911
8912 static void
8913 do_scalar_CMGT (sim_cpu *cpu)
8914 {
8915 /* instr [31,21] = 0101 1110 111
8916 instr [20,16] = Rm
8917 instr [15,10] = 00 1101
8918 instr [9, 5] = Rn
8919 instr [4, 0] = Rd. */
8920
8921 unsigned rm = INSTR (20, 16);
8922 unsigned rn = INSTR (9, 5);
8923 unsigned rd = INSTR (4, 0);
8924
8925 NYI_assert (31, 21, 0x2F7);
8926 NYI_assert (15, 10, 0x0D);
8927
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* CMGT is a signed comparison (the unsigned form is CMHI).  */
  aarch64_set_vec_u64 (cpu, rd, 0,
		       aarch64_get_vec_s64 (cpu, rn, 0) >
		       aarch64_get_vec_s64 (cpu, rm, 0) ? -1L : 0L);
8932 }
8933
8934 static void
8935 do_scalar_USHR (sim_cpu *cpu)
8936 {
8937 /* instr [31,23] = 0111 1111 0
8938 instr [22,16] = shift amount
8939 instr [15,10] = 0000 01
8940 instr [9, 5] = Rn
8941 instr [4, 0] = Rd. */
8942
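  /* For the 64-bit scalar form immh<3> is set, so the shift amount
     is encoded as 128 - immh:immb, giving a right shift of 1..64.  */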
8943 unsigned amount = 128 - INSTR (22, 16);
8944 unsigned rn = INSTR (9, 5);
8945 unsigned rd = INSTR (4, 0);
8946
8947 NYI_assert (31, 23, 0x0FE);
8948 NYI_assert (15, 10, 0x01);
8949
8950 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8951 aarch64_set_vec_u64 (cpu, rd, 0,
8952 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8953 }
8954
8955 static void
8956 do_scalar_SSHL (sim_cpu *cpu)
8957 {
8958 /* instr [31,21] = 0101 1110 111
8959 instr [20,16] = Rm
8960 instr [15,10] = 0100 01
8961 instr [9, 5] = Rn
8962 instr [4, 0] = Rd. */
8963
8964 unsigned rm = INSTR (20, 16);
8965 unsigned rn = INSTR (9, 5);
8966 unsigned rd = INSTR (4, 0);
8967 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8968
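  /* SSHL takes its shift count from the signed low byte of Rm;
     a negative count shifts right arithmetically instead.  */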
8969 NYI_assert (31, 21, 0x2F7);
8970 NYI_assert (15, 10, 0x11);
8971
8972 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8973 if (shift >= 0)
8974 aarch64_set_vec_s64 (cpu, rd, 0,
8975 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8976 else
8977 aarch64_set_vec_s64 (cpu, rd, 0,
8978 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8979 }
8980
8981 /* Floating point scalar compare greater than or equal to 0. */
8982 static void
8983 do_scalar_FCMGE_zero (sim_cpu *cpu)
8984 {
8985 /* instr [31,23] = 0111 1110 1
8986 instr [22,22] = size
8987 instr [21,16] = 1000 00
8988 instr [15,10] = 1100 10
8989 instr [9, 5] = Rn
8990 instr [4, 0] = Rd. */
8991
8992 unsigned size = INSTR (22, 22);
8993 unsigned rn = INSTR (9, 5);
8994 unsigned rd = INSTR (4, 0);
8995
8996 NYI_assert (31, 23, 0x0FD);
8997 NYI_assert (21, 16, 0x20);
8998 NYI_assert (15, 10, 0x32);
8999
9000 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9001 if (size)
9002 aarch64_set_vec_u64 (cpu, rd, 0,
9003 aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0);
9004 else
9005 aarch64_set_vec_u32 (cpu, rd, 0,
9006 aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0);
9007 }
9008
9009 /* Floating point scalar compare less than or equal to 0. */
9010 static void
9011 do_scalar_FCMLE_zero (sim_cpu *cpu)
9012 {
9013 /* instr [31,23] = 0111 1110 1
9014 instr [22,22] = size
9015 instr [21,16] = 1000 00
9016 instr [15,10] = 1101 10
9017 instr [9, 5] = Rn
9018 instr [4, 0] = Rd. */
9019
9020 unsigned size = INSTR (22, 22);
9021 unsigned rn = INSTR (9, 5);
9022 unsigned rd = INSTR (4, 0);
9023
9024 NYI_assert (31, 23, 0x0FD);
9025 NYI_assert (21, 16, 0x20);
9026 NYI_assert (15, 10, 0x36);
9027
9028 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9029 if (size)
9030 aarch64_set_vec_u64 (cpu, rd, 0,
9031 aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0);
9032 else
9033 aarch64_set_vec_u32 (cpu, rd, 0,
9034 aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0);
9035 }
9036
9037 /* Floating point scalar compare greater than 0. */
9038 static void
9039 do_scalar_FCMGT_zero (sim_cpu *cpu)
9040 {
9041 /* instr [31,23] = 0101 1110 1
9042 instr [22,22] = size
9043 instr [21,16] = 1000 00
9044 instr [15,10] = 1100 10
9045 instr [9, 5] = Rn
9046 instr [4, 0] = Rd. */
9047
9048 unsigned size = INSTR (22, 22);
9049 unsigned rn = INSTR (9, 5);
9050 unsigned rd = INSTR (4, 0);
9051
9052 NYI_assert (31, 23, 0x0BD);
9053 NYI_assert (21, 16, 0x20);
9054 NYI_assert (15, 10, 0x32);
9055
9056 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9057 if (size)
9058 aarch64_set_vec_u64 (cpu, rd, 0,
9059 aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0);
9060 else
9061 aarch64_set_vec_u32 (cpu, rd, 0,
9062 aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0);
9063 }
9064
9065 /* Floating point scalar compare equal to 0. */
9066 static void
9067 do_scalar_FCMEQ_zero (sim_cpu *cpu)
9068 {
9069 /* instr [31,23] = 0101 1110 1
9070 instr [22,22] = size
9071 instr [21,16] = 1000 00
9072 instr [15,10] = 1101 10
9073 instr [9, 5] = Rn
9074 instr [4, 0] = Rd. */
9075
9076 unsigned size = INSTR (22, 22);
9077 unsigned rn = INSTR (9, 5);
9078 unsigned rd = INSTR (4, 0);
9079
9080 NYI_assert (31, 23, 0x0BD);
9081 NYI_assert (21, 16, 0x20);
9082 NYI_assert (15, 10, 0x36);
9083
9084 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9085 if (size)
9086 aarch64_set_vec_u64 (cpu, rd, 0,
9087 aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0);
9088 else
9089 aarch64_set_vec_u32 (cpu, rd, 0,
9090 aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0);
9091 }
9092
9093 /* Floating point scalar compare less than 0. */
9094 static void
9095 do_scalar_FCMLT_zero (sim_cpu *cpu)
9096 {
9097 /* instr [31,23] = 0101 1110 1
9098 instr [22,22] = size
9099 instr [21,16] = 1000 00
9100 instr [15,10] = 1110 10
9101 instr [9, 5] = Rn
9102 instr [4, 0] = Rd. */
9103
9104 unsigned size = INSTR (22, 22);
9105 unsigned rn = INSTR (9, 5);
9106 unsigned rd = INSTR (4, 0);
9107
9108 NYI_assert (31, 23, 0x0BD);
9109 NYI_assert (21, 16, 0x20);
9110 NYI_assert (15, 10, 0x3A);
9111
9112 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9113 if (size)
9114 aarch64_set_vec_u64 (cpu, rd, 0,
9115 aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0);
9116 else
9117 aarch64_set_vec_u32 (cpu, rd, 0,
9118 aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0);
9119 }
9120
9121 static void
9122 do_scalar_shift (sim_cpu *cpu)
9123 {
9124 /* instr [31,23] = 0101 1111 0
9125 instr [22,16] = shift amount
9126 instr [15,10] = 0101 01 [SHL]
9127 instr [15,10] = 0000 01 [SSHR]
9128 instr [9, 5] = Rn
9129 instr [4, 0] = Rd. */
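  /* With immh<3> (bit 22) set the element size is 64 bits, so SSHR
     encodes its shift as 128 - immh:immb (1..64) and SHL encodes it
     as immh:immb - 64 (0..63).  E.g. immh:immb == 0x78 means either
     SSHR #8 or SHL #56, depending on the opcode bits.  */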
9130
9131 unsigned rn = INSTR (9, 5);
9132 unsigned rd = INSTR (4, 0);
9133 unsigned amount;
9134
9135 NYI_assert (31, 23, 0x0BE);
9136
9137 if (INSTR (22, 22) == 0)
9138 HALT_UNALLOC;
9139
9140 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9141 switch (INSTR (15, 10))
9142 {
9143 case 0x01: /* SSHR */
9144 amount = 128 - INSTR (22, 16);
9145 aarch64_set_vec_s64 (cpu, rd, 0,
9146 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
9147 return;
9148 case 0x15: /* SHL */
9149 amount = INSTR (22, 16) - 64;
9150 aarch64_set_vec_u64 (cpu, rd, 0,
9151 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
9152 return;
9153 default:
9154 HALT_NYI;
9155 }
9156 }
9157
9158 /* FCMEQ FCMGT FCMGE. */
9159 static void
9160 do_scalar_FCM (sim_cpu *cpu)
9161 {
9162 /* instr [31,30] = 01
9163 instr [29] = U
9164 instr [28,24] = 1 1110
9165 instr [23] = E
9166 instr [22] = size
9167 instr [21] = 1
9168 instr [20,16] = Rm
9169 instr [15,12] = 1110
9170 instr [11] = AC
9171 instr [10] = 1
9172 instr [9, 5] = Rn
9173 instr [4, 0] = Rd. */
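  /* The E, U and ac bits jointly select the comparison:
     000 ==> FCMEQ, 010 ==> FCMGE, 011 ==> FACGE (on absolute values),
     110 ==> FCMGT, 111 ==> FACGT.  */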
9174
9175 unsigned rm = INSTR (20, 16);
9176 unsigned rn = INSTR (9, 5);
9177 unsigned rd = INSTR (4, 0);
9178 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
9179 unsigned result;
9180 float val1;
9181 float val2;
9182
9183 NYI_assert (31, 30, 1);
9184 NYI_assert (28, 24, 0x1E);
9185 NYI_assert (21, 21, 1);
9186 NYI_assert (15, 12, 0xE);
9187 NYI_assert (10, 10, 1);
9188
9189 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9190 if (INSTR (22, 22))
9191 {
9192 double val1 = aarch64_get_FP_double (cpu, rn);
9193 double val2 = aarch64_get_FP_double (cpu, rm);
9194
9195 switch (EUac)
9196 {
9197 case 0: /* 000 */
9198 result = val1 == val2;
9199 break;
9200
9201 case 3: /* 011 */
9202 val1 = fabs (val1);
9203 val2 = fabs (val2);
9204 /* Fall through. */
9205 case 2: /* 010 */
9206 result = val1 >= val2;
9207 break;
9208
9209 case 7: /* 111 */
9210 val1 = fabs (val1);
9211 val2 = fabs (val2);
9212 /* Fall through. */
9213 case 6: /* 110 */
9214 result = val1 > val2;
9215 break;
9216
9217 default:
9218 HALT_UNALLOC;
9219 }
9220
      /* The double-precision compare yields a 64-bit mask.  */
      aarch64_set_vec_u64 (cpu, rd, 0, result ? -1ULL : 0);
9222 return;
9223 }
9224
9225 val1 = aarch64_get_FP_float (cpu, rn);
9226 val2 = aarch64_get_FP_float (cpu, rm);
9227
9228 switch (EUac)
9229 {
9230 case 0: /* 000 */
9231 result = val1 == val2;
9232 break;
9233
9234 case 3: /* 011 */
9235 val1 = fabsf (val1);
9236 val2 = fabsf (val2);
9237 /* Fall through. */
9238 case 2: /* 010 */
9239 result = val1 >= val2;
9240 break;
9241
9242 case 7: /* 111 */
9243 val1 = fabsf (val1);
9244 val2 = fabsf (val2);
9245 /* Fall through. */
9246 case 6: /* 110 */
9247 result = val1 > val2;
9248 break;
9249
9250 default:
9251 HALT_UNALLOC;
9252 }
9253
9254 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9255 }
9256
9257 /* An alias of DUP. */
9258 static void
9259 do_scalar_MOV (sim_cpu *cpu)
9260 {
9261 /* instr [31,21] = 0101 1110 000
9262 instr [20,16] = imm5
9263 instr [15,10] = 0000 01
9264 instr [9, 5] = Rn
9265 instr [4, 0] = Rd. */
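  /* imm5 encodes both element size and lane: the lowest set bit
     selects the size (bit 16 ==> byte, ... bit 19 ==> doubleword)
     and the bits above it give the source index.  */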
9266
9267 unsigned rn = INSTR (9, 5);
9268 unsigned rd = INSTR (4, 0);
9269 unsigned index;
9270
9271 NYI_assert (31, 21, 0x2F0);
9272 NYI_assert (15, 10, 0x01);
9273
9274 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9275 if (INSTR (16, 16))
9276 {
9277 /* 8-bit. */
9278 index = INSTR (20, 17);
9279 aarch64_set_vec_u8
9280 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
9281 }
9282 else if (INSTR (17, 17))
9283 {
9284 /* 16-bit. */
9285 index = INSTR (20, 18);
9286 aarch64_set_vec_u16
9287 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9288 }
9289 else if (INSTR (18, 18))
9290 {
9291 /* 32-bit. */
9292 index = INSTR (20, 19);
9293 aarch64_set_vec_u32
9294 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9295 }
9296 else if (INSTR (19, 19))
9297 {
9298 /* 64-bit. */
9299 index = INSTR (20, 20);
9300 aarch64_set_vec_u64
9301 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9302 }
9303 else
9304 HALT_UNALLOC;
9305 }
9306
9307 static void
9308 do_scalar_NEG (sim_cpu *cpu)
9309 {
9310 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9311 instr [9, 5] = Rn
9312 instr [4, 0] = Rd. */
9313
9314 unsigned rn = INSTR (9, 5);
9315 unsigned rd = INSTR (4, 0);
9316
9317 NYI_assert (31, 10, 0x1FB82E);
9318
9319 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9320 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9321 }
9322
9323 static void
9324 do_scalar_USHL (sim_cpu *cpu)
9325 {
9326 /* instr [31,21] = 0111 1110 111
9327 instr [20,16] = Rm
9328 instr [15,10] = 0100 01
9329 instr [9, 5] = Rn
9330 instr [4, 0] = Rd. */
9331
9332 unsigned rm = INSTR (20, 16);
9333 unsigned rn = INSTR (9, 5);
9334 unsigned rd = INSTR (4, 0);
9335 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9336
9337 NYI_assert (31, 21, 0x3F7);
9338 NYI_assert (15, 10, 0x11);
9339
9340 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9341 if (shift >= 0)
9342 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9343 else
9344 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9345 }
9346
9347 static void
9348 do_double_add (sim_cpu *cpu)
9349 {
9350 /* instr [31,21] = 0101 1110 111
9351 instr [20,16] = Fn
9352 instr [15,10] = 1000 01
9353 instr [9,5] = Fm
9354 instr [4,0] = Fd. */
9355 unsigned Fd;
9356 unsigned Fm;
9357 unsigned Fn;
9358 double val1;
9359 double val2;
9360
9361 NYI_assert (31, 21, 0x2F7);
9362 NYI_assert (15, 10, 0x21);
9363
9364 Fd = INSTR (4, 0);
9365 Fm = INSTR (9, 5);
9366 Fn = INSTR (20, 16);
9367
9368 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9369 val1 = aarch64_get_FP_double (cpu, Fm);
9370 val2 = aarch64_get_FP_double (cpu, Fn);
9371
9372 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9373 }
9374
9375 static void
9376 do_scalar_UCVTF (sim_cpu *cpu)
9377 {
9378 /* instr [31,23] = 0111 1110 0
9379 instr [22] = single(0)/double(1)
9380 instr [21,10] = 10 0001 1101 10
9381 instr [9,5] = rn
9382 instr [4,0] = rd. */
9383
9384 unsigned rn = INSTR (9, 5);
9385 unsigned rd = INSTR (4, 0);
9386
9387 NYI_assert (31, 23, 0x0FC);
9388 NYI_assert (21, 10, 0x876);
9389
9390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9391 if (INSTR (22, 22))
9392 {
9393 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9394
9395 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9396 }
9397 else
9398 {
9399 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9400
9401 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9402 }
9403 }
9404
9405 static void
9406 do_scalar_vec (sim_cpu *cpu)
9407 {
9408 /* instr [30] = 1. */
9409 /* instr [28,25] = 1111. */
9410 switch (INSTR (31, 23))
9411 {
9412 case 0xBC:
9413 switch (INSTR (15, 10))
9414 {
9415 case 0x01: do_scalar_MOV (cpu); return;
9416 case 0x39: do_scalar_FCM (cpu); return;
9417 case 0x3B: do_scalar_FCM (cpu); return;
9418 }
9419 break;
9420
9421 case 0xBE: do_scalar_shift (cpu); return;
9422
9423 case 0xFC:
9424 switch (INSTR (15, 10))
9425 {
9426 case 0x36:
9427 switch (INSTR (21, 16))
9428 {
9429 case 0x30: do_scalar_FADDP (cpu); return;
9430 case 0x21: do_scalar_UCVTF (cpu); return;
9431 }
9432 HALT_NYI;
9433 case 0x39: do_scalar_FCM (cpu); return;
9434 case 0x3B: do_scalar_FCM (cpu); return;
9435 }
9436 break;
9437
9438 case 0xFD:
9439 switch (INSTR (15, 10))
9440 {
9441 case 0x0D: do_scalar_CMGT (cpu); return;
9442 case 0x11: do_scalar_USHL (cpu); return;
9443 case 0x2E: do_scalar_NEG (cpu); return;
9444 case 0x32: do_scalar_FCMGE_zero (cpu); return;
9445 case 0x35: do_scalar_FABD (cpu); return;
9446 case 0x36: do_scalar_FCMLE_zero (cpu); return;
9447 case 0x39: do_scalar_FCM (cpu); return;
9448 case 0x3B: do_scalar_FCM (cpu); return;
9449 default:
9450 HALT_NYI;
9451 }
9452
9453 case 0xFE: do_scalar_USHR (cpu); return;
9454
9455 case 0xBD:
9456 switch (INSTR (15, 10))
9457 {
9458 case 0x21: do_double_add (cpu); return;
9459 case 0x11: do_scalar_SSHL (cpu); return;
9460 case 0x32: do_scalar_FCMGT_zero (cpu); return;
9461 case 0x36: do_scalar_FCMEQ_zero (cpu); return;
9462 case 0x3A: do_scalar_FCMLT_zero (cpu); return;
9463 default:
9464 HALT_NYI;
9465 }
9466
9467 default:
9468 HALT_NYI;
9469 }
9470 }
9471
9472 static void
9473 dexAdvSIMD1 (sim_cpu *cpu)
9474 {
9475 /* instr [28,25] = 1 111. */
9476
9477 /* We are currently only interested in the basic
9478 scalar fp routines which all have bit 30 = 0. */
9479 if (INSTR (30, 30))
9480 do_scalar_vec (cpu);
9481
9482 /* instr[24] is set for FP data processing 3-source and clear for
9483 all other basic scalar fp instruction groups. */
9484 else if (INSTR (24, 24))
9485 dexSimpleFPDataProc3Source (cpu);
9486
9487 /* instr[21] is clear for floating <-> fixed conversions and set for
9488 all other basic scalar fp instruction groups. */
9489 else if (!INSTR (21, 21))
9490 dexSimpleFPFixedConvert (cpu);
9491
9492 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9493 11 ==> cond select, 00 ==> other. */
9494 else
9495 switch (INSTR (11, 10))
9496 {
9497 case 1: dexSimpleFPCondCompare (cpu); return;
9498 case 2: dexSimpleFPDataProc2Source (cpu); return;
9499 case 3: dexSimpleFPCondSelect (cpu); return;
9500
9501 default:
9502 /* Now an ordered cascade of tests.
9503 FP immediate has instr [12] == 1.
9504 FP compare has instr [13] == 1.
9505 FP Data Proc 1 Source has instr [14] == 1.
9506 FP floating <--> integer conversions has instr [15] == 0. */
9507 if (INSTR (12, 12))
9508 dexSimpleFPImmediate (cpu);
9509
9510 else if (INSTR (13, 13))
9511 dexSimpleFPCompare (cpu);
9512
9513 else if (INSTR (14, 14))
9514 dexSimpleFPDataProc1Source (cpu);
9515
9516 else if (!INSTR (15, 15))
9517 dexSimpleFPIntegerConvert (cpu);
9518
9519 else
9520 /* If we get here then instr[15] == 1 which means UNALLOC. */
9521 HALT_UNALLOC;
9522 }
9523 }
9524
9525 /* PC relative addressing. */
9526
9527 static void
9528 pcadr (sim_cpu *cpu)
9529 {
9530 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9531 instr[30,29] = immlo
9532 instr[23,5] = immhi. */
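  /* ADR computes PC + imm; ADRP computes (PC & ~0xfff) + (imm << 12).
     E.g. ADRP at PC 0x400123 with an immediate of 1 yields 0x401000.  */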
9533 uint64_t address;
9534 unsigned rd = INSTR (4, 0);
9535 uint32_t isPage = INSTR (31, 31);
  union { int64_t s64; uint64_t u64; } imm;
9537 uint64_t offset;
9538
9539 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9540 offset = imm.u64;
9541 offset = (offset << 2) | INSTR (30, 29);
9542
9543 address = aarch64_get_PC (cpu);
9544
9545 if (isPage)
9546 {
9547 offset <<= 12;
9548 address &= ~0xfff;
9549 }
9550
9551 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9552 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9553 }
9554
9555 /* Specific decode and execute for group Data Processing Immediate. */
9556
9557 static void
9558 dexPCRelAddressing (sim_cpu *cpu)
9559 {
9560 /* assert instr[28,24] = 10000. */
9561 pcadr (cpu);
9562 }
9563
/* Immediate logical.
   The bimm32/64 argument is constructed by replicating a 2, 4, 8,
   16, 32 or 64 bit sequence pulled out at decode and possibly
   inverting it.

   N.B. the output register (dest) can normally be Xn or SP;
   the exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  The input register can
   never be SP.  */
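/* For example N=0, immr=0, imms=0x3c (0b111100) selects a 2-bit
   element containing a single 1 and no rotation, which replicates
   to 0x55555555 (32 bit) or 0x5555555555555555 (64 bit) -- assuming
   the usual v8 bitmask-immediate decode sketched above for
   expand_logical_immediate.  */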
9573
9574 /* 32 bit and immediate. */
9575 static void
9576 and32 (sim_cpu *cpu, uint32_t bimm)
9577 {
9578 unsigned rn = INSTR (9, 5);
9579 unsigned rd = INSTR (4, 0);
9580
9581 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9582 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9583 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9584 }
9585
9586 /* 64 bit and immediate. */
9587 static void
9588 and64 (sim_cpu *cpu, uint64_t bimm)
9589 {
9590 unsigned rn = INSTR (9, 5);
9591 unsigned rd = INSTR (4, 0);
9592
9593 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9594 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9595 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9596 }
9597
9598 /* 32 bit and immediate set flags. */
9599 static void
9600 ands32 (sim_cpu *cpu, uint32_t bimm)
9601 {
9602 unsigned rn = INSTR (9, 5);
9603 unsigned rd = INSTR (4, 0);
9604
9605 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9606 uint32_t value2 = bimm;
9607
9608 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9609 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9610 set_flags_for_binop32 (cpu, value1 & value2);
9611 }
9612
9613 /* 64 bit and immediate set flags. */
9614 static void
9615 ands64 (sim_cpu *cpu, uint64_t bimm)
9616 {
9617 unsigned rn = INSTR (9, 5);
9618 unsigned rd = INSTR (4, 0);
9619
9620 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9621 uint64_t value2 = bimm;
9622
9623 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9624 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9625 set_flags_for_binop64 (cpu, value1 & value2);
9626 }
9627
9628 /* 32 bit exclusive or immediate. */
9629 static void
9630 eor32 (sim_cpu *cpu, uint32_t bimm)
9631 {
9632 unsigned rn = INSTR (9, 5);
9633 unsigned rd = INSTR (4, 0);
9634
9635 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9636 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9637 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9638 }
9639
9640 /* 64 bit exclusive or immediate. */
9641 static void
9642 eor64 (sim_cpu *cpu, uint64_t bimm)
9643 {
9644 unsigned rn = INSTR (9, 5);
9645 unsigned rd = INSTR (4, 0);
9646
9647 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9648 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9649 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9650 }
9651
9652 /* 32 bit or immediate. */
9653 static void
9654 orr32 (sim_cpu *cpu, uint32_t bimm)
9655 {
9656 unsigned rn = INSTR (9, 5);
9657 unsigned rd = INSTR (4, 0);
9658
9659 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9660 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9661 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9662 }
9663
9664 /* 64 bit or immediate. */
9665 static void
9666 orr64 (sim_cpu *cpu, uint64_t bimm)
9667 {
9668 unsigned rn = INSTR (9, 5);
9669 unsigned rd = INSTR (4, 0);
9670
9671 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9672 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9673 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9674 }
9675
9676 /* Logical shifted register.
9677 These allow an optional LSL, ASR, LSR or ROR to the second source
9678 register with a count up to the register bit count.
   N.B. register args may not be SP.  */
9680
9681 /* 32 bit AND shifted register. */
9682 static void
9683 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9684 {
9685 unsigned rm = INSTR (20, 16);
9686 unsigned rn = INSTR (9, 5);
9687 unsigned rd = INSTR (4, 0);
9688
9689 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9690 aarch64_set_reg_u64
9691 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9692 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9693 }
9694
9695 /* 64 bit AND shifted register. */
9696 static void
9697 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9698 {
9699 unsigned rm = INSTR (20, 16);
9700 unsigned rn = INSTR (9, 5);
9701 unsigned rd = INSTR (4, 0);
9702
9703 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9704 aarch64_set_reg_u64
9705 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9706 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9707 }
9708
9709 /* 32 bit AND shifted register setting flags. */
9710 static void
9711 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9712 {
9713 unsigned rm = INSTR (20, 16);
9714 unsigned rn = INSTR (9, 5);
9715 unsigned rd = INSTR (4, 0);
9716
9717 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9718 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9719 shift, count);
9720
9721 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9722 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9723 set_flags_for_binop32 (cpu, value1 & value2);
9724 }
9725
9726 /* 64 bit AND shifted register setting flags. */
9727 static void
9728 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9729 {
9730 unsigned rm = INSTR (20, 16);
9731 unsigned rn = INSTR (9, 5);
9732 unsigned rd = INSTR (4, 0);
9733
9734 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9735 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9736 shift, count);
9737
9738 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9739 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9740 set_flags_for_binop64 (cpu, value1 & value2);
9741 }
9742
9743 /* 32 bit BIC shifted register. */
9744 static void
9745 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9746 {
9747 unsigned rm = INSTR (20, 16);
9748 unsigned rn = INSTR (9, 5);
9749 unsigned rd = INSTR (4, 0);
9750
9751 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9752 aarch64_set_reg_u64
9753 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9754 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9755 }
9756
9757 /* 64 bit BIC shifted register. */
9758 static void
9759 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9760 {
9761 unsigned rm = INSTR (20, 16);
9762 unsigned rn = INSTR (9, 5);
9763 unsigned rd = INSTR (4, 0);
9764
9765 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9766 aarch64_set_reg_u64
9767 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9768 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9769 }
9770
9771 /* 32 bit BIC shifted register setting flags. */
9772 static void
9773 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9774 {
9775 unsigned rm = INSTR (20, 16);
9776 unsigned rn = INSTR (9, 5);
9777 unsigned rd = INSTR (4, 0);
9778
9779 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9780 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9781 shift, count);
9782
9783 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9784 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9785 set_flags_for_binop32 (cpu, value1 & value2);
9786 }
9787
9788 /* 64 bit BIC shifted register setting flags. */
9789 static void
9790 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9791 {
9792 unsigned rm = INSTR (20, 16);
9793 unsigned rn = INSTR (9, 5);
9794 unsigned rd = INSTR (4, 0);
9795
9796 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9797 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9798 shift, count);
9799
9800 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9801 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9802 set_flags_for_binop64 (cpu, value1 & value2);
9803 }
9804
9805 /* 32 bit EON shifted register. */
9806 static void
9807 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9808 {
9809 unsigned rm = INSTR (20, 16);
9810 unsigned rn = INSTR (9, 5);
9811 unsigned rd = INSTR (4, 0);
9812
9813 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9814 aarch64_set_reg_u64
9815 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9816 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9817 }
9818
9819 /* 64 bit EON shifted register. */
9820 static void
9821 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9822 {
9823 unsigned rm = INSTR (20, 16);
9824 unsigned rn = INSTR (9, 5);
9825 unsigned rd = INSTR (4, 0);
9826
9827 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9828 aarch64_set_reg_u64
9829 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9830 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9831 }
9832
9833 /* 32 bit EOR shifted register. */
9834 static void
9835 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9836 {
9837 unsigned rm = INSTR (20, 16);
9838 unsigned rn = INSTR (9, 5);
9839 unsigned rd = INSTR (4, 0);
9840
9841 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9842 aarch64_set_reg_u64
9843 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9844 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9845 }
9846
9847 /* 64 bit EOR shifted register. */
9848 static void
9849 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9850 {
9851 unsigned rm = INSTR (20, 16);
9852 unsigned rn = INSTR (9, 5);
9853 unsigned rd = INSTR (4, 0);
9854
9855 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9856 aarch64_set_reg_u64
9857 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9858 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9859 }
9860
9861 /* 32 bit ORR shifted register. */
9862 static void
9863 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9864 {
9865 unsigned rm = INSTR (20, 16);
9866 unsigned rn = INSTR (9, 5);
9867 unsigned rd = INSTR (4, 0);
9868
9869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9870 aarch64_set_reg_u64
9871 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9872 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9873 }
9874
9875 /* 64 bit ORR shifted register. */
9876 static void
9877 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9878 {
9879 unsigned rm = INSTR (20, 16);
9880 unsigned rn = INSTR (9, 5);
9881 unsigned rd = INSTR (4, 0);
9882
9883 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9884 aarch64_set_reg_u64
9885 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9886 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9887 }
9888
9889 /* 32 bit ORN shifted register. */
9890 static void
9891 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9892 {
9893 unsigned rm = INSTR (20, 16);
9894 unsigned rn = INSTR (9, 5);
9895 unsigned rd = INSTR (4, 0);
9896
9897 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9898 aarch64_set_reg_u64
9899 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9900 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9901 }
9902
9903 /* 64 bit ORN shifted register. */
9904 static void
9905 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9906 {
9907 unsigned rm = INSTR (20, 16);
9908 unsigned rn = INSTR (9, 5);
9909 unsigned rd = INSTR (4, 0);
9910
9911 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9912 aarch64_set_reg_u64
9913 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9914 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9915 }
9916
9917 static void
9918 dexLogicalImmediate (sim_cpu *cpu)
9919 {
  /* assert instr[28,23] = 100100
9921 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9922 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9923 instr[22] = N : used to construct immediate mask
9924 instr[21,16] = immr
9925 instr[15,10] = imms
9926 instr[9,5] = Rn
9927 instr[4,0] = Rd */
9928
9929 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9930 uint32_t size = INSTR (31, 31);
9931 uint32_t N = INSTR (22, 22);
9932 /* uint32_t immr = INSTR (21, 16);. */
9933 /* uint32_t imms = INSTR (15, 10);. */
9934 uint32_t index = INSTR (22, 10);
9935 uint64_t bimm64 = LITable [index];
9936 uint32_t dispatch = INSTR (30, 29);
9937
9938 if (~size & N)
9939 HALT_UNALLOC;
9940
9941 if (!bimm64)
9942 HALT_UNALLOC;
9943
9944 if (size == 0)
9945 {
9946 uint32_t bimm = (uint32_t) bimm64;
9947
9948 switch (dispatch)
9949 {
9950 case 0: and32 (cpu, bimm); return;
9951 case 1: orr32 (cpu, bimm); return;
9952 case 2: eor32 (cpu, bimm); return;
9953 case 3: ands32 (cpu, bimm); return;
9954 }
9955 }
9956 else
9957 {
9958 switch (dispatch)
9959 {
9960 case 0: and64 (cpu, bimm64); return;
9961 case 1: orr64 (cpu, bimm64); return;
9962 case 2: eor64 (cpu, bimm64); return;
9963 case 3: ands64 (cpu, bimm64); return;
9964 }
9965 }
9966 HALT_UNALLOC;
9967 }
9968
/* Immediate move.
   The uimm argument is a 16 bit value to be inserted into the
   target register.  The pos argument locates the 16 bit word in
   the dest register, i.e. it is in {0, 1} for 32 bit and
   {0, 1, 2, 3} for 64 bit.
   N.B. the register arg may not be SP, so it should be
   accessed using the setGZRegisterXXX accessors.  */
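/* A 64-bit constant is typically materialised with a MOVZ/MOVK
   sequence, e.g. for 0x1234000056780000:
     MOVZ X0, #0x1234, LSL #48   ==> movz64 (cpu, 0x1234, 3)
     MOVK X0, #0x5678, LSL #16   ==> movk64 (cpu, 0x5678, 1)  */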
9976
9977 /* 32 bit move 16 bit immediate zero remaining shorts. */
9978 static void
9979 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9980 {
9981 unsigned rd = INSTR (4, 0);
9982
9983 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9984 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9985 }
9986
9987 /* 64 bit move 16 bit immediate zero remaining shorts. */
9988 static void
9989 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9990 {
9991 unsigned rd = INSTR (4, 0);
9992
9993 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9994 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9995 }
9996
/* 32 bit move 16 bit immediate, bitwise inverted (MOVN).  */
9998 static void
9999 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10000 {
10001 unsigned rd = INSTR (4, 0);
10002
10003 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10004 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
10005 }
10006
/* 64 bit move 16 bit immediate, bitwise inverted (MOVN).  */
10008 static void
10009 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10010 {
10011 unsigned rd = INSTR (4, 0);
10012
10013 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10014 aarch64_set_reg_u64
10015 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
10016 ^ 0xffffffffffffffffULL));
10017 }
10018
10019 /* 32 bit move 16 bit immediate keep remaining shorts. */
10020 static void
10021 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10022 {
10023 unsigned rd = INSTR (4, 0);
10024 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10025 uint32_t value = val << (pos * 16);
10026 uint32_t mask = ~(0xffffU << (pos * 16));
10027
10028 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10029 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10030 }
10031
/* 64 bit move 16 bit immediate keep remaining shorts.  */
10033 static void
10034 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10035 {
10036 unsigned rd = INSTR (4, 0);
10037 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
10038 uint64_t value = (uint64_t) val << (pos * 16);
10039 uint64_t mask = ~(0xffffULL << (pos * 16));
10040
10041 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10042 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10043 }
10044
10045 static void
10046 dexMoveWideImmediate (sim_cpu *cpu)
10047 {
10048 /* assert instr[28:23] = 100101
10049 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10050 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
10051 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
10052 instr[20,5] = uimm16
10053 instr[4,0] = Rd */
10054
  /* N.B. the (multiple of 16) shift is applied by the called routine;
     we just pass the multiplier.  */
10057
10058 uint32_t imm;
10059 uint32_t size = INSTR (31, 31);
10060 uint32_t op = INSTR (30, 29);
10061 uint32_t shift = INSTR (22, 21);
10062
  /* 32 bit can only shift 0 or 1 lot of 16.
     Anything else is an unallocated instruction.  */
10065 if (size == 0 && (shift > 1))
10066 HALT_UNALLOC;
10067
10068 if (op == 1)
10069 HALT_UNALLOC;
10070
10071 imm = INSTR (20, 5);
10072
10073 if (size == 0)
10074 {
10075 if (op == 0)
10076 movn32 (cpu, imm, shift);
10077 else if (op == 2)
10078 movz32 (cpu, imm, shift);
10079 else
10080 movk32 (cpu, imm, shift);
10081 }
10082 else
10083 {
10084 if (op == 0)
10085 movn64 (cpu, imm, shift);
10086 else if (op == 2)
10087 movz64 (cpu, imm, shift);
10088 else
10089 movk64 (cpu, imm, shift);
10090 }
10091 }
10092
10093 /* Bitfield operations.
10094 These take a pair of bit positions r and s which are in {0..31}
10095 or {0..63} depending on the instruction word size.
   N.B. register args may not be SP.  */
10097
/* OK, we start with ubfm which just needs to pick some bits out of
   the source, zero the rest and write the result to dest.  All it
   takes is two logical shifts; a worked example follows.  */
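/* E.g. ubfm32 with r = 8, s = 15 implements UBFX W0, W1, #8, #8:
   value <<= 16 moves bit 15 up to bit 31, then value >>= 24 brings
   it back down to bit 7, leaving Wn<15:8> in Wd<7:0>.  */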
10101
10102 /* 32 bit bitfield move, left and right of affected zeroed
10103 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10104 static void
10105 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10106 {
10107 unsigned rd;
10108 unsigned rn = INSTR (9, 5);
10109 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10110
10111 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10112 if (r <= s)
10113 {
10114 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10115 We want only bits s:xxx:r at the bottom of the word
10116 so we LSL bit s up to bit 31 i.e. by 31 - s
10117 and then we LSR to bring bit 31 down to bit s - r
10118 i.e. by 31 + r - s. */
10119 value <<= 31 - s;
10120 value >>= 31 + r - s;
10121 }
10122 else
10123 {
10124 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
	 We want only bits s:xxx:0 starting at bit 31-(r-1)
10126 so we LSL bit s up to bit 31 i.e. by 31 - s
10127 and then we LSL to bring bit 31 down to 31-(r-1)+s
10128 i.e. by r - (s + 1). */
10129 value <<= 31 - s;
10130 value >>= r - (s + 1);
10131 }
10132
10133 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10134 rd = INSTR (4, 0);
10135 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10136 }
10137
10138 /* 64 bit bitfield move, left and right of affected zeroed
10139 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10140 static void
10141 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10142 {
10143 unsigned rd;
10144 unsigned rn = INSTR (9, 5);
10145 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10146
10147 if (r <= s)
10148 {
10149 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10150 We want only bits s:xxx:r at the bottom of the word.
10151 So we LSL bit s up to bit 63 i.e. by 63 - s
10152 and then we LSR to bring bit 63 down to bit s - r
10153 i.e. by 63 + r - s. */
10154 value <<= 63 - s;
10155 value >>= 63 + r - s;
10156 }
10157 else
10158 {
10159 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
	 We want only bits s:xxx:0 starting at bit 63-(r-1).
10161 So we LSL bit s up to bit 63 i.e. by 63 - s
10162 and then we LSL to bring bit 63 down to 63-(r-1)+s
10163 i.e. by r - (s + 1). */
10164 value <<= 63 - s;
10165 value >>= r - (s + 1);
10166 }
10167
10168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10169 rd = INSTR (4, 0);
10170 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10171 }
10172
/* The signed versions need to insert sign bits
   on the left of the inserted bit field, so we do
   much the same as the unsigned version except we
   use an arithmetic shift right -- this just means
   we need to operate on signed values.  */
10178
10179 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
10180 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10181 static void
10182 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10183 {
10184 unsigned rd;
10185 unsigned rn = INSTR (9, 5);
  /* As per ubfm32 but use an ASR instead of an LSR.  */
10187 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
10188
10189 if (r <= s)
10190 {
10191 value <<= 31 - s;
10192 value >>= 31 + r - s;
10193 }
10194 else
10195 {
10196 value <<= 31 - s;
10197 value >>= r - (s + 1);
10198 }
10199
10200 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10201 rd = INSTR (4, 0);
10202 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
10203 }
10204
10205 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
10206 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10207 static void
10208 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10209 {
10210 unsigned rd;
10211 unsigned rn = INSTR (9, 5);
  /* As per ubfm but use an ASR instead of an LSR.  */
10213 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
10214
10215 if (r <= s)
10216 {
10217 value <<= 63 - s;
10218 value >>= 63 + r - s;
10219 }
10220 else
10221 {
10222 value <<= 63 - s;
10223 value >>= r - (s + 1);
10224 }
10225
10226 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10227 rd = INSTR (4, 0);
10228 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
10229 }
10230
/* Finally, these versions leave non-affected bits
   as is, so we need to generate the bits as per
   ubfm and also generate a mask to pick the
   bits from the original and computed values;
   a worked example follows.  */
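/* E.g. bfm32 with r = 24, s = 3 implements BFI W0, W1, #8, #4:
   Wn<3:0> is shifted up to bits 11:8, the matching 4-bit mask is
   built the same way, and only those destination bits change.  */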
10235
10236 /* 32 bit bitfield move, non-affected bits left as is.
10237 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10238 static void
10239 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10240 {
10241 unsigned rn = INSTR (9, 5);
10242 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10243 uint32_t mask = -1;
10244 unsigned rd;
10245 uint32_t value2;
10246
10247 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10248 if (r <= s)
10249 {
10250 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10251 We want only bits s:xxx:r at the bottom of the word
10252 so we LSL bit s up to bit 31 i.e. by 31 - s
10253 and then we LSR to bring bit 31 down to bit s - r
10254 i.e. by 31 + r - s. */
10255 value <<= 31 - s;
10256 value >>= 31 + r - s;
10257 /* the mask must include the same bits. */
10258 mask <<= 31 - s;
10259 mask >>= 31 + r - s;
10260 }
10261 else
10262 {
10263 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
	 We want only bits s:xxx:0 starting at bit 31-(r-1)
10265 so we LSL bit s up to bit 31 i.e. by 31 - s
10266 and then we LSL to bring bit 31 down to 31-(r-1)+s
10267 i.e. by r - (s + 1). */
10268 value <<= 31 - s;
10269 value >>= r - (s + 1);
10270 /* The mask must include the same bits. */
10271 mask <<= 31 - s;
10272 mask >>= r - (s + 1);
10273 }
10274
  rd = INSTR (4, 0);
  value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);

  value2 &= ~mask;
  value2 |= value;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
10284 }
10285
10286 /* 64 bit bitfield move, non-affected bits left as is.
10287 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10288 static void
10289 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10290 {
10291 unsigned rd;
10292 unsigned rn = INSTR (9, 5);
10293 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10294 uint64_t mask = 0xffffffffffffffffULL;
10295
10296 if (r <= s)
10297 {
10298 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10299 We want only bits s:xxx:r at the bottom of the word
10300 so we LSL bit s up to bit 63 i.e. by 63 - s
10301 and then we LSR to bring bit 63 down to bit s - r
10302 i.e. by 63 + r - s. */
10303 value <<= 63 - s;
10304 value >>= 63 + r - s;
10305 /* The mask must include the same bits. */
10306 mask <<= 63 - s;
10307 mask >>= 63 + r - s;
10308 }
10309 else
10310 {
10311 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
	 We want only bits s:xxx:0 starting at bit 63-(r-1)
10313 so we LSL bit s up to bit 63 i.e. by 63 - s
10314 and then we LSL to bring bit 63 down to 63-(r-1)+s
10315 i.e. by r - (s + 1). */
10316 value <<= 63 - s;
10317 value >>= r - (s + 1);
10318 /* The mask must include the same bits. */
10319 mask <<= 63 - s;
10320 mask >>= r - (s + 1);
10321 }
10322
10323 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10324 rd = INSTR (4, 0);
10325 aarch64_set_reg_u64
10326 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10327 }
10328
10329 static void
10330 dexBitfieldImmediate (sim_cpu *cpu)
10331 {
10332 /* assert instr[28:23] = 100110
10333 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10334 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10335 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10336 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10337 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10338 instr[9,5] = Rn
10339 instr[4,0] = Rd */
10340
10341 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10342 uint32_t dispatch;
10343 uint32_t imms;
10344 uint32_t size = INSTR (31, 31);
10345 uint32_t N = INSTR (22, 22);
  /* 32 bit operations must have immr[5] = 0 and imms[5] = 0,
     or else we have an UNALLOC.  */
10348 uint32_t immr = INSTR (21, 16);
10349
10350 if (~size & N)
10351 HALT_UNALLOC;
10352
10353 if (!size && uimm (immr, 5, 5))
10354 HALT_UNALLOC;
10355
10356 imms = INSTR (15, 10);
10357 if (!size && uimm (imms, 5, 5))
10358 HALT_UNALLOC;
10359
10360 /* Switch on combined size and op. */
10361 dispatch = INSTR (31, 29);
10362 switch (dispatch)
10363 {
10364 case 0: sbfm32 (cpu, immr, imms); return;
10365 case 1: bfm32 (cpu, immr, imms); return;
10366 case 2: ubfm32 (cpu, immr, imms); return;
10367 case 4: sbfm (cpu, immr, imms); return;
10368 case 5: bfm (cpu, immr, imms); return;
10369 case 6: ubfm (cpu, immr, imms); return;
10370 default: HALT_UNALLOC;
10371 }
10372 }
10373
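/* EXTR Rd, Rn, Rm, #imms extracts a register-sized field starting
   at bit imms of the concatenation Rn:Rm; when Rn == Rm this is
   the idiom for ROR Rd, Rn, #imms.  */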
10374 static void
10375 do_EXTR_32 (sim_cpu *cpu)
10376 {
10377 /* instr[31:21] = 00010011100
10378 instr[20,16] = Rm
10379 instr[15,10] = imms : 0xxxxx for 32 bit
10380 instr[9,5] = Rn
10381 instr[4,0] = Rd */
10382 unsigned rm = INSTR (20, 16);
10383 unsigned imms = INSTR (15, 10) & 31;
10384 unsigned rn = INSTR ( 9, 5);
10385 unsigned rd = INSTR ( 4, 0);
10386 uint64_t val1;
10387 uint64_t val2;
10388
  val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
  val1 >>= imms;
  val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  /* Guard the imms == 0 case, and mask the shifted value back to 32
     bits so the upper half of Xd stays zero for a W-form result.  */
  val2 = imms ? ((val2 << (32 - imms)) & 0xffffffffULL) : 0;
10393
10394 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10395 aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2);
10396 }
10397
10398 static void
10399 do_EXTR_64 (sim_cpu *cpu)
10400 {
10401 /* instr[31:21] = 10010011100
10402 instr[20,16] = Rm
10403 instr[15,10] = imms
10404 instr[9,5] = Rn
10405 instr[4,0] = Rd */
10406 unsigned rm = INSTR (20, 16);
10407 unsigned imms = INSTR (15, 10) & 63;
10408 unsigned rn = INSTR ( 9, 5);
10409 unsigned rd = INSTR ( 4, 0);
10410 uint64_t val;
10411
  val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
  val >>= imms;
  /* Guard the imms == 0 case: a shift by 64 is undefined behaviour,
     and the result is then simply Xm.  */
  if (imms > 0)
    val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10415
10416 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10417 }
10418
10419 static void
10420 dexExtractImmediate (sim_cpu *cpu)
10421 {
10422 /* assert instr[28:23] = 100111
10423 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10424 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10425 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10426 instr[21] = op0 : must be 0 or UNALLOC
10427 instr[20,16] = Rm
10428 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10429 instr[9,5] = Rn
10430 instr[4,0] = Rd */
10431
10432 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10433 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10434 uint32_t dispatch;
10435 uint32_t size = INSTR (31, 31);
10436 uint32_t N = INSTR (22, 22);
10437 /* 32 bit operations must have imms[5] = 0
10438 or else we have an UNALLOC. */
10439 uint32_t imms = INSTR (15, 10);
10440
10441 if (size ^ N)
10442 HALT_UNALLOC;
10443
10444 if (!size && uimm (imms, 5, 5))
10445 HALT_UNALLOC;
10446
10447 /* Switch on combined size and op. */
10448 dispatch = INSTR (31, 29);
10449
10450 if (dispatch == 0)
10451 do_EXTR_32 (cpu);
10452
10453 else if (dispatch == 4)
10454 do_EXTR_64 (cpu);
10455
10456 else if (dispatch == 1)
10457 HALT_NYI;
10458 else
10459 HALT_UNALLOC;
10460 }
10461
10462 static void
10463 dexDPImm (sim_cpu *cpu)
10464 {
10465 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10466	     assert  group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10467 bits [25,23] of a DPImm are the secondary dispatch vector. */
10468 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10469
10470 switch (group2)
10471 {
10472 case DPIMM_PCADR_000:
10473 case DPIMM_PCADR_001:
10474 dexPCRelAddressing (cpu);
10475 return;
10476
10477 case DPIMM_ADDSUB_010:
10478 case DPIMM_ADDSUB_011:
10479 dexAddSubtractImmediate (cpu);
10480 return;
10481
10482 case DPIMM_LOG_100:
10483 dexLogicalImmediate (cpu);
10484 return;
10485
10486 case DPIMM_MOV_101:
10487 dexMoveWideImmediate (cpu);
10488 return;
10489
10490 case DPIMM_BITF_110:
10491 dexBitfieldImmediate (cpu);
10492 return;
10493
10494 case DPIMM_EXTR_111:
10495 dexExtractImmediate (cpu);
10496 return;
10497
10498 default:
10499 /* Should never reach here. */
10500 HALT_NYI;
10501 }
10502 }
10503
10504 static void
10505 dexLoadUnscaledImmediate (sim_cpu *cpu)
10506 {
10507 /* instr[29,24] == 111_00
10508 instr[21] == 0
10509 instr[11,10] == 00
10510 instr[31,30] = size
10511 instr[26] = V
10512 instr[23,22] = opc
10513 instr[20,12] = simm9
10514 instr[9,5] = rn may be SP. */
10515 /* unsigned rt = INSTR (4, 0); */
10516 uint32_t V = INSTR (26, 26);
10517 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10518 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10519
10520 if (!V)
10521 {
10522 /* GReg operations. */
10523 switch (dispatch)
10524 {
10525 case 0: sturb (cpu, imm); return;
10526 case 1: ldurb32 (cpu, imm); return;
10527 case 2: ldursb64 (cpu, imm); return;
10528 case 3: ldursb32 (cpu, imm); return;
10529 case 4: sturh (cpu, imm); return;
10530 case 5: ldurh32 (cpu, imm); return;
10531 case 6: ldursh64 (cpu, imm); return;
10532 case 7: ldursh32 (cpu, imm); return;
10533 case 8: stur32 (cpu, imm); return;
10534 case 9: ldur32 (cpu, imm); return;
10535 case 10: ldursw (cpu, imm); return;
10536 case 12: stur64 (cpu, imm); return;
10537 case 13: ldur64 (cpu, imm); return;
10538
10539 case 14:
10540 /* PRFUM NYI. */
10541 HALT_NYI;
10542
10543 default:
10544 case 11:
10545 case 15:
10546 HALT_UNALLOC;
10547 }
10548 }
10549
10550 /* FReg operations. */
10551 switch (dispatch)
10552 {
10553 case 2: fsturq (cpu, imm); return;
10554 case 3: fldurq (cpu, imm); return;
10555 case 8: fsturs (cpu, imm); return;
10556 case 9: fldurs (cpu, imm); return;
10557 case 12: fsturd (cpu, imm); return;
10558 case 13: fldurd (cpu, imm); return;
10559
10560 case 0: /* STUR 8 bit FP. */
10561 case 1: /* LDUR 8 bit FP. */
10562 case 4: /* STUR 16 bit FP. */
10563	    case 5: /* LDUR 16 bit FP.  */
10564 HALT_NYI;
10565
10566 default:
10567 case 6:
10568 case 7:
10569 case 10:
10570 case 11:
10571 case 14:
10572 case 15:
10573 HALT_UNALLOC;
10574 }
10575 }
10576
10577 /* N.B. A preliminary note regarding all the ldrs<x>32
10578 instructions
10579
10580 The signed value loaded by these instructions is cast to unsigned
10581 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
10582	   64 bit element of the GReg union.  This performs a 32 bit sign extension
10583	   (as required) but avoids 64 bit sign extension, thus ensuring that the
10584	   top half of the register word is zero.  This is what the spec demands
10585 when a 32 bit load occurs. */
10586
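/* A standalone illustration (not simulator code) of the note above:
   widening an int8_t through uint32_t sign-extends within 32 bits and
   leaves the top half of the 64 bit destination zero, while widening
   through int64_t would smear the sign bit across all 64 bits.  */

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

int
main (void)
{
  int8_t loaded = -1;                             /* Byte from memory.  */
  uint64_t w_form = (uint32_t) (int32_t) loaded;  /* 0x00000000ffffffff.  */
  uint64_t x_form = (uint64_t) (int64_t) loaded;  /* 0xffffffffffffffff.  */
  printf ("%#018" PRIx64 " %#018" PRIx64 "\n", w_form, x_form);
  return 0;
}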
10587 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10588 static void
10589 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10590 {
10591 unsigned int rn = INSTR (9, 5);
10592 unsigned int rt = INSTR (4, 0);
10593
10594	  /* The target register may not be SP but the source register may be;
10595	     there is no scaling required for a byte load.  */
10596 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10597 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10598 (int64_t) aarch64_get_mem_s8 (cpu, address));
10599 }
10600
10601 /* 32 bit load sign-extended byte scaled or unscaled zero-
10602 or sign-extended 32-bit register offset. */
10603 static void
10604 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10605 {
10606 unsigned int rm = INSTR (20, 16);
10607 unsigned int rn = INSTR (9, 5);
10608 unsigned int rt = INSTR (4, 0);
10609
10610 /* rn may reference SP, rm and rt must reference ZR. */
10611
10612 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10613 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10614 extension);
10615
10616 /* There is no scaling required for a byte load. */
10617 aarch64_set_reg_u64
10618 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
10619 + displacement));
10620 }
10621
10622 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10623 pre- or post-writeback. */
10624 static void
10625 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10626 {
10627 uint64_t address;
10628 unsigned int rn = INSTR (9, 5);
10629 unsigned int rt = INSTR (4, 0);
10630
10631 if (rn == rt && wb != NoWriteBack)
10632 HALT_UNALLOC;
10633
10634 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10635
10636 if (wb == Pre)
10637 address += offset;
10638
10639 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10640 (int64_t) aarch64_get_mem_s8 (cpu, address));
10641
10642 if (wb == Post)
10643 address += offset;
10644
10645 if (wb != NoWriteBack)
10646 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10647 }
10648
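/* A standalone sketch (not simulator code) of the writeback convention
   used by the *_wb helpers in this file: Pre adds the offset before the
   access, Post after it, and in either case the final address is written
   back to the base register.  The names wb_kind and access_addr are
   invented for this illustration.  */

#include <stdint.h>
#include <assert.h>

typedef enum { NoWB, PreWB, PostWB } wb_kind;

static uint64_t
access_addr (uint64_t *base, int32_t offset, wb_kind wb)
{
  uint64_t address = *base;
  uint64_t accessed;

  if (wb == PreWB)
    address += offset;

  accessed = address;            /* The address the load/store uses.  */

  if (wb == PostWB)
    address += offset;

  if (wb != NoWB)
    *base = address;             /* Base register update.  */

  return accessed;
}

int
main (void)
{
  uint64_t x0 = 0x1000;
  assert (access_addr (&x0, 4, PreWB) == 0x1004 && x0 == 0x1004);
  x0 = 0x1000;
  assert (access_addr (&x0, 4, PostWB) == 0x1000 && x0 == 0x1004);
  return 0;
}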
10649 /* 8 bit store scaled. */
10650 static void
10651 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10652 {
10653 unsigned st = INSTR (4, 0);
10654 unsigned rn = INSTR (9, 5);
10655
10656 aarch64_set_mem_u8 (cpu,
10657 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10658 aarch64_get_vec_u8 (cpu, st, 0));
10659 }
10660
10661 /* 8 bit store scaled or unscaled zero- or
10662 sign-extended 8-bit register offset. */
10663 static void
10664 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10665 {
10666 unsigned rm = INSTR (20, 16);
10667 unsigned rn = INSTR (9, 5);
10668 unsigned st = INSTR (4, 0);
10669
10670 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10671 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10672 extension);
10673 uint64_t displacement = scaling == Scaled ? extended : 0;
10674
10675 aarch64_set_mem_u8
10676 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10677 }
10678
10679 /* 16 bit store scaled. */
10680 static void
10681 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10682 {
10683 unsigned st = INSTR (4, 0);
10684 unsigned rn = INSTR (9, 5);
10685
10686 aarch64_set_mem_u16
10687 (cpu,
10688 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10689 aarch64_get_vec_u16 (cpu, st, 0));
10690 }
10691
10692 /* 16 bit store scaled or unscaled zero-
10693 or sign-extended 16-bit register offset. */
10694 static void
10695 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10696 {
10697 unsigned rm = INSTR (20, 16);
10698 unsigned rn = INSTR (9, 5);
10699 unsigned st = INSTR (4, 0);
10700
10701 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10702 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10703 extension);
10704 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10705
10706 aarch64_set_mem_u16
10707 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10708 }
10709
10710 /* 32 bit store scaled unsigned 12 bit. */
10711 static void
10712 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10713 {
10714 unsigned st = INSTR (4, 0);
10715 unsigned rn = INSTR (9, 5);
10716
10717 aarch64_set_mem_u32
10718 (cpu,
10719 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10720 aarch64_get_vec_u32 (cpu, st, 0));
10721 }
10722
10723 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10724 static void
10725 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10726 {
10727 unsigned rn = INSTR (9, 5);
10728 unsigned st = INSTR (4, 0);
10729
10730 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10731
10732 if (wb != Post)
10733 address += offset;
10734
10735 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10736
10737 if (wb == Post)
10738 address += offset;
10739
10740 if (wb != NoWriteBack)
10741 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10742 }
10743
10744 /* 32 bit store scaled or unscaled zero-
10745 or sign-extended 32-bit register offset. */
10746 static void
10747 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10748 {
10749 unsigned rm = INSTR (20, 16);
10750 unsigned rn = INSTR (9, 5);
10751 unsigned st = INSTR (4, 0);
10752
10753 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10754 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10755 extension);
10756 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10757
10758 aarch64_set_mem_u32
10759 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10760 }
10761
10762 /* 64 bit store scaled unsigned 12 bit. */
10763 static void
10764 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10765 {
10766 unsigned st = INSTR (4, 0);
10767 unsigned rn = INSTR (9, 5);
10768
10769 aarch64_set_mem_u64
10770 (cpu,
10771 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10772 aarch64_get_vec_u64 (cpu, st, 0));
10773 }
10774
10775 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10776 static void
10777 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10778 {
10779 unsigned rn = INSTR (9, 5);
10780 unsigned st = INSTR (4, 0);
10781
10782 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10783
10784 if (wb != Post)
10785 address += offset;
10786
10787 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10788
10789 if (wb == Post)
10790 address += offset;
10791
10792 if (wb != NoWriteBack)
10793 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10794 }
10795
10796 /* 64 bit store scaled or unscaled zero-
10797 or sign-extended 32-bit register offset. */
10798 static void
10799 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10800 {
10801 unsigned rm = INSTR (20, 16);
10802 unsigned rn = INSTR (9, 5);
10803 unsigned st = INSTR (4, 0);
10804
10805 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10806 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10807 extension);
10808 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10809
10810 aarch64_set_mem_u64
10811 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10812 }
10813
10814 /* 128 bit store scaled unsigned 12 bit. */
10815 static void
10816 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10817 {
10818 FRegister a;
10819 unsigned st = INSTR (4, 0);
10820 unsigned rn = INSTR (9, 5);
10821 uint64_t addr;
10822
10823 aarch64_get_FP_long_double (cpu, st, & a);
10824
10825 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10826 aarch64_set_mem_long_double (cpu, addr, a);
10827 }
10828
10829 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10830 static void
10831 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10832 {
10833 FRegister a;
10834 unsigned rn = INSTR (9, 5);
10835 unsigned st = INSTR (4, 0);
10836 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10837
10838 if (wb != Post)
10839 address += offset;
10840
10841 aarch64_get_FP_long_double (cpu, st, & a);
10842 aarch64_set_mem_long_double (cpu, address, a);
10843
10844 if (wb == Post)
10845 address += offset;
10846
10847 if (wb != NoWriteBack)
10848 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10849 }
10850
10851 /* 128 bit store scaled or unscaled zero-
10852 or sign-extended 32-bit register offset. */
10853 static void
10854 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10855 {
10856 unsigned rm = INSTR (20, 16);
10857 unsigned rn = INSTR (9, 5);
10858 unsigned st = INSTR (4, 0);
10859
10860 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10861 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10862 extension);
10863 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10864
10865 FRegister a;
10866
10867 aarch64_get_FP_long_double (cpu, st, & a);
10868 aarch64_set_mem_long_double (cpu, address + displacement, a);
10869 }
10870
10871 static void
10872 dexLoadImmediatePrePost (sim_cpu *cpu)
10873 {
10874 /* instr[31,30] = size
10875 instr[29,27] = 111
10876 instr[26] = V
10877 instr[25,24] = 00
10878 instr[23,22] = opc
10879 instr[21] = 0
10880 instr[20,12] = simm9
10881 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10882 instr[10] = 0
10883 instr[9,5] = Rn may be SP.
10884 instr[4,0] = Rt */
10885
10886 uint32_t V = INSTR (26, 26);
10887 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10888 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10889 WriteBack wb = INSTR (11, 11);
10890
10891 if (!V)
10892 {
10893 /* GReg operations. */
10894 switch (dispatch)
10895 {
10896 case 0: strb_wb (cpu, imm, wb); return;
10897 case 1: ldrb32_wb (cpu, imm, wb); return;
10898 case 2: ldrsb_wb (cpu, imm, wb); return;
10899 case 3: ldrsb32_wb (cpu, imm, wb); return;
10900 case 4: strh_wb (cpu, imm, wb); return;
10901 case 5: ldrh32_wb (cpu, imm, wb); return;
10902 case 6: ldrsh64_wb (cpu, imm, wb); return;
10903 case 7: ldrsh32_wb (cpu, imm, wb); return;
10904 case 8: str32_wb (cpu, imm, wb); return;
10905 case 9: ldr32_wb (cpu, imm, wb); return;
10906 case 10: ldrsw_wb (cpu, imm, wb); return;
10907 case 12: str_wb (cpu, imm, wb); return;
10908 case 13: ldr_wb (cpu, imm, wb); return;
10909
10910 default:
10911 case 11:
10912 case 14:
10913 case 15:
10914 HALT_UNALLOC;
10915 }
10916 }
10917
10918 /* FReg operations. */
10919 switch (dispatch)
10920 {
10921 case 2: fstrq_wb (cpu, imm, wb); return;
10922 case 3: fldrq_wb (cpu, imm, wb); return;
10923 case 8: fstrs_wb (cpu, imm, wb); return;
10924 case 9: fldrs_wb (cpu, imm, wb); return;
10925 case 12: fstrd_wb (cpu, imm, wb); return;
10926 case 13: fldrd_wb (cpu, imm, wb); return;
10927
10928 case 0: /* STUR 8 bit FP. */
10929 case 1: /* LDUR 8 bit FP. */
10930 case 4: /* STUR 16 bit FP. */
10931	    case 5: /* LDUR 16 bit FP.  */
10932 HALT_NYI;
10933
10934 default:
10935 case 6:
10936 case 7:
10937 case 10:
10938 case 11:
10939 case 14:
10940 case 15:
10941 HALT_UNALLOC;
10942 }
10943 }
10944
10945 static void
10946 dexLoadRegisterOffset (sim_cpu *cpu)
10947 {
10948 /* instr[31,30] = size
10949 instr[29,27] = 111
10950 instr[26] = V
10951 instr[25,24] = 00
10952 instr[23,22] = opc
10953 instr[21] = 1
10954 instr[20,16] = rm
10955 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10956 110 ==> SXTW, 111 ==> SXTX,
10957	                           otherwise ==> RESERVED
10958 instr[12] = scaled
10959 instr[11,10] = 10
10960 instr[9,5] = rn
10961 instr[4,0] = rt. */
10962
10963 uint32_t V = INSTR (26, 26);
10964 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10965 Scaling scale = INSTR (12, 12);
10966 Extension extensionType = INSTR (15, 13);
10967
10968 /* Check for illegal extension types. */
10969 if (uimm (extensionType, 1, 1) == 0)
10970 HALT_UNALLOC;
10971
10972 if (extensionType == UXTX || extensionType == SXTX)
10973 extensionType = NoExtension;
10974
10975 if (!V)
10976 {
10977 /* GReg operations. */
10978 switch (dispatch)
10979 {
10980 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10981 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10982 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10983 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10984 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10985 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10986 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10987 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10988 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10989 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10990 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10991 case 12: str_scale_ext (cpu, scale, extensionType); return;
10992 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10993 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10994
10995 default:
10996 case 11:
10997 case 15:
10998 HALT_UNALLOC;
10999 }
11000 }
11001
11002 /* FReg operations. */
11003 switch (dispatch)
11004 {
11005 case 1: /* LDUR 8 bit FP. */
11006 HALT_NYI;
11007 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
11008	    case 5: /* LDUR 16 bit FP.  */
11009 HALT_NYI;
11010 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
11011 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
11012
11013 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
11014 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
11015 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
11016 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
11017 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
11018
11019 default:
11020 case 6:
11021 case 7:
11022 case 10:
11023 case 11:
11024 case 14:
11025 case 15:
11026 HALT_UNALLOC;
11027 }
11028 }
11029
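/* A standalone check (not simulator code) of the option-field test above:
   of the eight 3 bit extension encodings only 010 (UXTW), 011 (UXTX/LSL),
   110 (SXTW) and 111 (SXTX) are legal, i.e. exactly those with bit 1 set,
   which is what uimm (extensionType, 1, 1) verifies.  */

#include <assert.h>

int
main (void)
{
  unsigned option;

  for (option = 0; option < 8; option++)
    {
      int bit1_set = (option >> 1) & 1;
      int legal = (option == 2 || option == 3
                   || option == 6 || option == 7);
      assert (bit1_set == legal);
    }
  return 0;
}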
11030 static void
11031 dexLoadUnsignedImmediate (sim_cpu *cpu)
11032 {
11033 /* instr[29,24] == 111_01
11034 instr[31,30] = size
11035 instr[26] = V
11036 instr[23,22] = opc
11037 instr[21,10] = uimm12 : unsigned immediate offset
11038 instr[9,5] = rn may be SP.
11039 instr[4,0] = rt. */
11040
11041 uint32_t V = INSTR (26,26);
11042 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
11043 uint32_t imm = INSTR (21, 10);
11044
11045 if (!V)
11046 {
11047 /* GReg operations. */
11048 switch (dispatch)
11049 {
11050 case 0: strb_abs (cpu, imm); return;
11051 case 1: ldrb32_abs (cpu, imm); return;
11052 case 2: ldrsb_abs (cpu, imm); return;
11053 case 3: ldrsb32_abs (cpu, imm); return;
11054 case 4: strh_abs (cpu, imm); return;
11055 case 5: ldrh32_abs (cpu, imm); return;
11056 case 6: ldrsh_abs (cpu, imm); return;
11057 case 7: ldrsh32_abs (cpu, imm); return;
11058 case 8: str32_abs (cpu, imm); return;
11059 case 9: ldr32_abs (cpu, imm); return;
11060 case 10: ldrsw_abs (cpu, imm); return;
11061 case 12: str_abs (cpu, imm); return;
11062 case 13: ldr_abs (cpu, imm); return;
11063 case 14: prfm_abs (cpu, imm); return;
11064
11065 default:
11066 case 11:
11067 case 15:
11068 HALT_UNALLOC;
11069 }
11070 }
11071
11072 /* FReg operations. */
11073 switch (dispatch)
11074 {
11075 case 0: fstrb_abs (cpu, imm); return;
11076 case 4: fstrh_abs (cpu, imm); return;
11077 case 8: fstrs_abs (cpu, imm); return;
11078 case 12: fstrd_abs (cpu, imm); return;
11079 case 2: fstrq_abs (cpu, imm); return;
11080
11081 case 1: fldrb_abs (cpu, imm); return;
11082 case 5: fldrh_abs (cpu, imm); return;
11083 case 9: fldrs_abs (cpu, imm); return;
11084 case 13: fldrd_abs (cpu, imm); return;
11085 case 3: fldrq_abs (cpu, imm); return;
11086
11087 default:
11088 case 6:
11089 case 7:
11090 case 10:
11091 case 11:
11092 case 14:
11093 case 15:
11094 HALT_UNALLOC;
11095 }
11096 }
11097
11098 static void
11099 dexLoadExclusive (sim_cpu *cpu)
11100 {
11101 /* assert instr[29:24] = 001000;
11102 instr[31,30] = size
11103 instr[23] = 0 if exclusive
11104 instr[22] = L : 1 if load, 0 if store
11105 instr[21] = 1 if pair
11106 instr[20,16] = Rs
11107 instr[15] = o0 : 1 if ordered
11108 instr[14,10] = Rt2
11109 instr[9,5] = Rn
11110	     instr[4,0] = Rt.  */
11111
11112 switch (INSTR (22, 21))
11113 {
11114 case 2: ldxr (cpu); return;
11115 case 0: stxr (cpu); return;
11116 default: HALT_NYI;
11117 }
11118 }
11119
11120 static void
11121 dexLoadOther (sim_cpu *cpu)
11122 {
11123 uint32_t dispatch;
11124
11125 /* instr[29,25] = 111_0
11126 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11127	     instr[21] and instr[11,10] form the secondary dispatch vector.  */
11128 if (INSTR (24, 24))
11129 {
11130 dexLoadUnsignedImmediate (cpu);
11131 return;
11132 }
11133
11134 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
11135 switch (dispatch)
11136 {
11137 case 0: dexLoadUnscaledImmediate (cpu); return;
11138 case 1: dexLoadImmediatePrePost (cpu); return;
11139 case 3: dexLoadImmediatePrePost (cpu); return;
11140 case 6: dexLoadRegisterOffset (cpu); return;
11141
11142 default:
11143 case 2:
11144 case 4:
11145 case 5:
11146 case 7:
11147 HALT_NYI;
11148 }
11149 }
11150
11151 static void
11152 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11153 {
11154 unsigned rn = INSTR (14, 10);
11155 unsigned rd = INSTR (9, 5);
11156 unsigned rm = INSTR (4, 0);
11157 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11158
11159 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11160 HALT_UNALLOC; /* ??? */
11161
11162 offset <<= 2;
11163
11164 if (wb != Post)
11165 address += offset;
11166
11167 aarch64_set_mem_u32 (cpu, address,
11168 aarch64_get_reg_u32 (cpu, rm, NO_SP));
11169 aarch64_set_mem_u32 (cpu, address + 4,
11170 aarch64_get_reg_u32 (cpu, rn, NO_SP));
11171
11172 if (wb == Post)
11173 address += offset;
11174
11175 if (wb != NoWriteBack)
11176 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11177 }
11178
11179 static void
11180 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11181 {
11182 unsigned rn = INSTR (14, 10);
11183 unsigned rd = INSTR (9, 5);
11184 unsigned rm = INSTR (4, 0);
11185 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11186
11187 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11188 HALT_UNALLOC; /* ??? */
11189
11190 offset <<= 3;
11191
11192 if (wb != Post)
11193 address += offset;
11194
11195 aarch64_set_mem_u64 (cpu, address,
11196 aarch64_get_reg_u64 (cpu, rm, NO_SP));
11197 aarch64_set_mem_u64 (cpu, address + 8,
11198 aarch64_get_reg_u64 (cpu, rn, NO_SP));
11199
11200 if (wb == Post)
11201 address += offset;
11202
11203 if (wb != NoWriteBack)
11204 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11205 }
11206
11207 static void
11208 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11209 {
11210 unsigned rn = INSTR (14, 10);
11211 unsigned rd = INSTR (9, 5);
11212 unsigned rm = INSTR (4, 0);
11213 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11214
11215 /* Treat this as unalloc to make sure we don't do it. */
11216 if (rn == rm)
11217 HALT_UNALLOC;
11218
11219 offset <<= 2;
11220
11221 if (wb != Post)
11222 address += offset;
11223
11224 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
11225 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
11226
11227 if (wb == Post)
11228 address += offset;
11229
11230 if (wb != NoWriteBack)
11231 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11232 }
11233
11234 static void
11235 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11236 {
11237 unsigned rn = INSTR (14, 10);
11238 unsigned rd = INSTR (9, 5);
11239 unsigned rm = INSTR (4, 0);
11240 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11241
11242 /* Treat this as unalloc to make sure we don't do it. */
11243 if (rn == rm)
11244 HALT_UNALLOC;
11245
11246 offset <<= 2;
11247
11248 if (wb != Post)
11249 address += offset;
11250
11251 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
11252 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
11253
11254 if (wb == Post)
11255 address += offset;
11256
11257 if (wb != NoWriteBack)
11258 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11259 }
11260
11261 static void
11262 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11263 {
11264 unsigned rn = INSTR (14, 10);
11265 unsigned rd = INSTR (9, 5);
11266 unsigned rm = INSTR (4, 0);
11267 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11268
11269 /* Treat this as unalloc to make sure we don't do it. */
11270 if (rn == rm)
11271 HALT_UNALLOC;
11272
11273 offset <<= 3;
11274
11275 if (wb != Post)
11276 address += offset;
11277
11278 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11279 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11280
11281 if (wb == Post)
11282 address += offset;
11283
11284 if (wb != NoWriteBack)
11285 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11286 }
11287
11288 static void
11289 dex_load_store_pair_gr (sim_cpu *cpu)
11290 {
11291 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11292 instr[29,25] = instruction encoding: 101_0
11293 instr[26] = V : 1 if fp 0 if gp
11294 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11295 instr[22] = load/store (1=> load)
11296 instr[21,15] = signed, scaled, offset
11297 instr[14,10] = Rn
11298 instr[ 9, 5] = Rd
11299 instr[ 4, 0] = Rm. */
11300
11301 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11302 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11303
11304 switch (dispatch)
11305 {
11306 case 2: store_pair_u32 (cpu, offset, Post); return;
11307 case 3: load_pair_u32 (cpu, offset, Post); return;
11308 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11309 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11310 case 6: store_pair_u32 (cpu, offset, Pre); return;
11311 case 7: load_pair_u32 (cpu, offset, Pre); return;
11312
11313 case 11: load_pair_s32 (cpu, offset, Post); return;
11314 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11315 case 15: load_pair_s32 (cpu, offset, Pre); return;
11316
11317 case 18: store_pair_u64 (cpu, offset, Post); return;
11318 case 19: load_pair_u64 (cpu, offset, Post); return;
11319 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11320 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11321 case 22: store_pair_u64 (cpu, offset, Pre); return;
11322 case 23: load_pair_u64 (cpu, offset, Pre); return;
11323
11324 default:
11325 HALT_UNALLOC;
11326 }
11327 }
11328
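/* A standalone sketch (not simulator code) of how the dispatch value
   above is formed: a 64 bit STP with post-index writeback has size = 2
   (instr[31,30]) and instr[24,22] = 0b010, so (2 << 3) | 2 == 18 selects
   the store_pair_u64 (..., Post) case.  */

#include <stdint.h>
#include <assert.h>

int
main (void)
{
  uint32_t size = 2;      /* 64 bit pair.  */
  uint32_t mode_l = 2;    /* Post-index (01), store (L = 0).  */
  assert (((size << 3) | mode_l) == 18);
  return 0;
}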
11329 static void
11330 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11331 {
11332 unsigned rn = INSTR (14, 10);
11333 unsigned rd = INSTR (9, 5);
11334 unsigned rm = INSTR (4, 0);
11335 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11336
11337 offset <<= 2;
11338
11339 if (wb != Post)
11340 address += offset;
11341
11342 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11343 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11344
11345 if (wb == Post)
11346 address += offset;
11347
11348 if (wb != NoWriteBack)
11349 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11350 }
11351
11352 static void
11353 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11354 {
11355 unsigned rn = INSTR (14, 10);
11356 unsigned rd = INSTR (9, 5);
11357 unsigned rm = INSTR (4, 0);
11358 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11359
11360 offset <<= 3;
11361
11362 if (wb != Post)
11363 address += offset;
11364
11365 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11366 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11367
11368 if (wb == Post)
11369 address += offset;
11370
11371 if (wb != NoWriteBack)
11372 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11373 }
11374
11375 static void
11376 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11377 {
11378 FRegister a;
11379 unsigned rn = INSTR (14, 10);
11380 unsigned rd = INSTR (9, 5);
11381 unsigned rm = INSTR (4, 0);
11382 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11383
11384 offset <<= 4;
11385
11386 if (wb != Post)
11387 address += offset;
11388
11389 aarch64_get_FP_long_double (cpu, rm, & a);
11390 aarch64_set_mem_long_double (cpu, address, a);
11391 aarch64_get_FP_long_double (cpu, rn, & a);
11392 aarch64_set_mem_long_double (cpu, address + 16, a);
11393
11394 if (wb == Post)
11395 address += offset;
11396
11397 if (wb != NoWriteBack)
11398 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11399 }
11400
11401 static void
11402 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11403 {
11404 unsigned rn = INSTR (14, 10);
11405 unsigned rd = INSTR (9, 5);
11406 unsigned rm = INSTR (4, 0);
11407 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11408
11409 if (rm == rn)
11410 HALT_UNALLOC;
11411
11412 offset <<= 2;
11413
11414 if (wb != Post)
11415 address += offset;
11416
11417 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11418 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11419
11420 if (wb == Post)
11421 address += offset;
11422
11423 if (wb != NoWriteBack)
11424 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11425 }
11426
11427 static void
11428 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11429 {
11430 unsigned rn = INSTR (14, 10);
11431 unsigned rd = INSTR (9, 5);
11432 unsigned rm = INSTR (4, 0);
11433 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11434
11435 if (rm == rn)
11436 HALT_UNALLOC;
11437
11438 offset <<= 3;
11439
11440 if (wb != Post)
11441 address += offset;
11442
11443 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11444 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11445
11446 if (wb == Post)
11447 address += offset;
11448
11449 if (wb != NoWriteBack)
11450 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11451 }
11452
11453 static void
11454 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11455 {
11456 FRegister a;
11457 unsigned rn = INSTR (14, 10);
11458 unsigned rd = INSTR (9, 5);
11459 unsigned rm = INSTR (4, 0);
11460 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11461
11462 if (rm == rn)
11463 HALT_UNALLOC;
11464
11465 offset <<= 4;
11466
11467 if (wb != Post)
11468 address += offset;
11469
11470 aarch64_get_mem_long_double (cpu, address, & a);
11471 aarch64_set_FP_long_double (cpu, rm, a);
11472 aarch64_get_mem_long_double (cpu, address + 16, & a);
11473 aarch64_set_FP_long_double (cpu, rn, a);
11474
11475 if (wb == Post)
11476 address += offset;
11477
11478 if (wb != NoWriteBack)
11479 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11480 }
11481
11482 static void
11483 dex_load_store_pair_fp (sim_cpu *cpu)
11484 {
11485 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11486 instr[29,25] = instruction encoding
11487 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11488 instr[22] = load/store (1=> load)
11489 instr[21,15] = signed, scaled, offset
11490 instr[14,10] = Rn
11491 instr[ 9, 5] = Rd
11492 instr[ 4, 0] = Rm */
11493
11494 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11495 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11496
11497 switch (dispatch)
11498 {
11499 case 2: store_pair_float (cpu, offset, Post); return;
11500 case 3: load_pair_float (cpu, offset, Post); return;
11501 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11502 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11503 case 6: store_pair_float (cpu, offset, Pre); return;
11504 case 7: load_pair_float (cpu, offset, Pre); return;
11505
11506 case 10: store_pair_double (cpu, offset, Post); return;
11507 case 11: load_pair_double (cpu, offset, Post); return;
11508 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11509 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11510 case 14: store_pair_double (cpu, offset, Pre); return;
11511 case 15: load_pair_double (cpu, offset, Pre); return;
11512
11513 case 18: store_pair_long_double (cpu, offset, Post); return;
11514 case 19: load_pair_long_double (cpu, offset, Post); return;
11515 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11516 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11517 case 22: store_pair_long_double (cpu, offset, Pre); return;
11518 case 23: load_pair_long_double (cpu, offset, Pre); return;
11519
11520 default:
11521 HALT_UNALLOC;
11522 }
11523 }
11524
11525 static inline unsigned
11526 vec_reg (unsigned v, unsigned o)
11527 {
11528	  /* Register numbers wrap modulo 32 (there are 32 vector registers).  */
11529	  return (v + o) & 0x1F;
11529 }
11530
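/* A standalone note (not simulator code): consecutive structure registers
   wrap around the end of the vector file, so an LD4 whose first
   destination is v30 targets v30, v31, v0, v1.  wrap_vreg below simply
   restates the architected modulo 32 wrap for illustration.  */

#include <assert.h>

static unsigned
wrap_vreg (unsigned v, unsigned o)
{
  return (v + o) & 0x1F;   /* 32 vector registers.  */
}

int
main (void)
{
  assert (wrap_vreg (30, 2) == 0);
  assert (wrap_vreg (30, 3) == 1);
  return 0;
}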
11531 /* Load multiple N-element structures to M consecutive registers. */
11532 static void
11533 vec_load (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11534 {
11535 int all = INSTR (30, 30);
11536 unsigned size = INSTR (11, 10);
11537 unsigned vd = INSTR (4, 0);
11538 unsigned rpt = (N == M) ? 1 : M;
11539 unsigned selem = N;
11540 unsigned i, j, k;
11541
11542 switch (size)
11543 {
11544 case 0: /* 8-bit operations. */
11545 for (i = 0; i < rpt; i++)
11546 for (j = 0; j < (8 + (8 * all)); j++)
11547 for (k = 0; k < selem; k++)
11548 {
11549 aarch64_set_vec_u8 (cpu, vec_reg (vd, i + k), j,
11550 aarch64_get_mem_u8 (cpu, address));
11551 address += 1;
11552 }
11553 return;
11554
11555 case 1: /* 16-bit operations. */
11556 for (i = 0; i < rpt; i++)
11557 for (j = 0; j < (4 + (4 * all)); j++)
11558 for (k = 0; k < selem; k++)
11559 {
11560 aarch64_set_vec_u16 (cpu, vec_reg (vd, i + k), j,
11561 aarch64_get_mem_u16 (cpu, address));
11562 address += 2;
11563 }
11564 return;
11565
11566 case 2: /* 32-bit operations. */
11567 for (i = 0; i < rpt; i++)
11568 for (j = 0; j < (2 + (2 * all)); j++)
11569 for (k = 0; k < selem; k++)
11570 {
11571 aarch64_set_vec_u32 (cpu, vec_reg (vd, i + k), j,
11572 aarch64_get_mem_u32 (cpu, address));
11573 address += 4;
11574 }
11575 return;
11576
11577 case 3: /* 64-bit operations. */
11578 for (i = 0; i < rpt; i++)
11579 for (j = 0; j < (1 + all); j++)
11580 for (k = 0; k < selem; k++)
11581 {
11582 aarch64_set_vec_u64 (cpu, vec_reg (vd, i + k), j,
11583 aarch64_get_mem_u64 (cpu, address));
11584 address += 8;
11585 }
11586 return;
11587 }
11588 }
11589
11590 /* Load multiple 4-element structures into four consecutive registers. */
11591 static void
11592 LD4 (sim_cpu *cpu, uint64_t address)
11593 {
11594 vec_load (cpu, address, 4, 4);
11595 }
11596
11597 /* Load multiple 3-element structures into three consecutive registers. */
11598 static void
11599 LD3 (sim_cpu *cpu, uint64_t address)
11600 {
11601 vec_load (cpu, address, 3, 3);
11602 }
11603
11604 /* Load multiple 2-element structures into two consecutive registers. */
11605 static void
11606 LD2 (sim_cpu *cpu, uint64_t address)
11607 {
11608 vec_load (cpu, address, 2, 2);
11609 }
11610
11611 /* Load multiple 1-element structures into one register. */
11612 static void
11613 LD1_1 (sim_cpu *cpu, uint64_t address)
11614 {
11615 vec_load (cpu, address, 1, 1);
11616 }
11617
11618 /* Load multiple 1-element structures into two registers. */
11619 static void
11620 LD1_2 (sim_cpu *cpu, uint64_t address)
11621 {
11622 vec_load (cpu, address, 1, 2);
11623 }
11624
11625 /* Load multiple 1-element structures into three registers. */
11626 static void
11627 LD1_3 (sim_cpu *cpu, uint64_t address)
11628 {
11629 vec_load (cpu, address, 1, 3);
11630 }
11631
11632 /* Load multiple 1-element structures into four registers. */
11633 static void
11634 LD1_4 (sim_cpu *cpu, uint64_t address)
11635 {
11636 vec_load (cpu, address, 1, 4);
11637 }
11638
11639 /* Store multiple N-element structures from M consecutive registers. */
11640 static void
11641 vec_store (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11642 {
11643 int all = INSTR (30, 30);
11644 unsigned size = INSTR (11, 10);
11645 unsigned vd = INSTR (4, 0);
11646 unsigned rpt = (N == M) ? 1 : M;
11647 unsigned selem = N;
11648 unsigned i, j, k;
11649
11650 switch (size)
11651 {
11652 case 0: /* 8-bit operations. */
11653 for (i = 0; i < rpt; i++)
11654 for (j = 0; j < (8 + (8 * all)); j++)
11655 for (k = 0; k < selem; k++)
11656 {
11657 aarch64_set_mem_u8
11658 (cpu, address,
11659 aarch64_get_vec_u8 (cpu, vec_reg (vd, i + k), j));
11660 address += 1;
11661 }
11662 return;
11663
11664 case 1: /* 16-bit operations. */
11665 for (i = 0; i < rpt; i++)
11666 for (j = 0; j < (4 + (4 * all)); j++)
11667 for (k = 0; k < selem; k++)
11668 {
11669 aarch64_set_mem_u16
11670 (cpu, address,
11671 aarch64_get_vec_u16 (cpu, vec_reg (vd, i + k), j));
11672 address += 2;
11673 }
11674 return;
11675
11676 case 2: /* 32-bit operations. */
11677 for (i = 0; i < rpt; i++)
11678 for (j = 0; j < (2 + (2 * all)); j++)
11679 for (k = 0; k < selem; k++)
11680 {
11681 aarch64_set_mem_u32
11682 (cpu, address,
11683 aarch64_get_vec_u32 (cpu, vec_reg (vd, i + k), j));
11684 address += 4;
11685 }
11686 return;
11687
11688 case 3: /* 64-bit operations. */
11689 for (i = 0; i < rpt; i++)
11690 for (j = 0; j < (1 + all); j++)
11691 for (k = 0; k < selem; k++)
11692 {
11693 aarch64_set_mem_u64
11694 (cpu, address,
11695 aarch64_get_vec_u64 (cpu, vec_reg (vd, i + k), j));
11696 address += 8;
11697 }
11698 return;
11699 }
11700 }
11701
11702 /* Store multiple 4-element structure from four consecutive registers. */
11703 static void
11704 ST4 (sim_cpu *cpu, uint64_t address)
11705 {
11706 vec_store (cpu, address, 4, 4);
11707 }
11708
11709 /* Store multiple 3-element structures from three consecutive registers. */
11710 static void
11711 ST3 (sim_cpu *cpu, uint64_t address)
11712 {
11713 vec_store (cpu, address, 3, 3);
11714 }
11715
11716 /* Store multiple 2-element structures from two consecutive registers. */
11717 static void
11718 ST2 (sim_cpu *cpu, uint64_t address)
11719 {
11720 vec_store (cpu, address, 2, 2);
11721 }
11722
11723 /* Store multiple 1-element structures from one register. */
11724 static void
11725 ST1_1 (sim_cpu *cpu, uint64_t address)
11726 {
11727 vec_store (cpu, address, 1, 1);
11728 }
11729
11730 /* Store multiple 1-element structures from two registers. */
11731 static void
11732 ST1_2 (sim_cpu *cpu, uint64_t address)
11733 {
11734 vec_store (cpu, address, 1, 2);
11735 }
11736
11737 /* Store multiple 1-element structures from three registers. */
11738 static void
11739 ST1_3 (sim_cpu *cpu, uint64_t address)
11740 {
11741 vec_store (cpu, address, 1, 3);
11742 }
11743
11744 /* Store multiple 1-element structures from four registers. */
11745 static void
11746 ST1_4 (sim_cpu *cpu, uint64_t address)
11747 {
11748 vec_store (cpu, address, 1, 4);
11749 }
11750
11751 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11752 do \
11753 { \
11754 switch (INSTR (15, 14)) \
11755 { \
11756 case 0: \
11757 lane = (full << 3) | (s << 2) | size; \
11758 size = 0; \
11759 break; \
11760 \
11761 case 1: \
11762 if ((size & 1) == 1) \
11763 HALT_UNALLOC; \
11764 lane = (full << 2) | (s << 1) | (size >> 1); \
11765 size = 1; \
11766 break; \
11767 \
11768 case 2: \
11769 if ((size & 2) == 2) \
11770 HALT_UNALLOC; \
11771 \
11772 if ((size & 1) == 0) \
11773 { \
11774 lane = (full << 1) | s; \
11775 size = 2; \
11776 } \
11777 else \
11778 { \
11779 if (s) \
11780 HALT_UNALLOC; \
11781 lane = full; \
11782 size = 3; \
11783 } \
11784 break; \
11785 \
11786 default: \
11787 HALT_UNALLOC; \
11788 } \
11789 } \
11790 while (0)
11791
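/* A standalone sketch (not simulator code) of the byte-element arm of the
   macro above: for INSTR (15, 14) == 0 the lane number is the
   concatenation Q:S:size, so a 128 bit register (Q = 1) addresses byte
   lanes 8-15 and a 64 bit register lanes 0-7.  byte_lane is invented for
   this illustration.  */

#include <assert.h>

static unsigned
byte_lane (unsigned q, unsigned s, unsigned size)
{
  return (q << 3) | (s << 2) | size;
}

int
main (void)
{
  assert (byte_lane (0, 0, 0) == 0);    /* Lowest 64 bit lane.  */
  assert (byte_lane (1, 1, 3) == 15);   /* Highest 128 bit lane.  */
  return 0;
}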
11792 /* Load single structure into one lane of N registers. */
11793 static void
11794 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11795 {
11796 /* instr[31] = 0
11797 instr[30] = element selector 0=>half, 1=>all elements
11798 instr[29,24] = 00 1101
11799 instr[23] = 0=>simple, 1=>post
11800 instr[22] = 1
11801 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11802 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11803 11111 (immediate post inc)
11804 instr[15,13] = opcode
11805 instr[12] = S, used for lane number
11806 instr[11,10] = size, also used for lane number
11807 instr[9,5] = address
11808 instr[4,0] = Vd */
11809
11810 unsigned full = INSTR (30, 30);
11811 unsigned vd = INSTR (4, 0);
11812 unsigned size = INSTR (11, 10);
11813 unsigned s = INSTR (12, 12);
11814 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11815 int lane = 0;
11816 int i;
11817
11818 NYI_assert (29, 24, 0x0D);
11819 NYI_assert (22, 22, 1);
11820
11821 /* Compute the lane number first (using size), and then compute size. */
11822 LDn_STn_SINGLE_LANE_AND_SIZE ();
11823
11824 for (i = 0; i < nregs; i++)
11825 switch (size)
11826 {
11827 case 0:
11828 {
11829 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11830 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11831 break;
11832 }
11833
11834 case 1:
11835 {
11836 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11837 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11838 break;
11839 }
11840
11841 case 2:
11842 {
11843 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11844 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11845 break;
11846 }
11847
11848 case 3:
11849 {
11850 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11851 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11852 break;
11853 }
11854 }
11855 }
11856
11857 /* Store single structure from one lane from N registers. */
11858 static void
11859 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11860 {
11861 /* instr[31] = 0
11862 instr[30] = element selector 0=>half, 1=>all elements
11863 instr[29,24] = 00 1101
11864 instr[23] = 0=>simple, 1=>post
11865 instr[22] = 0
11866	     instr[21] = width: ST1-or-ST3 (0) / ST2-or-ST4 (1)
11867 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11868 11111 (immediate post inc)
11869 instr[15,13] = opcode
11870 instr[12] = S, used for lane number
11871 instr[11,10] = size, also used for lane number
11872 instr[9,5] = address
11873 instr[4,0] = Vd */
11874
11875 unsigned full = INSTR (30, 30);
11876 unsigned vd = INSTR (4, 0);
11877 unsigned size = INSTR (11, 10);
11878 unsigned s = INSTR (12, 12);
11879 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11880 int lane = 0;
11881 int i;
11882
11883 NYI_assert (29, 24, 0x0D);
11884 NYI_assert (22, 22, 0);
11885
11886 /* Compute the lane number first (using size), and then compute size. */
11887 LDn_STn_SINGLE_LANE_AND_SIZE ();
11888
11889 for (i = 0; i < nregs; i++)
11890 switch (size)
11891 {
11892 case 0:
11893 {
11894 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11895 aarch64_set_mem_u8 (cpu, address + i, val);
11896 break;
11897 }
11898
11899 case 1:
11900 {
11901 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11902 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11903 break;
11904 }
11905
11906 case 2:
11907 {
11908 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11909 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11910 break;
11911 }
11912
11913 case 3:
11914 {
11915 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11916 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11917 break;
11918 }
11919 }
11920 }
11921
11922 /* Load single structure into all lanes of N registers. */
11923 static void
11924 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11925 {
11926 /* instr[31] = 0
11927 instr[30] = element selector 0=>half, 1=>all elements
11928 instr[29,24] = 00 1101
11929 instr[23] = 0=>simple, 1=>post
11930 instr[22] = 1
11931 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11932 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11933 11111 (immediate post inc)
11934 instr[15,14] = 11
11935 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11936 instr[12] = 0
11937 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11938 10=> word(s), 11=> double(d)
11939 instr[9,5] = address
11940 instr[4,0] = Vd */
11941
11942 unsigned full = INSTR (30, 30);
11943 unsigned vd = INSTR (4, 0);
11944 unsigned size = INSTR (11, 10);
11945 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11946 int i, n;
11947
11948 NYI_assert (29, 24, 0x0D);
11949 NYI_assert (22, 22, 1);
11950 NYI_assert (15, 14, 3);
11951 NYI_assert (12, 12, 0);
11952
11953 for (n = 0; n < nregs; n++)
11954 switch (size)
11955 {
11956 case 0:
11957 {
11958 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11959 for (i = 0; i < (full ? 16 : 8); i++)
11960 aarch64_set_vec_u8 (cpu, vd + n, i, val);
11961 break;
11962 }
11963
11964 case 1:
11965 {
11966 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11967 for (i = 0; i < (full ? 8 : 4); i++)
11968 aarch64_set_vec_u16 (cpu, vd + n, i, val);
11969 break;
11970 }
11971
11972 case 2:
11973 {
11974 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11975 for (i = 0; i < (full ? 4 : 2); i++)
11976 aarch64_set_vec_u32 (cpu, vd + n, i, val);
11977 break;
11978 }
11979
11980 case 3:
11981 {
11982 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11983 for (i = 0; i < (full ? 2 : 1); i++)
11984 aarch64_set_vec_u64 (cpu, vd + n, i, val);
11985 break;
11986 }
11987
11988 default:
11989 HALT_UNALLOC;
11990 }
11991 }
11992
11993 static void
11994 do_vec_load_store (sim_cpu *cpu)
11995 {
11996 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11997
11998 instr[31] = 0
11999 instr[30] = element selector 0=>half, 1=>all elements
12000 instr[29,25] = 00110
12001 instr[24] = 0=>multiple struct, 1=>single struct
12002 instr[23] = 0=>simple, 1=>post
12003 instr[22] = 0=>store, 1=>load
12004 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
12005 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
12006 11111 (immediate post inc)
12007 instr[15,12] = elements and destinations. eg for load:
12008 0000=>LD4 => load multiple 4-element to
12009 four consecutive registers
12010 0100=>LD3 => load multiple 3-element to
12011 three consecutive registers
12012 1000=>LD2 => load multiple 2-element to
12013 two consecutive registers
12014 0010=>LD1 => load multiple 1-element to
12015 four consecutive registers
12016 0110=>LD1 => load multiple 1-element to
12017 three consecutive registers
12018 1010=>LD1 => load multiple 1-element to
12019 two consecutive registers
12020 0111=>LD1 => load multiple 1-element to
12021 one register
12022	                      1100=>LD1R,LD2R
12023	                      1110=>LD3R,LD4R
12024 instr[11,10] = element size 00=> byte(b), 01=> half(h),
12025 10=> word(s), 11=> double(d)
12026 instr[9,5] = Vn, can be SP
12027 instr[4,0] = Vd */
12028
12029 int single;
12030 int post;
12031 int load;
12032 unsigned vn;
12033 uint64_t address;
12034 int type;
12035
12036 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12037 HALT_NYI;
12038
12039 single = INSTR (24, 24);
12040 post = INSTR (23, 23);
12041 load = INSTR (22, 22);
12042 type = INSTR (15, 12);
12043 vn = INSTR (9, 5);
12044 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
12045
12046 if (! single && INSTR (21, 21) != 0)
12047 HALT_UNALLOC;
12048
12049 if (post)
12050 {
12051 unsigned vm = INSTR (20, 16);
12052
12053 if (vm == R31)
12054 {
12055 unsigned sizeof_operation;
12056
12057 if (single)
12058 {
12059 if ((type >= 0) && (type <= 11))
12060 {
12061 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12062 switch (INSTR (15, 14))
12063 {
12064 case 0:
12065 sizeof_operation = nregs * 1;
12066 break;
12067 case 1:
12068 sizeof_operation = nregs * 2;
12069 break;
12070 case 2:
12071 if (INSTR (10, 10) == 0)
12072 sizeof_operation = nregs * 4;
12073 else
12074 sizeof_operation = nregs * 8;
12075 break;
12076 default:
12077 HALT_UNALLOC;
12078 }
12079 }
12080 else if (type == 0xC)
12081 {
12082 sizeof_operation = INSTR (21, 21) ? 2 : 1;
12083 sizeof_operation <<= INSTR (11, 10);
12084 }
12085 else if (type == 0xE)
12086 {
12087 sizeof_operation = INSTR (21, 21) ? 4 : 3;
12088 sizeof_operation <<= INSTR (11, 10);
12089 }
12090 else
12091 HALT_UNALLOC;
12092 }
12093 else
12094 {
12095 switch (type)
12096 {
12097 case 0: sizeof_operation = 32; break;
12098 case 4: sizeof_operation = 24; break;
12099 case 8: sizeof_operation = 16; break;
12100
12101 case 7:
12102 /* One register, immediate offset variant. */
12103 sizeof_operation = 8;
12104 break;
12105
12106 case 10:
12107 /* Two registers, immediate offset variant. */
12108 sizeof_operation = 16;
12109 break;
12110
12111 case 6:
12112 /* Three registers, immediate offset variant. */
12113 sizeof_operation = 24;
12114 break;
12115
12116 case 2:
12117 /* Four registers, immediate offset variant. */
12118 sizeof_operation = 32;
12119 break;
12120
12121 default:
12122 HALT_UNALLOC;
12123 }
12124
12125 if (INSTR (30, 30))
12126 sizeof_operation *= 2;
12127 }
12128
12129 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12130 }
12131 else
12132 aarch64_set_reg_u64 (cpu, vn, SP_OK,
12133 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12134 }
12135 else
12136 {
12137 NYI_assert (20, 16, 0);
12138 }
12139
12140 if (single)
12141 {
12142 if (load)
12143 {
12144 if ((type >= 0) && (type <= 11))
12145 do_vec_LDn_single (cpu, address);
12146 else if ((type == 0xC) || (type == 0xE))
12147 do_vec_LDnR (cpu, address);
12148 else
12149 HALT_UNALLOC;
12150 return;
12151 }
12152
12153 /* Stores. */
12154 if ((type >= 0) && (type <= 11))
12155 {
12156 do_vec_STn_single (cpu, address);
12157 return;
12158 }
12159
12160 HALT_UNALLOC;
12161 }
12162
12163 if (load)
12164 {
12165 switch (type)
12166 {
12167 case 0: LD4 (cpu, address); return;
12168 case 4: LD3 (cpu, address); return;
12169 case 8: LD2 (cpu, address); return;
12170 case 2: LD1_4 (cpu, address); return;
12171 case 6: LD1_3 (cpu, address); return;
12172 case 10: LD1_2 (cpu, address); return;
12173 case 7: LD1_1 (cpu, address); return;
12174
12175 default:
12176 HALT_UNALLOC;
12177 }
12178 }
12179
12180 /* Stores. */
12181 switch (type)
12182 {
12183 case 0: ST4 (cpu, address); return;
12184 case 4: ST3 (cpu, address); return;
12185 case 8: ST2 (cpu, address); return;
12186 case 2: ST1_4 (cpu, address); return;
12187 case 6: ST1_3 (cpu, address); return;
12188 case 10: ST1_2 (cpu, address); return;
12189 case 7: ST1_1 (cpu, address); return;
12190 default:
12191 HALT_UNALLOC;
12192 }
12193 }
12194
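/* A standalone sketch (not simulator code) of the immediate post
   increment computed above for the multiple-structure forms: the type
   field selects the register count (in 8 byte units for a 64 bit
   arrangement) and a 128 bit arrangement (instr[30] set) doubles it,
   e.g. LD1 {v0.16b-v3.16b}, [x0], #64 has type 2 and Q = 1.
   post_inc_bytes is invented for this illustration.  */

#include <assert.h>

static unsigned
post_inc_bytes (int type, int q)
{
  unsigned n;

  switch (type)
    {
    case 0: case 2:  n = 32; break;  /* Four registers.  */
    case 4: case 6:  n = 24; break;  /* Three registers.  */
    case 8: case 10: n = 16; break;  /* Two registers.  */
    case 7:          n = 8;  break;  /* One register.  */
    default: return 0;
    }
  return q ? n * 2 : n;
}

int
main (void)
{
  assert (post_inc_bytes (2, 1) == 64);
  assert (post_inc_bytes (4, 0) == 24);
  return 0;
}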
12195 static void
12196 dexLdSt (sim_cpu *cpu)
12197 {
12198 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12199 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12200 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12201 bits [29,28:26] of a LS are the secondary dispatch vector. */
12202 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12203
12204 switch (group2)
12205 {
12206 case LS_EXCL_000:
12207 dexLoadExclusive (cpu); return;
12208
12209 case LS_LIT_010:
12210 case LS_LIT_011:
12211 dexLoadLiteral (cpu); return;
12212
12213 case LS_OTHER_110:
12214 case LS_OTHER_111:
12215 dexLoadOther (cpu); return;
12216
12217 case LS_ADVSIMD_001:
12218 do_vec_load_store (cpu); return;
12219
12220 case LS_PAIR_100:
12221 dex_load_store_pair_gr (cpu); return;
12222
12223 case LS_PAIR_101:
12224 dex_load_store_pair_fp (cpu); return;
12225
12226 default:
12227 /* Should never reach here. */
12228 HALT_NYI;
12229 }
12230 }
12231
12232 /* Specific decode and execute for group Data Processing Register. */
12233
12234 static void
12235 dexLogicalShiftedRegister (sim_cpu *cpu)
12236 {
12237 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12238 instr[30,29] = op
12239 instr[28:24] = 01010
12240 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12241 instr[21] = N
12242 instr[20,16] = Rm
12243 instr[15,10] = count : must be 0xxxxx for 32 bit
12244 instr[9,5] = Rn
12245 instr[4,0] = Rd */
12246
12247 uint32_t size = INSTR (31, 31);
12248 Shift shiftType = INSTR (23, 22);
12249 uint32_t count = INSTR (15, 10);
12250
12251	  /* 32 bit operations must have count[5] = 0,
12252	     or else we have an UNALLOC.  */
12253 if (size == 0 && uimm (count, 5, 5))
12254 HALT_UNALLOC;
12255
12256 /* Dispatch on size:op:N. */
12257 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12258 {
12259 case 0: and32_shift (cpu, shiftType, count); return;
12260 case 1: bic32_shift (cpu, shiftType, count); return;
12261 case 2: orr32_shift (cpu, shiftType, count); return;
12262 case 3: orn32_shift (cpu, shiftType, count); return;
12263 case 4: eor32_shift (cpu, shiftType, count); return;
12264 case 5: eon32_shift (cpu, shiftType, count); return;
12265 case 6: ands32_shift (cpu, shiftType, count); return;
12266 case 7: bics32_shift (cpu, shiftType, count); return;
12267 case 8: and64_shift (cpu, shiftType, count); return;
12268 case 9: bic64_shift (cpu, shiftType, count); return;
12269	    case 10: orr64_shift  (cpu, shiftType, count); return;
12270	    case 11: orn64_shift  (cpu, shiftType, count); return;
12271	    case 12: eor64_shift  (cpu, shiftType, count); return;
12272	    case 13: eon64_shift  (cpu, shiftType, count); return;
12273	    case 14: ands64_shift (cpu, shiftType, count); return;
12274	    case 15: bics64_shift (cpu, shiftType, count); return;
12275 }
12276 }
12277
12278 /* 32 bit conditional select. */
12279 static void
12280 csel32 (sim_cpu *cpu, CondCode cc)
12281 {
12282 unsigned rm = INSTR (20, 16);
12283 unsigned rn = INSTR (9, 5);
12284 unsigned rd = INSTR (4, 0);
12285
12286 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12287 testConditionCode (cpu, cc)
12288 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12289 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12290 }
12291
12292 /* 64 bit conditional select. */
12293 static void
12294 csel64 (sim_cpu *cpu, CondCode cc)
12295 {
12296 unsigned rm = INSTR (20, 16);
12297 unsigned rn = INSTR (9, 5);
12298 unsigned rd = INSTR (4, 0);
12299
12300 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12301 testConditionCode (cpu, cc)
12302 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12303 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12304 }
12305
12306 /* 32 bit conditional increment. */
12307 static void
12308 csinc32 (sim_cpu *cpu, CondCode cc)
12309 {
12310 unsigned rm = INSTR (20, 16);
12311 unsigned rn = INSTR (9, 5);
12312 unsigned rd = INSTR (4, 0);
12313
12314 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12315 testConditionCode (cpu, cc)
12316 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12317 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12318 }
12319
12320 /* 64 bit conditional increment. */
12321 static void
12322 csinc64 (sim_cpu *cpu, CondCode cc)
12323 {
12324 unsigned rm = INSTR (20, 16);
12325 unsigned rn = INSTR (9, 5);
12326 unsigned rd = INSTR (4, 0);
12327
12328 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12329 testConditionCode (cpu, cc)
12330 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12331 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12332 }
12333
12334 /* 32 bit conditional invert. */
12335 static void
12336 csinv32 (sim_cpu *cpu, CondCode cc)
12337 {
12338 unsigned rm = INSTR (20, 16);
12339 unsigned rn = INSTR (9, 5);
12340 unsigned rd = INSTR (4, 0);
12341
12342 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12343 testConditionCode (cpu, cc)
12344 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12345 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12346 }
12347
12348 /* 64 bit conditional invert. */
12349 static void
12350 csinv64 (sim_cpu *cpu, CondCode cc)
12351 {
12352 unsigned rm = INSTR (20, 16);
12353 unsigned rn = INSTR (9, 5);
12354 unsigned rd = INSTR (4, 0);
12355
12356 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12357 testConditionCode (cpu, cc)
12358 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12359 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12360 }
12361
12362 /* 32 bit conditional negate. */
12363 static void
12364 csneg32 (sim_cpu *cpu, CondCode cc)
12365 {
12366 unsigned rm = INSTR (20, 16);
12367 unsigned rn = INSTR (9, 5);
12368 unsigned rd = INSTR (4, 0);
12369
12370 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12371 testConditionCode (cpu, cc)
12372 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12373 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12374 }
12375
12376 /* 64 bit conditional negate. */
12377 static void
12378 csneg64 (sim_cpu *cpu, CondCode cc)
12379 {
12380 unsigned rm = INSTR (20, 16);
12381 unsigned rn = INSTR (9, 5);
12382 unsigned rd = INSTR (4, 0);
12383
12384 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12385 testConditionCode (cpu, cc)
12386 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12387 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12388 }
12389
12390 static void
12391 dexCondSelect (sim_cpu *cpu)
12392 {
12393 /* instr[28,21] = 11011011
12394 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12395 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12396 100 ==> CSINV, 101 ==> CSNEG,
12397 _1_ ==> UNALLOC
12398 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12399 instr[15,12] = cond
12400      instr[9,5] = rn, instr[4,0] = rd  */
12401
12402 CondCode cc = INSTR (15, 12);
12403 uint32_t S = INSTR (29, 29);
12404 uint32_t op2 = INSTR (11, 10);
12405
12406 if (S == 1)
12407 HALT_UNALLOC;
12408
12409 if (op2 & 0x2)
12410 HALT_UNALLOC;
12411
12412 switch ((INSTR (31, 30) << 1) | op2)
12413 {
12414 case 0: csel32 (cpu, cc); return;
12415 case 1: csinc32 (cpu, cc); return;
12416 case 2: csinv32 (cpu, cc); return;
12417 case 3: csneg32 (cpu, cc); return;
12418 case 4: csel64 (cpu, cc); return;
12419 case 5: csinc64 (cpu, cc); return;
12420 case 6: csinv64 (cpu, cc); return;
12421 case 7: csneg64 (cpu, cc); return;
12422 }
12423 }
12424
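/* N.B. the conditional-select group above also covers the usual aliases
   (our own illustration, not from the original source): e.g. CSET Wd, cond
   assembles as CSINC Wd, WZR, WZR, invert (cond), so csinc32 then yields
   cond ? 1 : 0.  */
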
12425 /* Some helpers for counting leading 1 or 0 bits. */
12426
12427 /* Counts the number of leading bits which are the same
12428 in a 32 bit value, returning a count in the range 1 to 32. */
12429 static uint32_t
12430 leading32 (uint32_t value)
12431 {
12432 int32_t mask = 0xffff0000;
12433 uint32_t count = 16; /* Counts number of bits set in mask. */
12434 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12435 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12436
12437 while (lo + 1 < hi)
12438 {
12439 int32_t test = (value & mask);
12440
12441 if (test == 0 || test == mask)
12442 {
12443 lo = count;
12444 count = (lo + hi) / 2;
12445 mask >>= (count - lo);
12446 }
12447 else
12448 {
12449 hi = count;
12450 count = (lo + hi) / 2;
12451 mask <<= hi - count;
12452 }
12453 }
12454
12455 if (lo != hi)
12456 {
12457 int32_t test;
12458
12459 mask >>= 1;
12460 test = (value & mask);
12461
12462 if (test == 0 || test == mask)
12463 count = hi;
12464 else
12465 count = lo;
12466 }
12467
12468 return count;
12469 }
12470
12471 /* Counts the number of leading bits which are the same
12472 in a 64 bit value, returning a count in the range 1 to 64. */
12473 static uint64_t
12474 leading64 (uint64_t value)
12475 {
12476 int64_t mask = 0xffffffff00000000LL;
12477 uint64_t count = 32; /* Counts number of bits set in mask. */
12478 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12479 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12480
12481 while (lo + 1 < hi)
12482 {
12483 int64_t test = (value & mask);
12484
12485 if (test == 0 || test == mask)
12486 {
12487 lo = count;
12488 count = (lo + hi) / 2;
12489 mask >>= (count - lo);
12490 }
12491 else
12492 {
12493 hi = count;
12494 count = (lo + hi) / 2;
12495 mask <<= hi - count;
12496 }
12497 }
12498
12499 if (lo != hi)
12500 {
12501 int64_t test;
12502
12503 mask >>= 1;
12504 test = (value & mask);
12505
12506 if (test == 0 || test == mask)
12507 count = hi;
12508 else
12509 count = lo;
12510 }
12511
12512 return count;
12513 }
12514
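/* A minimal reference sketch (ours, not used by the simulator) of what the
   binary searches above compute: walk down from bit 62 while each bit still
   matches the sign bit.  Handy when cross-checking leading64; the function
   name is ours, not part of the original code.  */
#if 0
static uint64_t
leading64_reference (uint64_t value)
{
  uint64_t sign = value >> 63;
  uint64_t count = 1;
  int i;

  /* Stop at the first bit that differs from the sign bit.  */
  for (i = 62; i >= 0; i--)
    if (((value >> i) & 1) == sign)
      count++;
    else
      break;

  return count;
}
#endif
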
12515 /* Bit operations. */
12516 /* N.B. register args may not be SP. */
12517
12518 /* 32 bit count leading sign bits. */
12519 static void
12520 cls32 (sim_cpu *cpu)
12521 {
12522 unsigned rn = INSTR (9, 5);
12523 unsigned rd = INSTR (4, 0);
12524
12525 /* N.B. the result needs to exclude the leading bit. */
12526 aarch64_set_reg_u64
12527 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12528 }
12529
12530 /* 64 bit count leading sign bits. */
12531 static void
12532 cls64 (sim_cpu *cpu)
12533 {
12534 unsigned rn = INSTR (9, 5);
12535 unsigned rd = INSTR (4, 0);
12536
12537 /* N.B. the result needs to exclude the leading bit. */
12538 aarch64_set_reg_u64
12539 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12540 }
12541
12542 /* 32 bit count leading zero bits. */
12543 static void
12544 clz32 (sim_cpu *cpu)
12545 {
12546 unsigned rn = INSTR (9, 5);
12547 unsigned rd = INSTR (4, 0);
12548 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12549
12550 /* if the sign (top) bit is set then the count is 0. */
12551 if (pick32 (value, 31, 31))
12552 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12553 else
12554 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12555 }
12556
12557 /* 64 bit count leading zero bits. */
12558 static void
12559 clz64 (sim_cpu *cpu)
12560 {
12561 unsigned rn = INSTR (9, 5);
12562 unsigned rd = INSTR (4, 0);
12563 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12564
12565 /* if the sign (top) bit is set then the count is 0. */
12566 if (pick64 (value, 63, 63))
12567 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12568 else
12569 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12570 }
12571
12572 /* 32 bit reverse bits. */
12573 static void
12574 rbit32 (sim_cpu *cpu)
12575 {
12576 unsigned rn = INSTR (9, 5);
12577 unsigned rd = INSTR (4, 0);
12578 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12579 uint32_t result = 0;
12580 int i;
12581
12582 for (i = 0; i < 32; i++)
12583 {
12584 result <<= 1;
12585 result |= (value & 1);
12586 value >>= 1;
12587 }
12588 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12589 }
12590
12591 /* 64 bit reverse bits. */
12592 static void
12593 rbit64 (sim_cpu *cpu)
12594 {
12595 unsigned rn = INSTR (9, 5);
12596 unsigned rd = INSTR (4, 0);
12597 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12598 uint64_t result = 0;
12599 int i;
12600
12601 for (i = 0; i < 64; i++)
12602 {
12603 result <<= 1;
12604 result |= (value & 1UL);
12605 value >>= 1;
12606 }
12607 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12608 }
12609
12610 /* 32 bit reverse bytes. */
12611 static void
12612 rev32 (sim_cpu *cpu)
12613 {
12614 unsigned rn = INSTR (9, 5);
12615 unsigned rd = INSTR (4, 0);
12616 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12617 uint32_t result = 0;
12618 int i;
12619
12620 for (i = 0; i < 4; i++)
12621 {
12622 result <<= 8;
12623 result |= (value & 0xff);
12624 value >>= 8;
12625 }
12626 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12627 }
12628
12629 /* 64 bit reverse bytes. */
12630 static void
12631 rev64 (sim_cpu *cpu)
12632 {
12633 unsigned rn = INSTR (9, 5);
12634 unsigned rd = INSTR (4, 0);
12635 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12636 uint64_t result = 0;
12637 int i;
12638
12639 for (i = 0; i < 8; i++)
12640 {
12641 result <<= 8;
12642 result |= (value & 0xffULL);
12643 value >>= 8;
12644 }
12645 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12646 }
12647
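/* 64 bit reverse bytes in each 32 bit word.
   This is the behaviour the 64 bit form of REV32 needs (see the dispatch
   in dexDataProc1Source below); the helper follows the shift-and-mask
   style of revh64, and the name revw64 is ours.  */
static void
revw64 (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t result = 0;
  int i;

  for (i = 0; i < 4; i++)
    {
      result <<= 8;
      result |= (value & 0x000000ff000000ffULL);
      value >>= 8;
    }
  aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
}
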
12648 /* 32 bit reverse shorts. */
12649 /* N.B. this reverses the order of the bytes in each half word. */
12650 static void
12651 revh32 (sim_cpu *cpu)
12652 {
12653 unsigned rn = INSTR (9, 5);
12654 unsigned rd = INSTR (4, 0);
12655 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12656 uint32_t result = 0;
12657 int i;
12658
12659 for (i = 0; i < 2; i++)
12660 {
12661 result <<= 8;
12662 result |= (value & 0x00ff00ff);
12663 value >>= 8;
12664 }
12665 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12666 }
12667
12668 /* 64 bit reverse shorts. */
12669 /* N.B. this reverses the order of the bytes in each half word. */
12670 static void
12671 revh64 (sim_cpu *cpu)
12672 {
12673 unsigned rn = INSTR (9, 5);
12674 unsigned rd = INSTR (4, 0);
12675 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12676 uint64_t result = 0;
12677 int i;
12678
12679 for (i = 0; i < 2; i++)
12680 {
12681 result <<= 8;
12682 result |= (value & 0x00ff00ff00ff00ffULL);
12683 value >>= 8;
12684 }
12685 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12686 }
12687
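/* Worked examples for the reversal helpers above (our own illustration):
     rbit64 (0x0000000000000001) == 0x8000000000000000
     rev64  (0x0102030405060708) == 0x0807060504030201
     revw64 (0x0102030405060708) == 0x0403020108070605
     revh64 (0x0102030405060708) == 0x0201040306050807  */
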
12688 static void
12689 dexDataProc1Source (sim_cpu *cpu)
12690 {
12691 /* instr[30] = 1
12692 instr[28,21] = 11010110
12693 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12694 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12695 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12696 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12697 000010 ==> REV (REV32 when 64 bit), 000011 ==> REV (64 bit only)
12698 000100 ==> CLZ, 000101 ==> CLS
12699 ow ==> UNALLOC
12700 instr[9,5] = rn : may not be SP
12701 instr[4,0] = rd : may not be SP. */
12702
12703 uint32_t S = INSTR (29, 29);
12704 uint32_t opcode2 = INSTR (20, 16);
12705 uint32_t opcode = INSTR (15, 10);
12706 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12707
12708 if (S == 1)
12709 HALT_UNALLOC;
12710
12711 if (opcode2 != 0)
12712 HALT_UNALLOC;
12713
12714 if (opcode & 0x38)
12715 HALT_UNALLOC;
12716
12717 switch (dispatch)
12718 {
12719 case 0: rbit32 (cpu); return;
12720 case 1: revh32 (cpu); return;
12721 case 2: rev32 (cpu); return;
12722 case 4: clz32 (cpu); return;
12723 case 5: cls32 (cpu); return;
12724 case 8: rbit64 (cpu); return;
12725 case 9: revh64 (cpu); return;
12726 case 10:revw64 (cpu); return;
12727 case 11:rev64 (cpu); return;
12728 case 12:clz64 (cpu); return;
12729 case 13:cls64 (cpu); return;
12730 default: HALT_UNALLOC;
12731 }
12732 }
12733
12734 /* Variable shift.
12735 Shifts by count supplied in register.
12736 N.B. register args may not be SP.
12737 These all use the shifted auxiliary function for
12738 simplicity and clarity. Writing the actual shift
12739 inline would avoid a branch and so be faster but
12740 would also necessitate getting signs right. */
12741
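/* N.B. only the low 5 bits (32 bit) or low 6 bits (64 bit) of the count
   register are consulted, as the masks below show: a count of 35 in a W
   register therefore shifts by 3.  */
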
12742 /* 32 bit arithmetic shift right. */
12743 static void
12744 asrv32 (sim_cpu *cpu)
12745 {
12746 unsigned rm = INSTR (20, 16);
12747 unsigned rn = INSTR (9, 5);
12748 unsigned rd = INSTR (4, 0);
12749
12750 aarch64_set_reg_u64
12751 (cpu, rd, NO_SP,
12752 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12753 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12754 }
12755
12756 /* 64 bit arithmetic shift right. */
12757 static void
12758 asrv64 (sim_cpu *cpu)
12759 {
12760 unsigned rm = INSTR (20, 16);
12761 unsigned rn = INSTR (9, 5);
12762 unsigned rd = INSTR (4, 0);
12763
12764 aarch64_set_reg_u64
12765 (cpu, rd, NO_SP,
12766 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12767 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12768 }
12769
12770 /* 32 bit logical shift left. */
12771 static void
12772 lslv32 (sim_cpu *cpu)
12773 {
12774 unsigned rm = INSTR (20, 16);
12775 unsigned rn = INSTR (9, 5);
12776 unsigned rd = INSTR (4, 0);
12777
12778 aarch64_set_reg_u64
12779 (cpu, rd, NO_SP,
12780 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12781 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12782 }
12783
12784 /* 64 bit logical shift left. */
12785 static void
12786 lslv64 (sim_cpu *cpu)
12787 {
12788 unsigned rm = INSTR (20, 16);
12789 unsigned rn = INSTR (9, 5);
12790 unsigned rd = INSTR (4, 0);
12791
12792 aarch64_set_reg_u64
12793 (cpu, rd, NO_SP,
12794 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12795 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12796 }
12797
12798 /* 32 bit logical shift right. */
12799 static void
12800 lsrv32 (sim_cpu *cpu)
12801 {
12802 unsigned rm = INSTR (20, 16);
12803 unsigned rn = INSTR (9, 5);
12804 unsigned rd = INSTR (4, 0);
12805
12806 aarch64_set_reg_u64
12807 (cpu, rd, NO_SP,
12808 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12809 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12810 }
12811
12812 /* 64 bit logical shift right. */
12813 static void
12814 lsrv64 (sim_cpu *cpu)
12815 {
12816 unsigned rm = INSTR (20, 16);
12817 unsigned rn = INSTR (9, 5);
12818 unsigned rd = INSTR (4, 0);
12819
12820 aarch64_set_reg_u64
12821 (cpu, rd, NO_SP,
12822 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12823 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12824 }
12825
12826 /* 32 bit rotate right. */
12827 static void
12828 rorv32 (sim_cpu *cpu)
12829 {
12830 unsigned rm = INSTR (20, 16);
12831 unsigned rn = INSTR (9, 5);
12832 unsigned rd = INSTR (4, 0);
12833
12834 aarch64_set_reg_u64
12835 (cpu, rd, NO_SP,
12836 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12837 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12838 }
12839
12840 /* 64 bit rotate right. */
12841 static void
12842 rorv64 (sim_cpu *cpu)
12843 {
12844 unsigned rm = INSTR (20, 16);
12845 unsigned rn = INSTR (9, 5);
12846 unsigned rd = INSTR (4, 0);
12847
12848 aarch64_set_reg_u64
12849 (cpu, rd, NO_SP,
12850 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12851 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12852 }
12853
12854
12855 /* divide. */
12856
12857 /* 32 bit signed divide. */
12858 static void
12859 sdiv32 (sim_cpu *cpu)
12860 {
12861 unsigned rm = INSTR (20, 16);
12862 unsigned rn = INSTR (9, 5);
12863 unsigned rd = INSTR (4, 0);
12864 /* N.B. the pseudo-code does the divide using 64 bit data. */
12865 /* C division truncates towards zero, as SDIV requires. */
12866 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12867 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12868
12869 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12870 divisor ? ((int32_t) (dividend / divisor)) : 0);
12871 }
12872
12873 /* 64 bit signed divide. */
12874 static void
12875 sdiv64 (sim_cpu *cpu)
12876 {
12877 unsigned rm = INSTR (20, 16);
12878 unsigned rn = INSTR (9, 5);
12879 unsigned rd = INSTR (4, 0);
12880
12881 /* C division truncates towards zero, as SDIV requires; SDIV defines
12882 the INT64_MIN / -1 overflow case, undefined in C, to yield INT64_MIN. */
12883 int64_t dividend = aarch64_get_reg_s64 (cpu, rn, NO_SP);
12884 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12885 aarch64_set_reg_s64 (cpu, rd, NO_SP, divisor == 0 ? 0
12886 : (dividend == INT64_MIN && divisor == -1) ? INT64_MIN : dividend / divisor);
12887 }
12888
12889 /* 32 bit unsigned divide. */
12890 static void
12891 udiv32 (sim_cpu *cpu)
12892 {
12893 unsigned rm = INSTR (20, 16);
12894 unsigned rn = INSTR (9, 5);
12895 unsigned rd = INSTR (4, 0);
12896
12897 /* N.B. the pseudo-code does the divide using 64 bit data. */
12898 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12899 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12900
12901 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12902 divisor ? (uint32_t) (dividend / divisor) : 0);
12903 }
12904
12905 /* 64 bit unsigned divide. */
12906 static void
12907 udiv64 (sim_cpu *cpu)
12908 {
12909 unsigned rm = INSTR (20, 16);
12910 unsigned rn = INSTR (9, 5);
12911 unsigned rd = INSTR (4, 0);
12912
12913 /* Unsigned C division truncates, i.e. rounds towards zero as UDIV requires. */
12914 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12915
12916 aarch64_set_reg_u64
12917 (cpu, rd, NO_SP,
12918 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12919 }
12920
12921 static void
12922 dexDataProc2Source (sim_cpu *cpu)
12923 {
12924 /* assert instr[30] == 0
12925 instr[28,21] == 11010110
12926 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12927 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12928 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12929 001000 ==> LSLV, 001001 ==> LSRV
12930 001010 ==> ASRV, 001011 ==> RORV
12931 ow ==> UNALLOC. */
12932
12933 uint32_t dispatch;
12934 uint32_t S = INSTR (29, 29);
12935 uint32_t opcode = INSTR (15, 10);
12936
12937 if (S == 1)
12938 HALT_UNALLOC;
12939
12940 if (opcode & 0x34)
12941 HALT_UNALLOC;
12942
12943 dispatch = ( (INSTR (31, 31) << 3)
12944 | (uimm (opcode, 3, 3) << 2)
12945 | uimm (opcode, 1, 0));
12946 switch (dispatch)
12947 {
12948 case 2: udiv32 (cpu); return;
12949 case 3: sdiv32 (cpu); return;
12950 case 4: lslv32 (cpu); return;
12951 case 5: lsrv32 (cpu); return;
12952 case 6: asrv32 (cpu); return;
12953 case 7: rorv32 (cpu); return;
12954 case 10: udiv64 (cpu); return;
12955 case 11: sdiv64 (cpu); return;
12956 case 12: lslv64 (cpu); return;
12957 case 13: lsrv64 (cpu); return;
12958 case 14: asrv64 (cpu); return;
12959 case 15: rorv64 (cpu); return;
12960 default: HALT_UNALLOC;
12961 }
12962 }
12963
12964
12965 /* Multiply. */
12966
12967 /* 32 bit multiply and add. */
12968 static void
12969 madd32 (sim_cpu *cpu)
12970 {
12971 unsigned rm = INSTR (20, 16);
12972 unsigned ra = INSTR (14, 10);
12973 unsigned rn = INSTR (9, 5);
12974 unsigned rd = INSTR (4, 0);
12975
12976 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12977 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12978 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12979 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12980 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12981 }
12982
12983 /* 64 bit multiply and add. */
12984 static void
12985 madd64 (sim_cpu *cpu)
12986 {
12987 unsigned rm = INSTR (20, 16);
12988 unsigned ra = INSTR (14, 10);
12989 unsigned rn = INSTR (9, 5);
12990 unsigned rd = INSTR (4, 0);
12991
12992 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12993 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12994 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12995 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12996 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12997 }
12998
12999 /* 32 bit multiply and sub. */
13000 static void
13001 msub32 (sim_cpu *cpu)
13002 {
13003 unsigned rm = INSTR (20, 16);
13004 unsigned ra = INSTR (14, 10);
13005 unsigned rn = INSTR (9, 5);
13006 unsigned rd = INSTR (4, 0);
13007
13008 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13009 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13010 aarch64_get_reg_u32 (cpu, ra, NO_SP)
13011 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
13012 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
13013 }
13014
13015 /* 64 bit multiply and sub. */
13016 static void
13017 msub64 (sim_cpu *cpu)
13018 {
13019 unsigned rm = INSTR (20, 16);
13020 unsigned ra = INSTR (14, 10);
13021 unsigned rn = INSTR (9, 5);
13022 unsigned rd = INSTR (4, 0);
13023
13024 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13025 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13026 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13027 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
13028 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
13029 }
13030
13031 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
13032 static void
13033 smaddl (sim_cpu *cpu)
13034 {
13035 unsigned rm = INSTR (20, 16);
13036 unsigned ra = INSTR (14, 10);
13037 unsigned rn = INSTR (9, 5);
13038 unsigned rd = INSTR (4, 0);
13039
13040 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13041 obtain a 64 bit product. */
13042 aarch64_set_reg_s64
13043 (cpu, rd, NO_SP,
13044 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13045 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13046 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13047 }
13048
13049 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13050 static void
13051 smsubl (sim_cpu *cpu)
13052 {
13053 unsigned rm = INSTR (20, 16);
13054 unsigned ra = INSTR (14, 10);
13055 unsigned rn = INSTR (9, 5);
13056 unsigned rd = INSTR (4, 0);
13057
13058 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13059 obtain a 64 bit product. */
13060 aarch64_set_reg_s64
13061 (cpu, rd, NO_SP,
13062 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13063 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13064 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13065 }
13066
13067 /* Integer Multiply/Divide. */
13068
13069 /* First some macros and a helper function. */
13070 /* Macros to test or access elements of 64 bit words. */
13071
13072 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
13073 #define LOW_WORD_MASK ((1ULL << 32) - 1)
13074 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13075 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
13076 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13077 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
13078
13079 /* Offset of sign bit in 64 bit signed integer. */
13080 #define SIGN_SHIFT_U64 63
13081 /* The sign bit itself -- also identifies the minimum negative int value. */
13082 #define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
13083 /* Return true if a 64 bit signed int presented as an unsigned int is the
13084 most negative value. */
13085 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
13086 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
13087 int has its sign bit set. */
13088 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
13089 /* Return 1L or -1L according to whether a 64 bit signed int presented as
13090 an unsigned int has its sign bit set or not. */
13091 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
13092 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
13093 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
13094
13095 /* Multiply two 64 bit ints and return
13096 the hi 64 bits of the 128 bit product. */
13097
13098 static uint64_t
13099 mul64hi (uint64_t value1, uint64_t value2)
13100 {
13101 uint64_t resultmid1;
13102 uint64_t result;
13103 uint64_t value1_lo = lowWordToU64 (value1);
13104 uint64_t value1_hi = highWordToU64 (value1);
13105 uint64_t value2_lo = lowWordToU64 (value2);
13106 uint64_t value2_hi = highWordToU64 (value2);
13107
13108 /* Cross-multiply and collect results. */
13109 uint64_t xproductlo = value1_lo * value2_lo;
13110 uint64_t xproductmid1 = value1_lo * value2_hi;
13111 uint64_t xproductmid2 = value1_hi * value2_lo;
13112 uint64_t xproducthi = value1_hi * value2_hi;
13113 uint64_t carry = 0;
13114 /* Start accumulating 64 bit results. */
13115 /* Drop bottom half of lowest cross-product. */
13116 uint64_t resultmid = xproductlo >> 32;
13117 /* Add in middle products. */
13118 resultmid = resultmid + xproductmid1;
13119
13120 /* Check for overflow. */
13121 if (resultmid < xproductmid1)
13122 /* Carry over 1 into top cross-product. */
13123 carry++;
13124
13125 resultmid1 = resultmid + xproductmid2;
13126
13127 /* Check for overflow. */
13128 if (resultmid1 < xproductmid2)
13129 /* Carry over 1 into top cross-product. */
13130 carry++;
13131
13132 /* Drop lowest 32 bits of middle cross-product. */
13133 result = resultmid1 >> 32;
13134 /* Move carry bit to just above middle cross-product highest bit. */
13135 carry = carry << 32;
13136
13137 /* Add the top cross-product and any carry. */
13138 result += xproducthi + carry;
13139
13140 return result;
13141 }
13142
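/* A throwaway cross-check for mul64hi (our own sketch, not part of the
   simulator proper): on hosts where GCC's unsigned __int128 extension is
   available the high half can be computed directly and compared.  */
#if 0
#include <assert.h>

static void
mul64hi_selfcheck (uint64_t a, uint64_t b)
{
  unsigned __int128 wide = (unsigned __int128) a * b;

  assert (mul64hi (a, b) == (uint64_t) (wide >> 64));
}
#endif
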
13143 /* Signed multiply high, source, source2 :
13144 64 bit, dest <-- high 64-bit of result. */
13145 static void
13146 smulh (sim_cpu *cpu)
13147 {
13148 uint64_t uresult;
13149 int64_t result;
13150 unsigned rm = INSTR (20, 16);
13151 unsigned rn = INSTR (9, 5);
13152 unsigned rd = INSTR (4, 0);
13153 GReg ra = INSTR (14, 10);
13154 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
13155 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
13156 uint64_t uvalue1;
13157 uint64_t uvalue2;
13158 int negate = 0;
13159
13160 if (ra != R31)
13161 HALT_UNALLOC;
13162
13163 /* Convert to unsigned and use the unsigned mul64hi routine,
13164 then fix the sign up afterwards. */
13165 if (value1 < 0)
13166 {
13167 negate = !negate;
13168 uvalue1 = -value1;
13169 }
13170 else
13171 {
13172 uvalue1 = value1;
13173 }
13174
13175 if (value2 < 0)
13176 {
13177 negate = !negate;
13178 uvalue2 = -value2;
13179 }
13180 else
13181 {
13182 uvalue2 = value2;
13183 }
13184
13185 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13186
13187 uresult = mul64hi (uvalue1, uvalue2);
13188 result = uresult;
13189
13190 if (negate)
13191 {
13192 /* Multiply 128-bit result by -1, which means highpart gets inverted,
13193 and has carry in added only if low part is 0. */
13194 result = ~result;
13195 if ((uvalue1 * uvalue2) == 0)
13196 result += 1;
13197 }
13198
13199 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13200 }
13201
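/* Worked examples for smulh (our own illustration):
     smulh (-1, 1)                ==> 0xffffffffffffffff (full product is -1)
     smulh (INT64_MIN, INT64_MIN) ==> 0x4000000000000000 (product is 2^126) */
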
13202 /* Unsigned multiply add long -- source, source2 :
13203 32 bit, source3 : 64 bit. */
13204 static void
13205 umaddl (sim_cpu *cpu)
13206 {
13207 unsigned rm = INSTR (20, 16);
13208 unsigned ra = INSTR (14, 10);
13209 unsigned rn = INSTR (9, 5);
13210 unsigned rd = INSTR (4, 0);
13211
13212 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13213 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13214 obtain a 64 bit product. */
13215 aarch64_set_reg_u64
13216 (cpu, rd, NO_SP,
13217 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13218 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13219 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13220 }
13221
13222 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13223 static void
13224 umsubl (sim_cpu *cpu)
13225 {
13226 unsigned rm = INSTR (20, 16);
13227 unsigned ra = INSTR (14, 10);
13228 unsigned rn = INSTR (9, 5);
13229 unsigned rd = INSTR (4, 0);
13230
13231 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13232 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13233 obtain a 64 bit product. */
13234 aarch64_set_reg_u64
13235 (cpu, rd, NO_SP,
13236 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13237 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13238 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13239 }
13240
13241 /* Unsigned multiply high, source, source2 :
13242 64 bit, dest <-- high 64-bit of result. */
13243 static void
13244 umulh (sim_cpu *cpu)
13245 {
13246 unsigned rm = INSTR (20, 16);
13247 unsigned rn = INSTR (9, 5);
13248 unsigned rd = INSTR (4, 0);
13249 GReg ra = INSTR (14, 10);
13250
13251 if (ra != R31)
13252 HALT_UNALLOC;
13253
13254 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13255 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13256 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13257 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13258 }
13259
13260 static void
13261 dexDataProc3Source (sim_cpu *cpu)
13262 {
13263 /* assert instr[28,24] == 11011. */
13264 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13265 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13266 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13267 instr[15] = o0 : 0/1 ==> ok
13268 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13269 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13270 0100 ==> SMULH, (64 bit only)
13271 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13272 1100 ==> UMULH (64 bit only)
13273 ow ==> UNALLOC. */
13274
13275 uint32_t dispatch;
13276 uint32_t size = INSTR (31, 31);
13277 uint32_t op54 = INSTR (30, 29);
13278 uint32_t op31 = INSTR (23, 21);
13279 uint32_t o0 = INSTR (15, 15);
13280
13281 if (op54 != 0)
13282 HALT_UNALLOC;
13283
13284 if (size == 0)
13285 {
13286 if (op31 != 0)
13287 HALT_UNALLOC;
13288
13289 if (o0 == 0)
13290 madd32 (cpu);
13291 else
13292 msub32 (cpu);
13293 return;
13294 }
13295
13296 dispatch = (op31 << 1) | o0;
13297
13298 switch (dispatch)
13299 {
13300 case 0: madd64 (cpu); return;
13301 case 1: msub64 (cpu); return;
13302 case 2: smaddl (cpu); return;
13303 case 3: smsubl (cpu); return;
13304 case 4: smulh (cpu); return;
13305 case 10: umaddl (cpu); return;
13306 case 11: umsubl (cpu); return;
13307 case 12: umulh (cpu); return;
13308 default: HALT_UNALLOC;
13309 }
13310 }
13311
13312 static void
13313 dexDPReg (sim_cpu *cpu)
13314 {
13315 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13316 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13317 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13318 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13319
13320 switch (group2)
13321 {
13322 case DPREG_LOG_000:
13323 case DPREG_LOG_001:
13324 dexLogicalShiftedRegister (cpu); return;
13325
13326 case DPREG_ADDSHF_010:
13327 dexAddSubtractShiftedRegister (cpu); return;
13328
13329 case DPREG_ADDEXT_011:
13330 dexAddSubtractExtendedRegister (cpu); return;
13331
13332 case DPREG_ADDCOND_100:
13333 {
13334 /* This set bundles a variety of different operations. */
13335 /* Check for: */
13336 /* 1) add/sub w carry. */
13337 uint32_t mask1 = 0x1FE00000U;
13338 uint32_t val1 = 0x1A000000U;
13339 /* 2) cond compare register/immediate. */
13340 uint32_t mask2 = 0x1FE00000U;
13341 uint32_t val2 = 0x1A400000U;
13342 /* 3) cond select. */
13343 uint32_t mask3 = 0x1FE00000U;
13344 uint32_t val3 = 0x1A800000U;
13345 /* 4) data proc 1/2 source. */
13346 uint32_t mask4 = 0x1FE00000U;
13347 uint32_t val4 = 0x1AC00000U;
13348
13349 if ((aarch64_get_instr (cpu) & mask1) == val1)
13350 dexAddSubtractWithCarry (cpu);
13351
13352 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13353 CondCompare (cpu);
13354
13355 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13356 dexCondSelect (cpu);
13357
13358 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13359 {
13360 /* Bit 30 is clear for data proc 2 source
13361 and set for data proc 1 source. */
13362 if (aarch64_get_instr (cpu) & (1U << 30))
13363 dexDataProc1Source (cpu);
13364 else
13365 dexDataProc2Source (cpu);
13366 }
13367
13368 else
13369 /* Should not reach here. */
13370 HALT_NYI;
13371
13372 return;
13373 }
13374
13375 case DPREG_3SRC_110:
13376 dexDataProc3Source (cpu); return;
13377
13378 case DPREG_UNALLOC_101:
13379 HALT_UNALLOC;
13380
13381 case DPREG_3SRC_111:
13382 dexDataProc3Source (cpu); return;
13383
13384 default:
13385 /* Should never reach here. */
13386 HALT_NYI;
13387 }
13388 }
13389
13390 /* Unconditional Branch immediate.
13391 Offset is a PC-relative byte offset in the range +/- 128MiB.
13392 The decode routine is expected to have already scaled the raw
13393 word offset from the instruction into a byte offset. */
13394
13395 /* Unconditional branch. */
13396 static void
13397 buc (sim_cpu *cpu, int32_t offset)
13398 {
13399 aarch64_set_next_PC_by_offset (cpu, offset);
13400 }
13401
13402 static unsigned stack_depth = 0;
13403
13404 /* Unconditional branch and link -- writes return PC to LR. */
13405 static void
13406 bl (sim_cpu *cpu, int32_t offset)
13407 {
13408 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13409 aarch64_save_LR (cpu);
13410 aarch64_set_next_PC_by_offset (cpu, offset);
13411
13412 if (TRACE_BRANCH_P (cpu))
13413 {
13414 ++ stack_depth;
13415 TRACE_BRANCH (cpu,
13416 " %*scall %" PRIx64 " [%s]"
13417 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13418 stack_depth, " ", aarch64_get_next_PC (cpu),
13419 aarch64_get_func (CPU_STATE (cpu),
13420 aarch64_get_next_PC (cpu)),
13421 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13422 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13423 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13424 );
13425 }
13426 }
13427
13428 /* Unconditional Branch register.
13429 Branch/return address is in source register. */
13430
13431 /* Unconditional branch. */
13432 static void
13433 br (sim_cpu *cpu)
13434 {
13435 unsigned rn = INSTR (9, 5);
13436 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13437 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13438 }
13439
13440 /* Unconditional branch and link -- writes return PC to LR. */
13441 static void
13442 blr (sim_cpu *cpu)
13443 {
13444 /* Ensure we read the destination before we write LR. */
13445 uint64_t target = aarch64_get_reg_u64 (cpu, INSTR (9, 5), NO_SP);
13446
13447 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13448 aarch64_save_LR (cpu);
13449 aarch64_set_next_PC (cpu, target);
13450
13451 if (TRACE_BRANCH_P (cpu))
13452 {
13453 ++ stack_depth;
13454 TRACE_BRANCH (cpu,
13455 " %*scall %" PRIx64 " [%s]"
13456 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13457 stack_depth, " ", aarch64_get_next_PC (cpu),
13458 aarch64_get_func (CPU_STATE (cpu),
13459 aarch64_get_next_PC (cpu)),
13460 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13461 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13462 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13463 );
13464 }
13465 }
13466
13467 /* Return -- the assembler defaults the source register to LR. This is
13468 functionally equivalent to br but, presumably, unlike br it side
13469 effects the branch predictor. */
13470 static void
13471 ret (sim_cpu *cpu)
13472 {
13473 unsigned rn = INSTR (9, 5);
13474 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13475
13476 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13477 if (TRACE_BRANCH_P (cpu))
13478 {
13479 TRACE_BRANCH (cpu,
13480 " %*sreturn [result: %" PRIx64 "]",
13481 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13482 -- stack_depth;
13483 }
13484 }
13485
13486 /* NOP -- we implement this and call it from the decode in case we
13487 want to intercept it later. */
13488
13489 static void
13490 nop (sim_cpu *cpu)
13491 {
13492 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13493 }
13494
13495 /* Data synchronization barrier. */
13496
13497 static void
13498 dsb (sim_cpu *cpu)
13499 {
13500 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13501 }
13502
13503 /* Data memory barrier. */
13504
13505 static void
13506 dmb (sim_cpu *cpu)
13507 {
13508 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13509 }
13510
13511 /* Instruction synchronization barrier. */
13512
13513 static void
13514 isb (sim_cpu *cpu)
13515 {
13516 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13517 }
13518
13519 static void
13520 dexBranchImmediate (sim_cpu *cpu)
13521 {
13522 /* assert instr[30,26] == 00101
13523 instr[31] ==> 0 == B, 1 == BL
13524 instr[25,0] == imm26 branch offset counted in words. */
13525
13526 uint32_t top = INSTR (31, 31);
13527 /* We have a 26 bit signed word offset which we need to pass to the
13528 execute routine as a signed byte offset. */
13529 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13530
13531 if (top)
13532 bl (cpu, offset);
13533 else
13534 buc (cpu, offset);
13535 }
13536
13537 /* Control Flow. */
13538
13539 /* Conditional branch
13540
13541 Offset is a PC-relative byte offset in the range +/- 1MiB. pos is
13542 a bit position in the range 0 .. 63.
13543
13544 cc is a CondCode enum value as pulled out of the decode
13545
13546 N.B. any offset register (source) can only be Xn or Wn. */
13547
13548 static void
13549 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13550 {
13551 /* The test returns TRUE if CC is met. */
13552 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13553 if (testConditionCode (cpu, cc))
13554 aarch64_set_next_PC_by_offset (cpu, offset);
13555 }
13556
13557 /* 32 bit branch on register non-zero. */
13558 static void
13559 cbnz32 (sim_cpu *cpu, int32_t offset)
13560 {
13561 unsigned rt = INSTR (4, 0);
13562
13563 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13564 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13565 aarch64_set_next_PC_by_offset (cpu, offset);
13566 }
13567
13568 /* 64 bit branch on register non-zero. */
13569 static void
13570 cbnz (sim_cpu *cpu, int32_t offset)
13571 {
13572 unsigned rt = INSTR (4, 0);
13573
13574 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13575 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13576 aarch64_set_next_PC_by_offset (cpu, offset);
13577 }
13578
13579 /* 32 bit branch on register zero. */
13580 static void
13581 cbz32 (sim_cpu *cpu, int32_t offset)
13582 {
13583 unsigned rt = INSTR (4, 0);
13584
13585 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13586 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13587 aarch64_set_next_PC_by_offset (cpu, offset);
13588 }
13589
13590 /* 64 bit branch on register zero. */
13591 static void
13592 cbz (sim_cpu *cpu, int32_t offset)
13593 {
13594 unsigned rt = INSTR (4, 0);
13595
13596 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13597 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13598 aarch64_set_next_PC_by_offset (cpu, offset);
13599 }
13600
13601 /* Branch on register bit test non-zero -- one size fits all. */
13602 static void
13603 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13604 {
13605 unsigned rt = INSTR (4, 0);
13606
13607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13608 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13609 aarch64_set_next_PC_by_offset (cpu, offset);
13610 }
13611
13612 /* Branch on register bit test zero -- one size fits all. */
13613 static void
13614 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13615 {
13616 unsigned rt = INSTR (4, 0);
13617
13618 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13619 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13620 aarch64_set_next_PC_by_offset (cpu, offset);
13621 }
13622
13623 static void
13624 dexCompareBranchImmediate (sim_cpu *cpu)
13625 {
13626 /* instr[30,25] = 01 1010
13627 instr[31] = size : 0 ==> 32, 1 ==> 64
13628 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13629 instr[23,5] = simm19 branch offset counted in words
13630 instr[4,0] = rt */
13631
13632 uint32_t size = INSTR (31, 31);
13633 uint32_t op = INSTR (24, 24);
13634 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13635
13636 if (size == 0)
13637 {
13638 if (op == 0)
13639 cbz32 (cpu, offset);
13640 else
13641 cbnz32 (cpu, offset);
13642 }
13643 else
13644 {
13645 if (op == 0)
13646 cbz (cpu, offset);
13647 else
13648 cbnz (cpu, offset);
13649 }
13650 }
13651
13652 static void
13653 dexTestBranchImmediate (sim_cpu *cpu)
13654 {
13655 /* instr[31] = b5 : bit 5 of test bit idx
13656 instr[30,25] = 01 1011
13657 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13658 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13659 instr[18,5] = simm14 : signed offset counted in words
13660 instr[4,0] = uimm5 */
13661
13662 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13663 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13664
13665 NYI_assert (30, 25, 0x1b);
13666
13667 if (INSTR (24, 24) == 0)
13668 tbz (cpu, pos, offset);
13669 else
13670 tbnz (cpu, pos, offset);
13671 }
13672
13673 static void
13674 dexCondBranchImmediate (sim_cpu *cpu)
13675 {
13676 /* instr[31,25] = 010 1010
13677 instr[24] = op1 : op1:op0 == 00 ==> B.cond, ow ==> UNALLOC
13678 instr[23,5] = simm19 : signed offset counted in words
13679 instr[4] = op0
13680 instr[3,0] = cond */
13681
13682 int32_t offset;
13683 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13684
13685 NYI_assert (31, 25, 0x2a);
13686
13687 if (op != 0)
13688 HALT_UNALLOC;
13689
13690 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13691
13692 bcc (cpu, offset, INSTR (3, 0));
13693 }
13694
13695 static void
13696 dexBranchRegister (sim_cpu *cpu)
13697 {
13698 /* instr[31,25] = 110 1011
13699 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13700 instr[20,16] = op2 : must be 11111
13701 instr[15,10] = op3 : must be 000000
13702 instr[4,0] = op4 : must be 00000. */
13703
13704 uint32_t op = INSTR (24, 21);
13705 uint32_t op2 = INSTR (20, 16);
13706 uint32_t op3 = INSTR (15, 10);
13707 uint32_t op4 = INSTR (4, 0);
13708
13709 NYI_assert (31, 25, 0x6b);
13710
13711 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13712 HALT_UNALLOC;
13713
13714 if (op == 0)
13715 br (cpu);
13716
13717 else if (op == 1)
13718 blr (cpu);
13719
13720 else if (op == 2)
13721 ret (cpu);
13722
13723 else
13724 {
13725 /* ERET and DRPS accept 0b11111 for rn = instr [9,5];
13726 anything else is unallocated. */
13727 uint32_t rn = INSTR (9, 5);
13728
13729 if (rn != 0x1f)
13730 HALT_UNALLOC;
13731
13732 if (op == 4 || op == 5)
13733 HALT_NYI;
13734
13735 HALT_UNALLOC;
13736 }
13737 }
13738
13739 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13740 but this may not be available. So instead we define the values we need
13741 here. */
13742 #define AngelSVC_Reason_Open 0x01
13743 #define AngelSVC_Reason_Close 0x02
13744 #define AngelSVC_Reason_Write 0x05
13745 #define AngelSVC_Reason_Read 0x06
13746 #define AngelSVC_Reason_IsTTY 0x09
13747 #define AngelSVC_Reason_Seek 0x0A
13748 #define AngelSVC_Reason_FLen 0x0C
13749 #define AngelSVC_Reason_Remove 0x0E
13750 #define AngelSVC_Reason_Rename 0x0F
13751 #define AngelSVC_Reason_Clock 0x10
13752 #define AngelSVC_Reason_Time 0x11
13753 #define AngelSVC_Reason_System 0x12
13754 #define AngelSVC_Reason_Errno 0x13
13755 #define AngelSVC_Reason_GetCmdLine 0x15
13756 #define AngelSVC_Reason_HeapInfo 0x16
13757 #define AngelSVC_Reason_ReportException 0x18
13758 #define AngelSVC_Reason_Elapsed 0x30
13759
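/* For reference: a guest reaches handle_halt below by executing
   HLT #0xf000 with an Angel reason code in w0 and, for most calls, a
   parameter block pointer in x1; the result comes back in x0.  A
   guest-side sketch (our illustration, not part of the simulator),
   assuming GCC extended asm on an AArch64 target.  */
#if 0
static uint64_t
angel_svc (uint32_t reason, void *block)
{
  register uint64_t x0 __asm__ ("x0") = reason;
  register uint64_t x1 __asm__ ("x1") = (uint64_t) block;

  __asm__ volatile ("hlt #0xf000"
                    : "+r" (x0)
                    : "r" (x1)
                    : "memory");
  return x0;
}
#endif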
13760
13761 static void
13762 handle_halt (sim_cpu *cpu, uint32_t val)
13763 {
13764 uint64_t result = 0;
13765
13766 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13767 if (val != 0xf000)
13768 {
13769 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13770 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13771 sim_stopped, SIM_SIGTRAP);
13772 }
13773
13774 /* We have encountered an Angel SVC call. See if we can process it. */
13775 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13776 {
13777 case AngelSVC_Reason_HeapInfo:
13778 {
13779 /* Get the values. */
13780 uint64_t stack_top = aarch64_get_stack_start (cpu);
13781 uint64_t heap_base = aarch64_get_heap_start (cpu);
13782
13783 /* Get the pointer */
13784 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13785 ptr = aarch64_get_mem_u64 (cpu, ptr);
13786
13787 /* Fill in the memory block. */
13788 /* Start addr of heap. */
13789 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13790 /* End addr of heap. */
13791 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13792 /* Lowest stack addr. */
13793 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13794 /* Initial stack addr. */
13795 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13796
13797 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13798 }
13799 break;
13800
13801 case AngelSVC_Reason_Open:
13802 {
13803 /* Get the pointer */
13804 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13805 /* FIXME: For now we just assume that we will only be asked
13806 to open the standard file descriptors. */
13807 static int fd = 0;
13808 result = fd ++;
13809
13810 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13811 }
13812 break;
13813
13814 case AngelSVC_Reason_Close:
13815 {
13816 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13817 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13818 result = 0;
13819 }
13820 break;
13821
13822 case AngelSVC_Reason_Errno:
13823 result = 0;
13824 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13825 break;
13826
13827 case AngelSVC_Reason_Clock:
13828 result =
13829 #ifdef CLOCKS_PER_SEC
13830 (CLOCKS_PER_SEC >= 100)
13831 ? (clock () / (CLOCKS_PER_SEC / 100))
13832 : ((clock () * 100) / CLOCKS_PER_SEC)
13833 #else
13834 /* Presume unix... clock() returns microseconds. */
13835 (clock () / 10000)
13836 #endif
13837 ;
13838 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13839 break;
13840
13841 case AngelSVC_Reason_GetCmdLine:
13842 {
13843 /* Get the pointer */
13844 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13845 ptr = aarch64_get_mem_u64 (cpu, ptr);
13846
13847 /* FIXME: No command line for now. */
13848 aarch64_set_mem_u64 (cpu, ptr, 0);
13849 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13850 }
13851 break;
13852
13853 case AngelSVC_Reason_IsTTY:
13854 result = 1;
13855 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13856 break;
13857
13858 case AngelSVC_Reason_Write:
13859 {
13860 /* Get the pointer */
13861 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13862 /* Get the write control block. */
13863 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13864 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13865 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13866
13867 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13868 PRIx64 " on descriptor %" PRIx64,
13869 len, buf, fd);
13870
13871 if (len > 1280)
13872 {
13873 TRACE_SYSCALL (cpu,
13874 " AngelSVC: Write: Suspiciously long write: %ld",
13875 (long) len);
13876 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13877 sim_stopped, SIM_SIGBUS);
13878 }
13879 else if (fd == 1)
13880 {
13881 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13882 }
13883 else if (fd == 2)
13884 {
13885 TRACE (cpu, 0, "\n");
13886 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13887 (int) len, aarch64_get_mem_ptr (cpu, buf));
13888 TRACE (cpu, 0, "\n");
13889 }
13890 else
13891 {
13892 TRACE_SYSCALL (cpu,
13893 " AngelSVC: Write: Unexpected file handle: %d",
13894 (int) fd);
13895 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13896 sim_stopped, SIM_SIGABRT);
13897 }
13898 }
13899 break;
13900
13901 case AngelSVC_Reason_ReportException:
13902 {
13903 /* Get the pointer */
13904 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13905 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13906 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13907 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13908
13909 TRACE_SYSCALL (cpu,
13910 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13911 type, state);
13912
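/* 0x20026 is ADP_Stopped_ApplicationExit in the Angel/semihosting
   conventions, i.e. a normal exit with the status in the block's
   second word. */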
13913 if (type == 0x20026)
13914 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13915 sim_exited, state);
13916 else
13917 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13918 sim_stopped, SIM_SIGINT);
13919 }
13920 break;
13921
13922 case AngelSVC_Reason_Read:
13923 case AngelSVC_Reason_FLen:
13924 case AngelSVC_Reason_Seek:
13925 case AngelSVC_Reason_Remove:
13926 case AngelSVC_Reason_Time:
13927 case AngelSVC_Reason_System:
13928 case AngelSVC_Reason_Rename:
13929 case AngelSVC_Reason_Elapsed:
13930 default:
13931 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13932 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13933 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13934 sim_stopped, SIM_SIGTRAP);
13935 }
13936
13937 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13938 }
13939
13940 static void
13941 dexExcpnGen (sim_cpu *cpu)
13942 {
13943 /* instr[31:24] = 11010100
13944 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13945 010 ==> HLT, 101 ==> DBG GEN EXCPN
13946 instr[20,5] = imm16
13947 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13948 instr[1,0] = LL : discriminates opc */
13949
13950 uint32_t opc = INSTR (23, 21);
13951 uint32_t imm16 = INSTR (20, 5);
13952 uint32_t opc2 = INSTR (4, 2);
13953 uint32_t LL;
13954
13955 NYI_assert (31, 24, 0xd4);
13956
13957 if (opc2 != 0)
13958 HALT_UNALLOC;
13959
13960 LL = INSTR (1, 0);
13961
13962 /* We only implement HLT and BRK for now. */
13963 if (opc == 1 && LL == 0)
13964 {
13965 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13966 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13967 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13968 }
13969
13970 if (opc == 2 && LL == 0)
13971 handle_halt (cpu, imm16);
13972
13973 else if (opc == 0 || opc == 5)
13974 HALT_NYI;
13975
13976 else
13977 HALT_UNALLOC;
13978 }
13979
13980 /* Stub for accessing system registers. */
13981
13982 static uint64_t
13983 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13984 unsigned crm, unsigned op2)
13985 {
13986 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13987 /* DCZID_EL0 - the Data Cache Zero ID register.
13988 We do not support DC ZVA at the moment, so
13989 we return a value with the disable bit set.
13990 We implement support for the DCZID register since
13991 it is used by the C library's memset function. */
13992 return ((uint64_t) 1) << 4;
13993
13994 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13995 /* Cache Type Register. */
13996 return 0x80008000UL;
13997
13998 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13999 /* TPIDR_EL0 - thread pointer id. */
14000 return aarch64_get_thread_id (cpu);
14001
14002 if (op1 == 3 && crm == 4 && op2 == 0)
14003 return aarch64_get_FPCR (cpu);
14004
14005 if (op1 == 3 && crm == 4 && op2 == 1)
14006 return aarch64_get_FPSR (cpu);
14007
14008 else if (op1 == 3 && crm == 2 && op2 == 0)
14009 return aarch64_get_CPSR (cpu);
14010
14011 HALT_NYI;
14012 }
14013
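/* Example of the mapping system_get implements (our own illustration):
   "mrs x0, dczid_el0" arrives with op1 = 3, CRn = 0, CRm = 0, op2 = 7
   and so yields 0x10, i.e. DC ZVA reported as disabled.  */
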
14014 static void
14015 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14016 unsigned crm, unsigned op2, uint64_t val)
14017 {
14018 if (op1 == 3 && crm == 4 && op2 == 0)
14019 aarch64_set_FPCR (cpu, val);
14020
14021 else if (op1 == 3 && crm == 4 && op2 == 1)
14022 aarch64_set_FPSR (cpu, val);
14023
14024 else if (op1 == 3 && crm == 2 && op2 == 0)
14025 aarch64_set_CPSR (cpu, val);
14026
14027 else
14028 HALT_NYI;
14029 }
14030
14031 static void
14032 do_mrs (sim_cpu *cpu)
14033 {
14034 /* instr[31:20] = 1101 0101 0001 1
14035 instr[19] = op0
14036 instr[18,16] = op1
14037 instr[15,12] = CRn
14038 instr[11,8] = CRm
14039 instr[7,5] = op2
14040 instr[4,0] = Rt */
14041 unsigned sys_op0 = INSTR (19, 19) + 2;
14042 unsigned sys_op1 = INSTR (18, 16);
14043 unsigned sys_crn = INSTR (15, 12);
14044 unsigned sys_crm = INSTR (11, 8);
14045 unsigned sys_op2 = INSTR (7, 5);
14046 unsigned rt = INSTR (4, 0);
14047
14048 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14049 aarch64_set_reg_u64 (cpu, rt, NO_SP,
14050 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
14051 }
14052
14053 static void
14054 do_MSR_immediate (sim_cpu *cpu)
14055 {
14056 /* instr[31:19] = 1101 0101 0000 0
14057 instr[18,16] = op1
14058 instr[15,12] = 0100
14059 instr[11,8] = CRm
14060 instr[7,5] = op2
14061 instr[4,0] = 1 1111 */
14062
14063 unsigned op1 = INSTR (18, 16);
14064 /*unsigned crm = INSTR (11, 8);*/
14065 unsigned op2 = INSTR (7, 5);
14066
14067 NYI_assert (31, 19, 0x1AA0);
14068 NYI_assert (15, 12, 0x4);
14069 NYI_assert (4, 0, 0x1F);
14070
14071 if (op1 == 0)
14072 {
14073 if (op2 == 5)
14074 HALT_NYI; /* set SPSel. */
14075 else
14076 HALT_UNALLOC;
14077 }
14078 else if (op1 == 3)
14079 {
14080 if (op2 == 6)
14081 HALT_NYI; /* set DAIFset. */
14082 else if (op2 == 7)
14083 HALT_NYI; /* set DAIFclr. */
14084 else
14085 HALT_UNALLOC;
14086 }
14087 else
14088 HALT_UNALLOC;
14089 }
14090
14091 static void
14092 do_MSR_reg (sim_cpu *cpu)
14093 {
14094 /* instr[31:20] = 1101 0101 0001
14095 instr[19] = op0
14096 instr[18,16] = op1
14097 instr[15,12] = CRn
14098 instr[11,8] = CRm
14099 instr[7,5] = op2
14100 instr[4,0] = Rt */
14101
14102 unsigned sys_op0 = INSTR (19, 19) + 2;
14103 unsigned sys_op1 = INSTR (18, 16);
14104 unsigned sys_crn = INSTR (15, 12);
14105 unsigned sys_crm = INSTR (11, 8);
14106 unsigned sys_op2 = INSTR (7, 5);
14107 unsigned rt = INSTR (4, 0);
14108
14109 NYI_assert (31, 20, 0xD51);
14110
14111 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14112 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
14113 aarch64_get_reg_u64 (cpu, rt, NO_SP));
14114 }
14115
14116 static void
14117 do_SYS (sim_cpu *cpu)
14118 {
14119 /* instr[31,19] = 1101 0101 0000 1
14120 instr[18,16] = op1
14121 instr[15,12] = CRn
14122 instr[11,8] = CRm
14123 instr[7,5] = op2
14124 instr[4,0] = Rt */
14125 NYI_assert (31, 19, 0x1AA1);
14126
14127 /* FIXME: For now we just silently accept system ops. */
14128 }
14129
14130 static void
14131 dexSystem (sim_cpu *cpu)
14132 {
14133 /* instr[31:22] = 1101 01010 0
14134 instr[21] = L
14135 instr[20,19] = op0
14136 instr[18,16] = op1
14137 instr[15,12] = CRn
14138 instr[11,8] = CRm
14139 instr[7,5] = op2
14140 instr[4,0] = uimm5 */
14141
14142 /* We are interested in HINT, DSB, DMB and ISB
14143
14144 Hint #0 encodes NOOP (this is the only hint we care about)
14145 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
14146 CRm != 0000 OR (CRm == 0000 AND (op2 == 000 OR op2 > 101))
14147
14148 DSB, DMB, ISB are data synchronization barrier, data memory barrier
14149 and instruction synchronization barrier, respectively, where
14150
14151 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
14152 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
14153 CRm<3:2> ==> domain, CRm<1:0> ==> types,
14154 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
14155 10 ==> InnerShareable, 11 ==> FullSystem
14156 types : 01 ==> Reads, 10 ==> Writes,
14157 11 ==> All, 00 ==> All (domain == FullSystem). */
14158
14159 unsigned rt = INSTR (4, 0);
14160
14161 NYI_assert (31, 22, 0x354);
14162
14163 switch (INSTR (21, 12))
14164 {
14165 case 0x032:
14166 if (rt == 0x1F)
14167 {
14168 /* NOP has CRm != 0000 OR
14169 (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
14170 uint32_t crm = INSTR (11, 8);
14171 uint32_t op2 = INSTR (7, 5);
14172
14173 if (crm != 0 || (op2 == 0 || op2 > 5))
14174 {
14175 /* Actually call nop method so we can reimplement it later. */
14176 nop (cpu);
14177 return;
14178 }
14179 }
14180 HALT_NYI;
14181
14182 case 0x033:
14183 {
14184 uint32_t op2 = INSTR (7, 5);
14185
14186 switch (op2)
14187 {
14188 case 2: HALT_NYI;
14189 case 4: dsb (cpu); return;
14190 case 5: dmb (cpu); return;
14191 case 6: isb (cpu); return;
14192 default: HALT_UNALLOC;
14193 }
14194 }
14195
14196 case 0x3B0:
14197 case 0x3B4:
14198 case 0x3BD:
14199 do_mrs (cpu);
14200 return;
14201
14202 case 0x0B7:
14203 do_SYS (cpu); /* DC is an alias of SYS. */
14204 return;
14205
14206 default:
14207 if (INSTR (21, 20) == 0x1)
14208 do_MSR_reg (cpu);
14209 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14210 do_MSR_immediate (cpu);
14211 else
14212 HALT_NYI;
14213 return;
14214 }
14215 }
14216
14217 static void
14218 dexBr (sim_cpu *cpu)
14219 {
14220 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14221 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14222 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14223 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14224
14225 switch (group2)
14226 {
14227 case BR_IMM_000:
14228 return dexBranchImmediate (cpu);
14229
14230 case BR_IMMCMP_001:
14231 /* Compare has bit 25 clear while test has it set. */
14232 if (!INSTR (25, 25))
14233 dexCompareBranchImmediate (cpu);
14234 else
14235 dexTestBranchImmediate (cpu);
14236 return;
14237
14238 case BR_IMMCOND_010:
14239 /* This is a conditional branch if bit 25 is clear otherwise
14240 unallocated. */
14241 if (!INSTR (25, 25))
14242 dexCondBranchImmediate (cpu);
14243 else
14244 HALT_UNALLOC;
14245 return;
14246
14247 case BR_UNALLOC_011:
14248 HALT_UNALLOC;
14249
14250 case BR_IMM_100:
14251 dexBranchImmediate (cpu);
14252 return;
14253
14254 case BR_IMMCMP_101:
14255 /* Compare has bit 25 clear while test has it set. */
14256 if (!INSTR (25, 25))
14257 dexCompareBranchImmediate (cpu);
14258 else
14259 dexTestBranchImmediate (cpu);
14260 return;
14261
14262 case BR_REG_110:
14263 /* Unconditional branch reg has bit 25 set. */
14264 if (INSTR (25, 25))
14265 dexBranchRegister (cpu);
14266
14267 /* This includes both Excpn Gen, System and unalloc operations.
14268 We need to decode the Excpn Gen operation BRK so we can plant
14269 debugger entry points.
14270 Excpn Gen operations have instr [24] = 0.
14271 we need to decode at least one of the System operations NOP
14272 which is an alias for HINT #0.
14273 System operations have instr [24,22] = 100. */
14274 else if (INSTR (24, 24) == 0)
14275 dexExcpnGen (cpu);
14276
14277 else if (INSTR (24, 22) == 4)
14278 dexSystem (cpu);
14279
14280 else
14281 HALT_UNALLOC;
14282
14283 return;
14284
14285 case BR_UNALLOC_111:
14286 HALT_UNALLOC;
14287
14288 default:
14289 /* Should never reach here. */
14290 HALT_NYI;
14291 }
14292 }
14293
14294 static void
14295 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14296 {
14297 /* We need to check if gdb wants in here. */
14298 /* checkBreak (cpu); */
14299
14300 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14301
14302 switch (group)
14303 {
14304 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14305 case GROUP_LDST_0100: dexLdSt (cpu); break;
14306 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14307 case GROUP_LDST_0110: dexLdSt (cpu); break;
14308 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14309 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14310 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14311 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14312 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14313 case GROUP_LDST_1100: dexLdSt (cpu); break;
14314 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14315 case GROUP_LDST_1110: dexLdSt (cpu); break;
14316 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14317
14318 case GROUP_UNALLOC_0001:
14319 case GROUP_UNALLOC_0010:
14320 case GROUP_UNALLOC_0011:
14321 HALT_UNALLOC;
14322
14323 default:
14324 /* Should never reach here. */
14325 HALT_NYI;
14326 }
14327 }
14328
14329 static bfd_boolean
14330 aarch64_step (sim_cpu *cpu)
14331 {
14332 uint64_t pc = aarch64_get_PC (cpu);
14333
14334 if (pc == TOP_LEVEL_RETURN_PC)
14335 return FALSE;
14336
14337 aarch64_set_next_PC (cpu, pc + 4);
14338
14339 /* Code is always little-endian. */
14340 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14341 & aarch64_get_instr (cpu), pc, 4);
14342 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14343
14344 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14345 aarch64_get_instr (cpu));
14346 TRACE_DISASM (cpu, pc);
14347
14348 aarch64_decode_and_execute (cpu, pc);
14349
14350 return TRUE;
14351 }
14352
14353 void
14354 aarch64_run (SIM_DESC sd)
14355 {
14356 sim_cpu *cpu = STATE_CPU (sd, 0);
14357
14358 while (aarch64_step (cpu))
14359 {
14360 aarch64_update_PC (cpu);
14361
14362 if (sim_events_tick (sd))
14363 sim_events_process (sd);
14364 }
14365
14366 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14367 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14368 }
14369
14370 void
14371 aarch64_init (sim_cpu *cpu, uint64_t pc)
14372 {
14373 uint64_t sp = aarch64_get_stack_start (cpu);
14374
14375 /* Install SP, FP and PC and set LR to -20
14376 so we can detect a top-level return. */
14377 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14378 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14379 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14380 aarch64_set_next_PC (cpu, pc);
14381 aarch64_update_PC (cpu);
14382 aarch64_init_LIT_table ();
14383 }